From 784e9c833ef13ca0ebfd442875c9ec60de157d4e Mon Sep 17 00:00:00 2001 From: safishamsi Date: Wed, 1 Jul 2026 15:09:42 +0100 Subject: [PATCH 1/9] fix(extract): case-sensitive cross-file resolution in case-sensitive languages (#1581) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cross-file name resolution folded case for every language, so `from pathlib import Path` resolved to a shell script's `export PATH=...` node — one variable becoming the corpus's #1 god-node (266 false incoming edges on a real repo), polluting god-node rankings, affected blast-radius, and clustering. Reported with a precise diagnosis by @sheik-hiiobd. Case is semantic in Python/Rust/Go/Java/C#/Kotlin/Swift/Ruby/C/C++/JS/TS: `Path` (class), `PATH` (env var), `path` (variable) are distinct. Fix gates folding by language at the two resolution sites the repro exercised: - global cross-file CALL resolver: index by exact case; a folded index is built only for case-insensitive-language nodes (PHP/SQL/Nim) and consulted only when the calling file is such a language. - type-reference STUB rewire (_rewire_unique_stub_nodes): match stubs to real defs by exact case, with a folded fallback restricted to case-insensitive- language definitions — so a case-sensitive `PATH` can never absorb a `Path`. For case-sensitive languages this only ever removes false edges. Concept/doc dedup (dedup.py, guarded to non-code nodes) is intentionally left folding. Regression tests: Python `Path` no longer hits shell `PATH`; a case-differing cross-file ref doesn't resolve; exact-case resolution still works; PHP fold preserved. Full suite 2777. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 1 + graphify/extract.py | 58 ++++++++++++++--- tests/test_case_sensitive_resolution.py | 87 +++++++++++++++++++++++++ 3 files changed, 138 insertions(+), 8 deletions(-) create mode 100644 tests/test_case_sensitive_resolution.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 4fa1df316..0626098a6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ Full release notes with details on each version: [GitHub Releases](https://githu ## Unreleased +- Fix: cross-file name resolution now respects case in case-sensitive languages (#1581, thanks @sheik-hiiobd). Resolution matched identifiers case-insensitively for every language, so in Python/Rust/Go/Java/etc. `from pathlib import Path` resolved to an unrelated shell-script `export PATH=...` node — a single variable becoming the corpus's #1 god-node (266 false incoming edges on one real repo), inflating god-node rankings, `affected` blast-radius, and community assignment. Both the cross-file call resolver and the type-reference stub-rewire now match by exact case; only genuinely case-insensitive languages (PHP functions/classes, SQL, Nim) still fold. For case-sensitive languages this only ever removes false edges. - Fix: Julia qualified / relative / scoped-selected imports now emit edges (#1580, thanks @Synvoya). Only bare `using Foo` was handled; `using Base.Threads` (scoped), `using ..Parent` (relative import_path), and the scoped package of `import Base.Threads: nthreads` were dropped. - Fix: Rust tuple-struct field types now emit `references` edges (#1582, thanks @Synvoya). `struct Wrapper(Logger, Vec);` referenced nothing — positional fields nest under `ordered_field_declaration_list` with no `field_declaration` wrapper, the same shape as tuple enum variants (#1579); that path wasn't traversed for structs. - Fix: SystemVerilog class properties with leading qualifiers now emit field `references` (#1583, thanks @Synvoya). 
The field regex only matched unqualified ` ;`, so `rand Config x;` / `protected Base b;` (qualifier + type + name) failed to match and their type references were dropped. diff --git a/graphify/extract.py b/graphify/extract.py index 6e8ab9964..0c67ac5cd 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -9032,9 +9032,27 @@ def _canonicalize_csharp_namespace_nodes(all_nodes: list[dict], all_edges: list[ all_nodes[:] = [node for node in all_nodes if id(node) not in drop_node_ids] -def _node_label_key(node: dict) -> str: +# Languages whose identifiers are case-insensitive, so cross-file name resolution +# may fold case. Everywhere else, case is semantic (`Path` the class vs `PATH` the +# env var are distinct) and folding manufactures false edges / super-hubs (#1581). +_CASE_INSENSITIVE_EXTS = frozenset({ + ".php", ".phtml", ".php3", ".php4", ".php5", ".php7", ".phps", # PHP fns/classes + ".sql", # SQL identifiers + ".nim", ".nims", ".nimble", # Nim (style-insensitive) +}) + + +def _lang_is_case_insensitive(source_file: object) -> bool: + """True when the file's language resolves identifiers case-insensitively (#1581).""" + if not source_file: + return False + return Path(str(source_file)).suffix.lower() in _CASE_INSENSITIVE_EXTS + + +def _node_label_key(node: dict, fold: bool = False) -> str: label = str(node.get("label", "")).strip() - return re.sub(r"[^a-zA-Z0-9]+", "", label).lower() + key = re.sub(r"[^a-zA-Z0-9]+", "", label) + return key.lower() if fold else key def _is_type_like_definition(node: dict) -> bool: @@ -9052,7 +9070,8 @@ def _is_type_like_definition(node: dict) -> bool: def _rewire_unique_stub_nodes(nodes: list[dict], edges: list[dict]) -> None: """Map unresolved no-source stubs to a unique real definition with the same label.""" - real_by_label: dict[str, list[dict]] = {} + real_by_label: dict[str, list[dict]] = {} # exact-case (all languages) + real_by_label_ci: dict[str, list[dict]] = {} # case-INSENSITIVE-language reals only stubs: 
list[dict] = [] for node in nodes: @@ -9061,7 +9080,13 @@ def _rewire_unique_stub_nodes(nodes: list[dict], edges: list[dict]) -> None: continue if node.get("source_file"): if _is_type_like_definition(node): + # Match stubs case-SENSITIVELY: a `Path` reference must not rewire to a + # `PATH` env var (#1581). Fold only for genuinely case-insensitive + # languages, where `foo` legitimately resolves to `Foo`. real_by_label.setdefault(key, []).append(node) + if _lang_is_case_insensitive(node.get("source_file")): + real_by_label_ci.setdefault( + _node_label_key(node, fold=True), []).append(node) continue stubs.append(node) @@ -9072,7 +9097,12 @@ def _rewire_unique_stub_nodes(nodes: list[dict], edges: list[dict]) -> None: continue candidates = real_by_label.get(_node_label_key(stub), []) if len(candidates) != 1: - continue + # No unique exact match — fall back to a case-insensitive match, but + # only against case-insensitive-language definitions (so a case-sensitive + # `PATH` can never absorb a `Path` reference). + candidates = real_by_label_ci.get(_node_label_key(stub, fold=True), []) + if len(candidates) != 1: + continue target_id = candidates[0].get("id") if isinstance(target_id, str) and target_id and target_id != stub_id: remap[stub_id] = target_id @@ -15464,15 +15494,21 @@ def extract( # Build label -> node_id index for cross-file call resolution. # Skip rationale nodes (their labels are docstring text, not callable # identifiers, and they were polluting matches for short names — #563). 
- global_label_to_nids: dict[str, list[str]] = {} + global_label_to_nids: dict[str, list[str]] = {} # exact-case (all languages) + global_label_to_nids_ci: dict[str, list[str]] = {} # case-INSENSITIVE-language nodes for n in all_nodes: if n.get("file_type") == "rationale" or n.get("type") == "namespace": continue raw = n.get("label", "") normalised = raw.strip("()").lstrip(".") if normalised: - key = normalised.lower() - global_label_to_nids.setdefault(key, []).append(n["id"]) + # Case is semantic in most languages, so index (and match, below) by exact + # case — folding collapses `Path` (class) into `PATH` (env var) and makes a + # single shell variable the #1 god-node (#1581). Only case-insensitive + # languages (PHP/SQL/Nim) also get a folded key for legitimate fold-matching. + global_label_to_nids.setdefault(normalised, []).append(n["id"]) + if _lang_is_case_insensitive(n.get("source_file")): + global_label_to_nids_ci.setdefault(normalised.lower(), []).append(n["id"]) # Callable-def ids for the indirect_call callable guard, read from the `_callable` # marker on the FINAL (post-remap) nodes — so a callback resolves only to a real @@ -15532,7 +15568,13 @@ def extract( # and collides with any top-level function named "log" in the corpus. if rc.get("is_member_call"): continue - candidates = global_label_to_nids.get(callee.lower(), []) + # Exact-case match first (case is semantic). Fold only when the CALLING + # file's language is case-insensitive, and only against the folded index of + # case-insensitive-language definitions — so a Python `Path()` call can never + # resolve to a shell `PATH` node (#1581). 
+ candidates = global_label_to_nids.get(callee, []) + if not candidates and _lang_is_case_insensitive(rc.get("source_file")): + candidates = global_label_to_nids_ci.get(callee.lower(), []) if not candidates: continue caller = rc["caller_nid"] diff --git a/tests/test_case_sensitive_resolution.py b/tests/test_case_sensitive_resolution.py new file mode 100644 index 000000000..5838b02bc --- /dev/null +++ b/tests/test_case_sensitive_resolution.py @@ -0,0 +1,87 @@ +"""Cross-file name resolution respects case in case-sensitive languages (#1581). + +Case is semantic in most languages: `Path` (a class), `PATH` (an env var), and +`path` (a variable) are distinct. Cross-file resolution used to fold case for every +language, so `from pathlib import Path` (ubiquitous) resolved to a shell script's +`export PATH=...` node — turning one shell variable into the corpus's #1 god-node. + +These tests pin: case-sensitive languages match by exact case (removing that false +edge), while genuinely case-insensitive languages (PHP) still fold. 
+""" +from __future__ import annotations + +import os +from pathlib import Path + +from graphify.extract import extract + + +def _extract(tmp_path, files: dict[str, str]): + for name, body in files.items(): + (tmp_path / name).write_text(body) + old = os.getcwd() + try: + os.chdir(tmp_path) + r = extract([Path(n) for n in files], cache_root=tmp_path) + finally: + os.chdir(old) + return r + + +def _labels(r): + return {n["id"]: n["label"] for n in r["nodes"]} + + +def test_python_Path_does_not_resolve_to_shell_PATH(tmp_path): + r = _extract(tmp_path, { + "run.sh": "export PATH=/usr/local/bin:$PATH\n", + "mod.py": ( + "from pathlib import Path\n" + "def load(p: Path) -> Path:\n return Path(p)\n" + "def other():\n return load(Path('x'))\n" + ), + }) + lbl = _labels(r) + path_nid = next((n["id"] for n in r["nodes"] if n["label"] == "PATH"), None) + assert path_nid is not None + # No edge from the Python functions should land on the shell PATH node + false_edges = [ + e for e in r["edges"] + if e["target"] == path_nid and lbl.get(e["source"], "").startswith(("load", "other")) + ] + assert not false_edges, f"Python Path leaked onto shell PATH: {false_edges}" + # PATH keeps only its own `defines` edge (from run.sh), not a false super-hub + assert sum(1 for e in r["edges"] if e["target"] == path_nid) <= 1 + + +def test_case_sensitive_cross_file_ref_respects_case(tmp_path): + r = _extract(tmp_path, { + "consts.rs": 'pub const PATH: &str = "/x";\n', + "use.rs": "struct Wrap(Path);\n", # `Path` — no such node in the corpus + }) + lbl = _labels(r) + path_nid = next((n["id"] for n in r["nodes"] if n["label"] == "PATH"), None) + xref = [e for e in r["edges"] if e["target"] == path_nid and lbl.get(e["source"]) == "Wrap"] + assert not xref, "a `Path` reference must not resolve to a case-differing `PATH`" + + +def test_exact_case_cross_file_still_resolves(tmp_path): + r = _extract(tmp_path, { + "h.py": "def helper():\n return 1\n", + "m.py": "from h import helper\ndef go():\n 
return helper()\n", + }) + lbl = _labels(r) + calls = {(lbl.get(e["source"]), lbl.get(e["target"])) + for e in r["edges"] if e["relation"] == "calls"} + assert ("go()", "helper()") in calls + + +def test_php_case_insensitive_resolution_preserved(tmp_path): + r = _extract(tmp_path, { + "lib.php": " Date: Thu, 2 Jul 2026 00:14:58 +1000 Subject: [PATCH 2/9] fix(scala): emit field type references for var declarations The Scala field handler matched only `val_definition`, so a mutable field (`var b: Repo`), which parses as `var_definition`, had its type reference silently dropped from the graph. val and var nodes are structurally identical (both expose a `type` field), so the existing type-collection logic works unchanged. Widen the guard to accept var_definition. Adds a var field to the Scala fixture and a regression test. --- graphify/extract.py | 2 +- tests/fixtures/sample.scala | 1 + tests/test_languages.py | 5 +++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/graphify/extract.py b/graphify/extract.py index 0c67ac5cd..7101d6164 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3868,7 +3868,7 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: return if (config.ts_module == "tree_sitter_scala" - and t == "val_definition" + and t in ("val_definition", "var_definition") and parent_class_nid): type_node = node.child_by_field_name("type") if type_node is not None: diff --git a/tests/fixtures/sample.scala b/tests/fixtures/sample.scala index 95755a877..8a35888d8 100644 --- a/tests/fixtures/sample.scala +++ b/tests/fixtures/sample.scala @@ -7,6 +7,7 @@ abstract class BaseClient class HttpClient(config: Config) extends BaseClient with Loggable { val source: Config = config + var fallback: BaseClient = null def get(path: String): String = { buildRequest("GET", path) diff --git a/tests/test_languages.py b/tests/test_languages.py index 941fcac88..bd370c9d3 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ 
-655,6 +655,11 @@ def test_scala_val_definition_field_context(): assert ("HttpClient", "Config") in _edge_labels(r, "references", "field") +def test_scala_var_definition_field_context(): + r = extract_scala(FIXTURES / "sample.scala") + assert ("HttpClient", "BaseClient") in _edge_labels(r, "references", "field") + + def test_scala_method_return_type_context(): r = extract_scala(FIXTURES / "sample.scala") assert ("create", "HttpClient") in _edge_labels(r, "references", "return_type") From a129ff2cd60b423debb9351f793a0551faa9dc71 Mon Sep 17 00:00:00 2001 From: Synvoya <16019863+Synvoya@users.noreply.github.com> Date: Thu, 2 Jul 2026 00:20:39 +1000 Subject: [PATCH 3/9] fix(powershell): emit inherits/implements edges for class base types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `class_statement` handler read only the first `simple_name` child — the class name — and never inspected the base type(s) after the `:` token. As a result `class Dog : Animal` dropped the Dog->Animal inheritance edge entirely; derived classes appeared as isolated nodes. Walk the class_statement children, and once the `:` token is seen treat each following `simple_name` as a base type. Matching the C# convention (PowerShell has no syntactic base-vs-interface split), the first base is emitted as `inherits` and the rest as `implements`, resolved via ensure_named_node. Adds a Shape/Circle inheritance pair to tests/fixtures/sample.ps1 and a regression test asserting ("Circle","Shape") in the inherits edges. 
--- graphify/extract.py | 16 ++++++++++++++++ tests/fixtures/sample.ps1 | 16 ++++++++++++++++ tests/test_languages.py | 7 +++++++ 3 files changed, 39 insertions(+) diff --git a/graphify/extract.py b/graphify/extract.py index 7101d6164..19dcaa243 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -8495,6 +8495,22 @@ def walk(node, parent_class_nid: str | None = None) -> None: class_nid = _make_id(stem, class_name) add_node(class_nid, class_name, line) add_edge(file_nid, class_nid, "contains", line) + # Base type(s) after ':'. PowerShell has no syntactic base vs + # interface split, so (matching the C# convention) treat the + # first base as the superclass (inherits) and the rest as + # interfaces (implements). Bases are the simple_name children + # after the ':' token. + colon_seen = False + base_index = 0 + for child in node.children: + if child.type == ":": + colon_seen = True + elif colon_seen and child.type == "simple_name": + base_nid = ensure_named_node(_read_text(child, source), line) + if base_nid != class_nid: + rel = "inherits" if base_index == 0 else "implements" + add_edge(class_nid, base_nid, rel, line) + base_index += 1 for child in node.children: walk(child, parent_class_nid=class_nid) return diff --git a/tests/fixtures/sample.ps1 b/tests/fixtures/sample.ps1 index 2cdb6aa78..43c27fd7d 100644 --- a/tests/fixtures/sample.ps1 +++ b/tests/fixtures/sample.ps1 @@ -30,3 +30,19 @@ class DataProcessor { Set-Content -Path $path -Value $this.Source } } + +class Shape { + [string]$Kind + + [double] Area() { + return 0.0 + } +} + +class Circle : Shape { + [double]$Radius + + [double] Area() { + return 3.14159 * $this.Radius * $this.Radius + } +} diff --git a/tests/test_languages.py b/tests/test_languages.py index bd370c9d3..35569c508 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -1631,6 +1631,13 @@ def test_powershell_finds_class_and_method(): assert any("Transform" in l for l in labels) +def 
test_powershell_class_base_type_emits_inherits_edge(): + # `class Circle : Shape` — the base type after ':' was previously dropped + # because the handler only read the first simple_name (the class name). + r = extract_powershell(FIXTURES / "sample.ps1") + assert ("Circle", "Shape") in _edge_labels(r, "inherits") + + def test_powershell_property_field_type_context(): r = extract_powershell(FIXTURES / "sample.ps1") assert ("DataProcessor", "string") in _edge_labels(r, "references", "field") From cd3a376030d0dcfdccdac05eac0f4d5c34308fb6 Mon Sep 17 00:00:00 2001 From: Synvoya <16019863+Synvoya@users.noreply.github.com> Date: Thu, 2 Jul 2026 00:21:45 +1000 Subject: [PATCH 4/9] fix(objc): emit implements edge for protocol-to-protocol adoption `@protocol Derived <Base>` dropped the protocol-adoption (inheritance) edge. The protocol_declaration handler in extract_objc walked children for method declarations but ignored the protocol_reference_list child that holds the adopted protocols, so no implements edge was ever emitted for protocol-on-protocol adoption. The extractor already handled `@interface Foo <Protocol>` adoption, but that nests the protocol name under a parameterized_arguments node; protocol-on- protocol adoption uses a different grammar node (protocol_reference_list) whose adopted-name is a direct `identifier` child, so it was never matched. Walk protocol_reference_list and emit an implements edge for each adopted protocol, mirroring the @interface handling. Adds a defined Base/Derived protocol pair to the ObjC fixture and a regression test asserting the Derived->Base implements edge.
--- graphify/extract.py | 12 ++++++++++++ tests/fixtures/sample.m | 12 ++++++++++++ tests/test_languages.py | 10 ++++++++++ 3 files changed, 34 insertions(+) diff --git a/graphify/extract.py b/graphify/extract.py index 19dcaa243..6887d08e9 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -11625,6 +11625,18 @@ def walk(node, parent_nid: str | None = None) -> None: proto_nid = _make_id(stem, name) add_node(proto_nid, f"<{name}>", line) add_edge(file_nid, proto_nid, "contains", line) + # Adopted protocols: `@protocol Derived `. These + # nest under a protocol_reference_list node (distinct from the + # parameterized_arguments node used by @interface adoption), so + # they were never emitted. Emit an `implements` edge for each, + # matching how @interface protocol adoption is handled. + for child in node.children: + if child.type == "protocol_reference_list": + for sub in child.children: + if sub.type == "identifier": + base_nid = ensure_named_node(_read(sub), line) + if base_nid != proto_nid: + add_edge(proto_nid, base_nid, "implements", line) for child in node.children: walk(child, proto_nid) return diff --git a/tests/fixtures/sample.m b/tests/fixtures/sample.m index 2f1209a4b..4fd0f9374 100644 --- a/tests/fixtures/sample.m +++ b/tests/fixtures/sample.m @@ -40,3 +40,15 @@ - (void)fetch { } @end + +@protocol Base + +- (void)baseMethod; + +@end + +@protocol Derived + +- (void)derivedMethod; + +@end diff --git a/tests/test_languages.py b/tests/test_languages.py index 35569c508..8227131fb 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -1064,6 +1064,16 @@ def test_objc_splits_inherits_and_implements(): assert ("Animal", "SampleDelegate") in _edge_labels(r, "implements") +def test_objc_protocol_adopts_protocol(): + """`@protocol Derived ` must emit an implements edge Derived->Base. 
+ Protocol-on-protocol adoption nests under a protocol_reference_list node + (distinct from the parameterized_arguments node used by @interface + adoption), so the edge was previously dropped. Protocol nodes are labeled + `<Name>`, so the edge reads (<Derived>, <Base>).""" + r = extract_objc(FIXTURES / "sample.m") + assert ("<Derived>", "<Base>") in _edge_labels(r, "implements") + + def test_objc_property_type_context(): r = extract_objc(FIXTURES / "sample.m") assert ("Animal", "NSString") in _edge_labels(r, "references", "field") From 51f805e9537d01d2bc31b83d3c3fe3f76640b35f Mon Sep 17 00:00:00 2001 From: Synvoya <16019863+Synvoya@users.noreply.github.com> Date: Thu, 2 Jul 2026 00:22:51 +1000 Subject: [PATCH 5/9] fix(php): emit type references for promoted constructor properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHP 8 constructor property promotion (`__construct(private Repo $repo)`) parses the promoted parameter as `property_promotion_parameter`, not `simple_parameter`. The PHP parameter loop filtered on `simple_parameter` only, so promoted params were skipped entirely: their type emitted no `parameter_type` edge on the constructor, and — because a promoted param is also a real class field — no `field` edge on the class either. A non-promoted param in the same signature still emitted `parameter_type`, so the type reference was silently dropped for exactly the promoted case. The promoted param's type sits in the same direct named-child shape the loop already reads for `simple_parameter`, so widening the filter to accept `property_promotion_parameter` makes the existing type extraction emit the `parameter_type` edge. Additionally, for a promoted param, emit a `field`-context references edge on the class (mirroring the `property_declaration` handler), guarded so it only fires when a parent class is in scope and the target is not the class node itself. Normal `simple_parameter` behaviour is unchanged.
Adds a promoted-property constructor to tests/fixtures/sample.php and test_php_constructor_property_promotion_contexts asserting the promoted type appears as both `field` and `parameter_type`, and that a non-promoted param does not leak a field edge. --- graphify/extract.py | 15 ++++++++++++++- tests/fixtures/sample.php | 7 +++++++ tests/test_languages.py | 10 ++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/graphify/extract.py b/graphify/extract.py index 6887d08e9..e6c611d13 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -4069,8 +4069,14 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: break if params_container is not None: for p in params_container.children: - if p.type != "simple_parameter": + # PHP 8 constructor property promotion (`__construct(private + # Repo $repo)`) parses the promoted param as + # property_promotion_parameter, not simple_parameter. Its + # type sits in the same direct named child shape, so accept + # both here; a promoted param is additionally a class field. + if p.type not in ("simple_parameter", "property_promotion_parameter"): continue + is_promoted = p.type == "property_promotion_parameter" type_node = None for sub in p.children: if sub.type in ("named_type", "primitive_type", "nullable_type", @@ -4084,6 +4090,13 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: target_nid = ensure_named_node(ref_name, line) if target_nid != func_nid: add_edge(func_nid, target_nid, "references", line, context=ctx) + # A promoted param declares a real class field; mirror + # the property_declaration field-context edge so the + # type is discoverable as a class field too. 
+ if is_promoted and parent_class_nid and target_nid != parent_class_nid: + fctx = "generic_arg" if role == "generic_arg" else "field" + add_edge(parent_class_nid, target_nid, "references", + line, context=fctx) return_node = _php_method_return_type_node(node) if return_node is not None: refs = [] diff --git a/tests/fixtures/sample.php b/tests/fixtures/sample.php index 1397f5631..5ff337af4 100644 --- a/tests/fixtures/sample.php +++ b/tests/fixtures/sample.php @@ -66,6 +66,13 @@ public function log(): void } } +class Service +{ + public function __construct(private Result $result, string $label) + { + } +} + function parseResponse(string $raw): array { return json_decode($raw, true); diff --git a/tests/test_languages.py b/tests/test_languages.py index 8227131fb..95dfd9a19 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -777,6 +777,16 @@ def test_php_property_parameter_and_return_contexts(): assert ("run", "Result") in _edge_labels(r, "references", "return_type") +def test_php_constructor_property_promotion_contexts(): + # PHP 8 constructor property promotion: a promoted param is both a + # constructor parameter (parameter_type) and a class field (field). + r = extract_php(FIXTURES / "sample.php") + assert ("Service", "Result") in _edge_labels(r, "references", "field") + assert ("__construct", "Result") in _edge_labels(r, "references", "parameter_type") + # A non-promoted param must not leak a field edge onto the class. 
+ assert ("Service", "string") not in _edge_labels(r, "references", "field") + + # ── Swift ──────────────────────────────────────────────────────────────────── def test_swift_no_error(): From bb5e5192df14a522cf0c8ec72ca1bd5452af29da Mon Sep 17 00:00:00 2001 From: Synvoya <16019863+Synvoya@users.noreply.github.com> Date: Thu, 2 Jul 2026 00:24:22 +1000 Subject: [PATCH 6/9] fix(csharp): emit type references for properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The C# class-body walker only handled field_declaration, so a property's type produced no references(field) edge. In idiomatic C#, auto-properties (`public Widget Main { get; set; }`) — not bare fields — are the standard way to declare state, so this silently dropped most of a class's type relationships. Add a property_declaration branch alongside the field_declaration handler, guarded the same way (ts_module == tree_sitter_c_sharp, parent_class_nid set). A property exposes its type on the node directly (no variable_declaration wrapper), so read it via child_by_field_name("type") and collect refs with _csharp_collect_type_refs, mirroring the Java/PHP/Kotlin siblings so List<Widget> yields both the List field ref and the Widget generic_arg ref. Only emit when target != parent_class_nid.
--- graphify/extract.py | 29 +++++++++++++++++++++++++++++ tests/fixtures/sample.cs | 4 ++++ tests/test_languages.py | 11 +++++++++++ 3 files changed, 44 insertions(+) diff --git a/graphify/extract.py b/graphify/extract.py index e6c611d13..5a3ec2ab7 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3786,6 +3786,35 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: "references", line, context="field", metadata=metadata) return + if (config.ts_module == "tree_sitter_c_sharp" + and t == "property_declaration" + and parent_class_nid): + # C# auto-properties (`public Widget Main { get; set; }`) are the + # idiomatic way to declare state, yet only field_declaration was + # handled — so property types produced no references edge. Unlike a + # field, a property exposes its type on the node directly (no + # variable_declaration wrapper), so read it straight off the `type` + # field. Use _csharp_collect_type_refs (like the Java/PHP/Kotlin + # siblings) so `List` yields both the List field ref and the + # Widget generic_arg ref. 
+ type_node = node.child_by_field_name("type") + if type_node is not None: + line = node.start_point[0] + 1 + refs: list[tuple[str, str, bool, str]] = [] + _csharp_collect_type_refs(type_node, source, False, refs) + for ref_name, role, qualified, qualifier in refs: + ctx = "generic_arg" if role == "generic_arg" else "field" + target_nid = ensure_named_node(ref_name, line) + if target_nid != parent_class_nid: + metadata = {"ref_token": ref_name} + if qualified: + metadata["qualified"] = True + if qualifier: + metadata["ref_qualifier"] = qualifier + add_edge(parent_class_nid, target_nid, "references", + line, context=ctx, metadata=metadata) + return + if (config.ts_module == "tree_sitter_java" and t == "field_declaration" and parent_class_nid): diff --git a/tests/fixtures/sample.cs b/tests/fixtures/sample.cs index 4ee1b06e1..729651c61 100644 --- a/tests/fixtures/sample.cs +++ b/tests/fixtures/sample.cs @@ -21,6 +21,10 @@ public class DataProcessor : Processor, IProcessor { private readonly HttpClient _client; + public Processor Owner { get; set; } + + public List Workers { get; set; } + public DataProcessor() { _client = new HttpClient(); diff --git a/tests/test_languages.py b/tests/test_languages.py index 95dfd9a19..318320526 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -542,6 +542,17 @@ def test_csharp_field_type_references_have_field_context(): ), "DataProcessor field declarations should reference HttpClient with field context" +def test_csharp_property_type_references_have_field_context(): + r = extract_csharp(FIXTURES / "sample.cs") + field_refs = _edge_labels(r, "references", "field") + # `public Processor Owner { get; set; }` — property type -> field ref. + assert ("DataProcessor", "Processor") in field_refs + # `public List Workers { get; set; }` — the List container -> field. + assert ("DataProcessor", "List") in field_refs + # ...and the generic argument -> generic_arg. 
+ assert ("DataProcessor", "Processor") in _edge_labels(r, "references", "generic_arg") + + def test_csharp_call_edges_have_call_context(): r = extract_csharp(FIXTURES / "sample.cs") node_by_id = {n["id"]: n["label"] for n in r["nodes"]} From 21bcb436b58c5922e437e6abbd3dfbe95a14ad4b Mon Sep 17 00:00:00 2001 From: Synvoya <16019863+Synvoya@users.noreply.github.com> Date: Thu, 2 Jul 2026 00:30:26 +1000 Subject: [PATCH 7/9] fix(cpp): emit generic_arg references for base-class template arguments The C++ base_class_clause handler's `template_type` branch read the base name (`sub.child_by_field_name("name")`) and emitted the `inherits` edge, but never descended into the base's `template_argument_list`. As a result `class Car : public Base<Dep>` emitted `Car -> Base` (inherits) yet dropped the `Car -> Dep` generic_arg reference entirely. The Java handler `_emit_java_parent_type` already emits these generic_arg references for base-class type arguments; C++ was the asymmetric gap. Fix: after emitting the `inherits` edge, grab the base's `arguments` field (the `template_argument_list`) and run `_cpp_collect_type_refs` over each named argument with the generic flag set, emitting a `references` edge (context "generic_arg") per collected type, guarding target != class node. `_cpp_collect_type_refs` already handles nested/qualified args, so `Base<std::vector<Dep>>` is covered too. Adds a templated base (`Connection<T>`) + derived class (`PooledClient : public Connection<HttpClient>`) to tests/fixtures/sample.cpp and a test mirroring the Java generic-parents test.
--- graphify/extract.py | 20 ++++++++++++++++++++ tests/fixtures/sample.cpp | 11 +++++++++++ tests/test_languages.py | 9 +++++++++ 3 files changed, 40 insertions(+) diff --git a/graphify/extract.py b/graphify/extract.py index 5a3ec2ab7..9b366abac 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -3668,6 +3668,7 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: continue for sub in child.children: base = "" + template_args_node = None if sub.type == "type_identifier": base = _read_text(sub, source) elif sub.type == "qualified_identifier": @@ -3679,6 +3680,12 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: elif sub.type == "template_type": tname = sub.child_by_field_name("name") base = _read_text(tname, source) if tname else _read_text(sub, source) + # The base's template_argument_list carries generic + # type arguments (class Car : public Base). The + # Java handler (_emit_java_parent_type) emits these as + # generic_arg references; C++ dropped them because we + # only emitted the `inherits` edge on the base name. + template_args_node = sub.child_by_field_name("arguments") else: continue if not base: @@ -3696,6 +3703,19 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: }) seen_ids.add(base_nid) add_edge(class_nid, base_nid, "inherits", line) + # Emit a generic_arg reference for each type argument on the + # base (Base -> Car references Dep). _cpp_collect_type_refs + # handles nested/qualified args (Base>) too. 
+ if template_args_node is not None: + arg_refs: list[tuple[str, str]] = [] + for arg in template_args_node.children: + if arg.is_named: + _cpp_collect_type_refs(arg, source, True, arg_refs) + for ref_name, _role in arg_refs: + target_nid = ensure_named_node(ref_name, line) + if target_nid != class_nid: + add_edge(class_nid, target_nid, "references", + line, context="generic_arg") # Find body and recurse body = _find_body(node, config) diff --git a/tests/fixtures/sample.cpp b/tests/fixtures/sample.cpp index f48f83355..aa68e6ec7 100644 --- a/tests/fixtures/sample.cpp +++ b/tests/fixtures/sample.cpp @@ -36,6 +36,17 @@ struct RetryingHttpClient : HttpClient { int maxRetries; }; +template +class Connection { +public: + T resource; +}; + +class PooledClient : public Connection { +public: + int poolSize; +}; + int main() { HttpClient client("https://api.example.com"); std::string response = client.get("/users"); diff --git a/tests/test_languages.py b/tests/test_languages.py index 318320526..70ae13504 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -227,6 +227,15 @@ def test_cpp_struct_inherits_edge(): assert found, "RetryingHttpClient (struct) should have inherits edge to HttpClient" +def test_cpp_generic_parents_include_type_argument_references(): + """`class PooledClient : public Connection` must emit the inherits + edge to Connection AND a generic_arg reference to the HttpClient type argument, + matching the Java base-class behaviour (_emit_java_parent_type).""" + r = extract_cpp(FIXTURES / "sample.cpp") + assert ("PooledClient", "Connection") in _edge_labels(r, "inherits") + assert ("PooledClient", "HttpClient") in _edge_labels(r, "references", "generic_arg") + + # ── CUDA ────────────────────────────────────────────────────────────────────── # CUDA is a C++ superset, so .cu/.cuh route through the C++ (tree-sitter-cpp) # extractor. 
These tests guard that __global__/__device__ kernels, host From ad7015262b8876980c819ed20cd9dabe51facc8b Mon Sep 17 00:00:00 2001 From: Synvoya <16019863+Synvoya@users.noreply.github.com> Date: Thu, 2 Jul 2026 00:30:39 +1000 Subject: [PATCH 8/9] fix(swift): emit references for enum associated-value types The Swift `enum_entry` handler in `_swift_extra_walk` iterated the entry's children only for the `simple_identifier` case name (emitting a `case_of` edge) and never descended into the sibling `enum_type_parameters` node, where associated-value types live (`enum_type_parameters -> user_type -> type_identifier`). As a result `case started(Session)` silently dropped the `Event -> Session` type reference. Descend into each `enum_type_parameters` child after emitting `case_of`, run `_swift_collect_type_refs` over its named children, and emit a `references` edge from the enum node to each collected type (context `type`, or `generic_arg` for generic roles), guarding target != enum node. Mirrors the existing Swift property/parameter/return-type emit style. Fixture: add `case failed(Config)` to `NetworkError` in sample.swift. Test: assert (`NetworkError`, `Config`) in references(context=type). --- graphify/extract.py | 28 +++++++++++++++++++++++++--- tests/fixtures/sample.swift | 1 + tests/test_languages.py | 4 ++++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/graphify/extract.py b/graphify/extract.py index 9b366abac..4a99636b0 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -2598,16 +2598,37 @@ def _csharp_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: str, nodes: list, edges: list, seen_ids: set, function_bodies: list, - parent_class_nid: str | None, add_node_fn, add_edge_fn) -> bool: + parent_class_nid: str | None, add_node_fn, add_edge_fn, + ensure_named_node_fn) -> bool: """Handle enum_entry for Swift. 
Returns True if handled.""" if node.type == "enum_entry" and parent_class_nid: + line = node.start_point[0] + 1 for child in node.children: if child.type == "simple_identifier": case_name = _read_text(child, source) case_nid = _make_id(parent_class_nid, case_name) - line = node.start_point[0] + 1 add_node_fn(case_nid, case_name, line) add_edge_fn(parent_class_nid, case_nid, "case_of", line) + # Associated-value types nest as `enum_type_parameters -> user_type -> + # type_identifier` (a sibling of the case-name simple_identifier). The + # case-name loop above never descends into them, so `case started(Session)` + # used to drop the Event -> Session reference entirely. Mirror the Swift + # property/parameter emit style: collect the type refs and emit a + # `references` edge from the ENUM node to each collected type. + for child in node.children: + if child.type != "enum_type_parameters": + continue + for grand in child.children: + if not grand.is_named: + continue + refs: list[tuple[str, str]] = [] + _swift_collect_type_refs(grand, source, False, refs) + for ref_name, role in refs: + ctx = "generic_arg" if role == "generic_arg" else "type" + target_nid = ensure_named_node_fn(ref_name, line) + if target_nid != parent_class_nid: + add_edge_fn(parent_class_nid, target_nid, "references", + line, context=ctx) return True return False @@ -4369,7 +4390,8 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: if config.ts_module == "tree_sitter_swift": if _swift_extra_walk(node, source, file_nid, stem, str_path, nodes, edges, seen_ids, function_bodies, - parent_class_nid, add_node, add_edge): + parent_class_nid, add_node, add_edge, + ensure_named_node): return # Python's `@property` / `@staticmethod` / `@classmethod` wrap the diff --git a/tests/fixtures/sample.swift b/tests/fixtures/sample.swift index 0a51d2fa1..64d5a42eb 100644 --- a/tests/fixtures/sample.swift +++ b/tests/fixtures/sample.swift @@ -51,6 +51,7 @@ enum NetworkError { case timeout case 
connectionFailed case unauthorized + case failed(Config) func describe() -> String { return "error" diff --git a/tests/test_languages.py b/tests/test_languages.py index 70ae13504..ca5169d54 100644 --- a/tests/test_languages.py +++ b/tests/test_languages.py @@ -900,6 +900,10 @@ def test_swift_enum_cases_have_case_of_edge(): case_edges = [e for e in r["edges"] if e["relation"] == "case_of"] assert len(case_edges) >= 2 +def test_swift_enum_associated_value_type_emits_references(): + r = extract_swift(FIXTURES / "sample.swift") + assert ("NetworkError", "Config") in _edge_labels(r, "references", "type") + def test_swift_finds_deinit(): r = extract_swift(FIXTURES / "sample.swift") assert any("deinit" in l for l in _labels(r)) From f4a799492670b3bf9865d4cbd6055407a5192d8e Mon Sep 17 00:00:00 2001 From: safishamsi Date: Wed, 1 Jul 2026 18:34:48 +0100 Subject: [PATCH 9/9] docs(changelog): note 7-language type-reference/inheritance fixes (#1587-#1593) Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0626098a6..456639297 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,14 @@ Full release notes with details on each version: [GitHub Releases](https://githu ## Unreleased +- Fix: type-reference / inheritance edge gaps closed across seven languages (all thanks @Synvoya): + - Scala: `var` field declarations now emit type `references` like `val` (#1587). + - PowerShell: class base types after `:` now emit `inherits` (first) / `implements` (rest), matching the C# convention (#1588). + - Objective-C: protocol-to-protocol adoption (`@protocol Derived `) now emits an `implements` edge (#1589). + - PHP: promoted constructor properties (`__construct(private Repo $r)`) now emit type `references` (method + class field) (#1590). + - C#: auto-properties (`public Widget Main { get; set; }`) now emit type `references` like fields, including generic args (#1591). 
+ - C++: base-class template arguments (`class Car : Base<Dep>`) now emit `generic_arg` references, matching the Java behavior (#1592). + - Swift: enum associated-value types (`case started(Session)`) now emit `references` (#1593). - Fix: cross-file name resolution now respects case in case-sensitive languages (#1581, thanks @sheik-hiiobd). Resolution matched identifiers case-insensitively for every language, so in Python/Rust/Go/Java/etc. `from pathlib import Path` resolved to an unrelated shell-script `export PATH=...` node — a single variable becoming the corpus's #1 god-node (266 false incoming edges on one real repo), inflating god-node rankings, `affected` blast-radius, and community assignment. Both the cross-file call resolver and the type-reference stub-rewire now match by exact case; only genuinely case-insensitive languages (PHP functions/classes, SQL, Nim) still fold. For case-sensitive languages this only ever removes false edges. - Fix: Julia qualified / relative / scoped-selected imports now emit edges (#1580, thanks @Synvoya). Only bare `using Foo` was handled; `using Base.Threads` (scoped), `using ..Parent` (relative import_path), and the scoped package of `import Base.Threads: nthreads` were dropped. - Fix: Rust tuple-struct field types now emit `references` edges (#1582, thanks @Synvoya). `struct Wrapper(Logger, Vec);` referenced nothing — positional fields nest under `ordered_field_declaration_list` with no `field_declaration` wrapper, the same shape as tuple enum variants (#1579); that path wasn't traversed for structs.