Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,15 @@ Full release notes with details on each version: [GitHub Releases](https://githu

## Unreleased

- Fix: type-reference / inheritance edge gaps closed across seven languages (all thanks @Synvoya):
  - Scala: `var` field declarations now emit type `references` like `val` (#1587).
  - PowerShell: class base types after `:` now emit `inherits` (first) / `implements` (rest), matching the C# convention (#1588).
  - Objective-C: protocol-to-protocol adoption (`@protocol Derived <Base>`) now emits an `implements` edge (#1589).
  - PHP: promoted constructor properties (`__construct(private Repo $r)`) now emit type `references` (method + class field) (#1590).
  - C#: auto-properties (`public Widget Main { get; set; }`) now emit type `references` like fields, including generic args (#1591).
  - C++: base-class template arguments (`class Car : Base<Dep>`) now emit `generic_arg` references, matching the Java behavior (#1592).
  - Swift: enum associated-value types (`case started(Session)`) now emit `references` (#1593).
- Fix: cross-file name resolution now respects case in case-sensitive languages (#1581, thanks @sheik-hiiobd). Resolution matched identifiers case-insensitively for every language, so in Python/Rust/Go/Java/etc. `from pathlib import Path` resolved to an unrelated shell-script `export PATH=...` node — a single variable becoming the corpus's #1 god-node (266 false incoming edges on one real repo), inflating god-node rankings, `affected` blast-radius, and community assignment. Both the cross-file call resolver and the type-reference stub-rewire now match by exact case; only genuinely case-insensitive languages (PHP functions/classes, SQL, Nim) still fold. For case-sensitive languages this only ever removes false edges.
- Fix: Julia qualified / relative / scoped-selected imports now emit edges (#1580, thanks @Synvoya). Only bare `using Foo` was handled; `using Base.Threads` (scoped), `using ..Parent` (relative import_path), and the scoped package of `import Base.Threads: nthreads` were dropped.
- Fix: Rust tuple-struct field types now emit `references` edges (#1582, thanks @Synvoya). `struct Wrapper(Logger, Vec<Config>);` referenced nothing — positional fields nest under `ordered_field_declaration_list` with no `field_declaration` wrapper, the same shape as tuple enum variants (#1579); that path wasn't traversed for structs.
- Fix: SystemVerilog class properties with leading qualifiers now emit field `references` (#1583, thanks @Synvoya). The field regex only matched unqualified `<type> <name>;`, so `rand Config x;` / `protected Base b;` (qualifier + type + name) failed to match and their type references were dropped.
Expand Down
180 changes: 167 additions & 13 deletions graphify/extract.py

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions tests/fixtures/sample.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,17 @@ struct RetryingHttpClient : HttpClient {
int maxRetries;
};

// Fixture: class template with one public member of the template parameter type T.
// Used as the templated base class of PooledClient in this file.
template <typename T>
class Connection {
public:
T resource;
};

// Fixture: publicly derives from the templated base Connection<HttpClient>, so the
// base clause carries both a base class (Connection) and a type argument (HttpClient).
class PooledClient : public Connection<HttpClient> {
public:
int poolSize;
};

int main() {
HttpClient client("https://api.example.com");
std::string response = client.get("/users");
Expand Down
4 changes: 4 additions & 0 deletions tests/fixtures/sample.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ public class DataProcessor : Processor, IProcessor
{
private readonly HttpClient _client;

public Processor Owner { get; set; }

public List<Processor> Workers { get; set; }

public DataProcessor()
{
_client = new HttpClient();
Expand Down
12 changes: 12 additions & 0 deletions tests/fixtures/sample.m
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,15 @@ - (void)fetch {
}

@end

// Fixture: protocol with no adopted protocols, declaring one instance method.
@protocol Base

- (void)baseMethod;

@end

// Fixture: protocol that adopts another protocol (Base) through the <...> list
// and declares one instance method of its own.
@protocol Derived <Base>

- (void)derivedMethod;

@end
7 changes: 7 additions & 0 deletions tests/fixtures/sample.php
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,13 @@ public function log(): void
}
}

// Fixture: constructor property promotion. `$result` carries a visibility modifier
// (promoted parameter, typed Result); `$label` is a plain, non-promoted string parameter.
class Service
{
public function __construct(private Result $result, string $label)
{
}
}

function parseResponse(string $raw): array
{
return json_decode($raw, true);
Expand Down
16 changes: 16 additions & 0 deletions tests/fixtures/sample.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,19 @@ class DataProcessor {
Set-Content -Path $path -Value $this.Source
}
}

# Fixture: class with no base type, one [string] property and an Area() method
# that returns 0.0. Circle in this file names it as its base type.
class Shape {
[string]$Kind

[double] Area() {
return 0.0
}
}

# Fixture: class declared with a base type after ':' (Shape). Adds a [double]
# Radius property and defines its own Area() computed from that radius.
class Circle : Shape {
[double]$Radius

[double] Area() {
return 3.14159 * $this.Radius * $this.Radius
}
}
1 change: 1 addition & 0 deletions tests/fixtures/sample.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ abstract class BaseClient

class HttpClient(config: Config) extends BaseClient with Loggable {
val source: Config = config
var fallback: BaseClient = null

def get(path: String): String = {
buildRequest("GET", path)
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/sample.swift
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ enum NetworkError {
case timeout
case connectionFailed
case unauthorized
case failed(Config)

func describe() -> String {
return "error"
Expand Down
87 changes: 87 additions & 0 deletions tests/test_case_sensitive_resolution.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
"""Cross-file name resolution respects case in case-sensitive languages (#1581).

Case is semantic in most languages: `Path` (a class), `PATH` (an env var), and
`path` (a variable) are distinct. Cross-file resolution used to fold case for every
language, so `from pathlib import Path` (ubiquitous) resolved to a shell script's
`export PATH=...` node — turning one shell variable into the corpus's #1 god-node.

These tests pin: case-sensitive languages match by exact case (removing that false
edge), while genuinely case-insensitive languages (PHP) still fold.
"""
from __future__ import annotations

import os
from pathlib import Path

from graphify.extract import extract


def _extract(tmp_path, files: dict[str, str]):
    """Write `files` into `tmp_path` and run `extract` on them from that directory.

    Args:
        tmp_path: Directory (a path object supporting `/`) that receives the files
            and is also passed to `extract` as `cache_root`.
        files: Mapping of relative file name to file content.

    Returns:
        Whatever `extract` returns for the written files.
    """
    for name, body in files.items():
        # Pin the encoding so the bytes on disk do not depend on the platform's
        # locale default.
        (tmp_path / name).write_text(body, encoding="utf-8")
    old = os.getcwd()
    try:
        # Paths handed to `extract` are relative names, so run from tmp_path.
        os.chdir(tmp_path)
        r = extract([Path(n) for n in files], cache_root=tmp_path)
    finally:
        # Always restore the working directory, even if extraction raises.
        os.chdir(old)
    return r


def _labels(r):
return {n["id"]: n["label"] for n in r["nodes"]}


def test_python_Path_does_not_resolve_to_shell_PATH(tmp_path):
    """Python `Path` usages must not produce edges onto a shell script's `PATH` node.

    Extracts a shell script exporting PATH together with a Python module that
    imports and uses `pathlib.Path`, then inspects the edges targeting PATH.
    """
    sources = {
        "run.sh": "export PATH=/usr/local/bin:$PATH\n",
        "mod.py": (
            "from pathlib import Path\n"
            "def load(p: Path) -> Path:\n return Path(p)\n"
            "def other():\n return load(Path('x'))\n"
        ),
    }
    result = _extract(tmp_path, sources)
    label_of = _labels(result)

    # Locate the shell PATH node; it must exist for the checks to mean anything.
    path_nid = None
    for node in result["nodes"]:
        if node["label"] == "PATH":
            path_nid = node["id"]
            break
    assert path_nid is not None

    # No edge from the Python functions should land on the shell PATH node
    python_prefixes = ("load", "other")
    false_edges = []
    for edge in result["edges"]:
        if edge["target"] != path_nid:
            continue
        if label_of.get(edge["source"], "").startswith(python_prefixes):
            false_edges.append(edge)
    assert not false_edges, f"Python Path leaked onto shell PATH: {false_edges}"

    # PATH keeps only its own `defines` edge (from run.sh), not a false super-hub
    incoming = [edge for edge in result["edges"] if edge["target"] == path_nid]
    assert len(incoming) <= 1


def test_case_sensitive_cross_file_ref_respects_case(tmp_path):
    """A Rust `Path` reference must not resolve to a case-differing `PATH` const."""
    result = _extract(tmp_path, {
        "consts.rs": 'pub const PATH: &str = "/x";\n',
        "use.rs": "struct Wrap(Path);\n",  # `Path` — no such node in the corpus
    })
    label_of = _labels(result)

    # NOTE(review): if no node labelled "PATH" is produced, path_nid stays None and
    # (assuming no edge has a None target) the assertion below passes vacuously —
    # confirm whether a PATH node is expected here and, if so, assert it.
    path_nid = next(
        (node["id"] for node in result["nodes"] if node["label"] == "PATH"),
        None,
    )

    xref = []
    for edge in result["edges"]:
        if edge["target"] == path_nid and label_of.get(edge["source"]) == "Wrap":
            xref.append(edge)
    assert not xref, "a `Path` reference must not resolve to a case-differing `PATH`"


def test_exact_case_cross_file_still_resolves(tmp_path):
    """An exact-case cross-file call (`go` -> `helper`) must still yield a `calls` edge."""
    sources = {
        "h.py": "def helper():\n return 1\n",
        "m.py": "from h import helper\ndef go():\n return helper()\n",
    }
    result = _extract(tmp_path, sources)
    label_of = _labels(result)

    # Collect (caller label, callee label) for every `calls` edge.
    call_pairs = set()
    for edge in result["edges"]:
        if edge["relation"] == "calls":
            call_pairs.add((label_of.get(edge["source"]), label_of.get(edge["target"])))

    assert ("go()", "helper()") in call_pairs


def test_php_case_insensitive_resolution_preserved(tmp_path):
    """A PHP call spelled `greet()` must still resolve to the function declared as `Greet`."""
    sources = {
        "lib.php": "<?php\nfunction Greet() { return 1; }\n",
        "main.php": "<?php\nfunction run() { return greet(); }\n",
    }
    result = _extract(tmp_path, sources)
    label_of = _labels(result)

    # Collect (caller label, callee label) for every `calls` edge.
    call_pairs = set()
    for edge in result["edges"]:
        if edge["relation"] == "calls":
            call_pairs.add((label_of.get(edge["source"]), label_of.get(edge["target"])))

    assert ("run()", "Greet()") in call_pairs, "PHP identifiers are case-insensitive; fold must still apply"
56 changes: 56 additions & 0 deletions tests/test_languages.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,15 @@ def test_cpp_struct_inherits_edge():
assert found, "RetryingHttpClient (struct) should have inherits edge to HttpClient"


def test_cpp_generic_parents_include_type_argument_references():
    """For `class PooledClient : public Connection<HttpClient>` the extractor must
    report both an `inherits` edge to Connection and a `generic_arg` reference to
    the HttpClient type argument (the Java base-class handling,
    _emit_java_parent_type, is the model for this)."""
    result = extract_cpp(FIXTURES / "sample.cpp")

    inherits_pairs = _edge_labels(result, "inherits")
    assert ("PooledClient", "Connection") in inherits_pairs

    generic_arg_pairs = _edge_labels(result, "references", "generic_arg")
    assert ("PooledClient", "HttpClient") in generic_arg_pairs


# ── CUDA ──────────────────────────────────────────────────────────────────────
# CUDA is a C++ superset, so .cu/.cuh route through the C++ (tree-sitter-cpp)
# extractor. These tests guard that __global__/__device__ kernels, host
Expand Down Expand Up @@ -542,6 +551,17 @@ def test_csharp_field_type_references_have_field_context():
), "DataProcessor field declarations should reference HttpClient with field context"


def test_csharp_property_type_references_have_field_context():
    """C# auto-property types must be reported as `field` references, with generic
    arguments reported as `generic_arg` references."""
    result = extract_csharp(FIXTURES / "sample.cs")
    field_pairs = _edge_labels(result, "references", "field")

    expected_field_pairs = (
        # `public Processor Owner { get; set; }` — property type -> field ref.
        ("DataProcessor", "Processor"),
        # `public List<Processor> Workers { get; set; }` — the List container -> field.
        ("DataProcessor", "List"),
    )
    for pair in expected_field_pairs:
        assert pair in field_pairs

    # ...and the generic argument -> generic_arg.
    generic_arg_pairs = _edge_labels(result, "references", "generic_arg")
    assert ("DataProcessor", "Processor") in generic_arg_pairs


def test_csharp_call_edges_have_call_context():
r = extract_csharp(FIXTURES / "sample.cs")
node_by_id = {n["id"]: n["label"] for n in r["nodes"]}
Expand Down Expand Up @@ -655,6 +675,11 @@ def test_scala_val_definition_field_context():
assert ("HttpClient", "Config") in _edge_labels(r, "references", "field")


def test_scala_var_definition_field_context():
    """The `var fallback: BaseClient` member of HttpClient must yield a `field` reference."""
    result = extract_scala(FIXTURES / "sample.scala")
    field_pairs = _edge_labels(result, "references", "field")
    assert ("HttpClient", "BaseClient") in field_pairs


def test_scala_method_return_type_context():
r = extract_scala(FIXTURES / "sample.scala")
assert ("create", "HttpClient") in _edge_labels(r, "references", "return_type")
Expand Down Expand Up @@ -772,6 +797,16 @@ def test_php_property_parameter_and_return_contexts():
assert ("run", "Result") in _edge_labels(r, "references", "return_type")


def test_php_constructor_property_promotion_contexts():
    """PHP 8 constructor property promotion: a promoted parameter counts both as a
    constructor parameter (`parameter_type`) and as a class field (`field`)."""
    result = extract_php(FIXTURES / "sample.php")

    class_field_pairs = _edge_labels(result, "references", "field")
    assert ("Service", "Result") in class_field_pairs

    parameter_pairs = _edge_labels(result, "references", "parameter_type")
    assert ("__construct", "Result") in parameter_pairs

    # A non-promoted param must not leak a field edge onto the class.
    field_pairs_again = _edge_labels(result, "references", "field")
    assert ("Service", "string") not in field_pairs_again


# ── Swift ────────────────────────────────────────────────────────────────────

def test_swift_no_error():
Expand Down Expand Up @@ -865,6 +900,10 @@ def test_swift_enum_cases_have_case_of_edge():
case_edges = [e for e in r["edges"] if e["relation"] == "case_of"]
assert len(case_edges) >= 2

def test_swift_enum_associated_value_type_emits_references():
    """`case failed(Config)` in NetworkError must yield a `type` reference to Config."""
    result = extract_swift(FIXTURES / "sample.swift")
    type_pairs = _edge_labels(result, "references", "type")
    assert ("NetworkError", "Config") in type_pairs

def test_swift_finds_deinit():
r = extract_swift(FIXTURES / "sample.swift")
assert any("deinit" in l for l in _labels(r))
Expand Down Expand Up @@ -1059,6 +1098,16 @@ def test_objc_splits_inherits_and_implements():
assert ("Animal", "SampleDelegate") in _edge_labels(r, "implements")


def test_objc_protocol_adopts_protocol():
    """`@protocol Derived <Base>` must produce an `implements` edge Derived->Base.

    Protocol-on-protocol adoption sits under a protocol_reference_list node, which
    is a different node from the parameterized_arguments used by @interface
    adoption; that is why the edge used to be dropped. Protocol nodes carry the
    label `<Name>`, so the expected pair is (<Derived>, <Base>).
    """
    result = extract_objc(FIXTURES / "sample.m")
    implements_pairs = _edge_labels(result, "implements")
    assert ("<Derived>", "<Base>") in implements_pairs


def test_objc_property_type_context():
r = extract_objc(FIXTURES / "sample.m")
assert ("Animal", "NSString") in _edge_labels(r, "references", "field")
Expand Down Expand Up @@ -1626,6 +1675,13 @@ def test_powershell_finds_class_and_method():
assert any("Transform" in l for l in labels)


def test_powershell_class_base_type_emits_inherits_edge():
    """`class Circle : Shape` must produce an `inherits` edge Circle->Shape."""
    # The base type after ':' was previously dropped because the handler only
    # read the first simple_name (the class name).
    result = extract_powershell(FIXTURES / "sample.ps1")
    inherits_pairs = _edge_labels(result, "inherits")
    assert ("Circle", "Shape") in inherits_pairs


def test_powershell_property_field_type_context():
r = extract_powershell(FIXTURES / "sample.ps1")
assert ("DataProcessor", "string") in _edge_labels(r, "references", "field")
Expand Down
Loading