From 10d39e0095d720508eed94d96aad25b57a9d725d Mon Sep 17 00:00:00 2001 From: Teun Huijben Date: Fri, 1 May 2026 11:27:05 -0700 Subject: [PATCH 1/5] view.remove_node speedup by not looping over all edges every time --- src/tracksdata/graph/_graph_view.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/src/tracksdata/graph/_graph_view.py b/src/tracksdata/graph/_graph_view.py index a9350258..cd3b814a 100644 --- a/src/tracksdata/graph/_graph_view.py +++ b/src/tracksdata/graph/_graph_view.py @@ -465,23 +465,20 @@ def remove_node(self, node_id: int) -> None: # Get the local node ID and remove from local graph local_node_id = self._external_to_local[node_id] + # Capture incident edges BEFORE removal. rustworkx drops them along with + # the node; afterwards we'd have no way to identify which entries to + # clean from `_edge_map_to_root` without scanning the whole bookkeeping. + incident_local_edge_ids = list(self.rx_graph.incident_edges(local_node_id)) + with self.node_removed.blocked(): super().remove_node(local_node_id) # Remove the node mapping self._remove_id_mapping(external_id=node_id) - # Update edge mappings - remove edges involving this node - edges_to_remove = [] - edge_indices = self.rx_graph.edge_indices() - for local_edge_id, _ in list(self._edge_map_to_root.items()): - # Check if this edge is still in the local graph - if local_edge_id not in edge_indices: - edges_to_remove.append(local_edge_id) - - for edge_id in edges_to_remove: - if edge_id in self._edge_map_to_root: - del self._edge_map_to_root[edge_id] + # Drop just the affected edges from the bookkeeping + for edge_id in incident_local_edge_ids: + self._edge_map_to_root.pop(edge_id, None) else: self._out_of_sync = True From ee564b562b6156f80248f9eae7b50e27d63704dc Mon Sep 17 00:00:00 2001 From: Teun Huijben Date: Fri, 1 May 2026 12:57:43 -0700 Subject: [PATCH 2/5] view.update_node_attrs defined new_attrs as old + applied changes. This saves a double fetch of the attributes from the root. --- src/tracksdata/graph/_graph_view.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/tracksdata/graph/_graph_view.py b/src/tracksdata/graph/_graph_view.py index cd3b814a..d617c212 100644 --- a/src/tracksdata/graph/_graph_view.py +++ b/src/tracksdata/graph/_graph_view.py @@ -2,6 +2,7 @@ from typing import Any, Literal, cast, overload import bidict +import numpy as np import polars as pl import rustworkx as rx @@ -737,12 +738,18 @@ def update_node_attrs( self._out_of_sync = True if view_signal_on or root_signal_on: - new_attrs_by_id = ( - self._root.filter(node_ids=node_ids) - .node_attrs(attr_keys=signal_keys) - .rows_by_key(key=DEFAULT_ATTR_KEYS.NODE_ID, named=True, unique=True, include_key=True) - ) old_attrs_by_id = cast(dict[int, dict[str, Any]], old_attrs_by_id) # for mypy + # Derive new_attrs by overlaying applied `attrs` onto old_attrs, instead of + # re-querying root. Mirrors the broadcasting semantics of + # `_root.update_node_attrs`: scalars apply to all nodes, sequences index by + # position in `node_ids`. + new_attrs_by_id: dict[int, dict[str, Any]] = {} + for i, node_id in enumerate(node_ids): + new_attrs = dict(old_attrs_by_id[node_id]) + for k, v in attrs.items(): + if k in new_attrs: + new_attrs[k] = v if np.isscalar(v) else v[i] + new_attrs_by_id[node_id] = new_attrs if root_signal_on: for node_id in node_ids: self._root.node_updated.emit( From 5138caa491b27070a878c1008fd5f327e8839249 Mon Sep 17 00:00:00 2001 From: Teun Huijben Date: Fri, 1 May 2026 13:06:30 -0700 Subject: [PATCH 3/5] add benchmarks for update_node_attrs with listener (used in GraphArrayView, for example) --- benchmarks/graph_mutations.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/benchmarks/graph_mutations.py b/benchmarks/graph_mutations.py index d7f9563c..7978ca27 100644 --- a/benchmarks/graph_mutations.py +++ b/benchmarks/graph_mutations.py @@ -30,6 +30,10 @@ N_LINEAGES = 50 +def _noop(*_args) -> None: + """No-op slot to enable the `node_updated` signal-payload path in benchmarks.""" + + def _build_graph(backend_name: str, n_nodes: int) -> td.graph.BaseGraph: graph = BACKENDS[backend_name]() graph.add_node_attr_key("score", dtype=pl.Float64) @@ -60,6 +64,16 @@ def setup(self, backend_name: str, n_nodes: int) -> None: self.removal_targets = all_ids[:N_OPS] self.update_targets = all_ids[: N_OPS * 4] + # Separate view with a no-op listener attached. Without a listener, + # update_node_attrs skips the signal-payload computation entirely, so + # the P2-2 optimization (deriving new_attrs from old + applied) isn't + # exercised. This view is the BBoxSpatialFilter / GraphArrayView use case. + self.listened_view = self.graph.filter().subgraph() + self.listened_view.node_updated.connect(_noop) + # Smaller batch, representative of interactive editing where the saved + # query overhead is a larger fraction of the total work. + self.listener_update_targets = all_ids[:N_OPS] + # --- remove_node ------------------------------------------------------ def time_remove_node_root(self, backend_name: str, n_nodes: int) -> None: @@ -78,6 +92,9 @@ def time_update_node_attrs_root(self, backend_name: str, n_nodes: int) -> None: def time_update_node_attrs_view(self, backend_name: str, n_nodes: int) -> None: self.view.update_node_attrs(node_ids=self.update_targets, attrs={"score": 1.0}) + def time_update_node_attrs_view_with_listener(self, backend_name: str, n_nodes: int) -> None: + self.listened_view.update_node_attrs(node_ids=self.listener_update_targets, attrs={"score": 1.0}) + # --- filter (standalone, materialized to ids) ------------------------ def time_filter_node_ids(self, backend_name: str, n_nodes: int) -> None: From b90f158c3e0f92d691968808408e927cfd736eb2 Mon Sep 17 00:00:00 2001 From: Teun Huijben Date: Fri, 1 May 2026 15:06:16 -0700 Subject: [PATCH 4/5] graphview helpers that allow removing nodes/edges only from the view, not the _root --- src/tracksdata/graph/_graph_view.py | 148 +++++++++++++++--- src/tracksdata/graph/_test/test_subgraph.py | 159 ++++++++++++++++++++ 2 files changed, 285 insertions(+), 22 deletions(-) diff --git a/src/tracksdata/graph/_graph_view.py b/src/tracksdata/graph/_graph_view.py index d617c212..9ea28185 100644 --- a/src/tracksdata/graph/_graph_view.py +++ b/src/tracksdata/graph/_graph_view.py @@ -429,6 +429,29 @@ def bulk_add_nodes(self, nodes: list[dict[str, Any]], indices: list[int] | None return parent_node_ids + def _remove_node_local(self, node_id: int) -> None: + """ + Remove a node from this view's local rx_graph and ID mappings only. + + No validation, no signals, no root call. Caller is responsible for those. + """ + local_node_id = self._external_to_local[node_id] + + # Capture incident edges BEFORE removal. rustworkx drops them along with + # the node; afterwards we'd have no way to identify which entries to + # clean from `_edge_map_to_root` without scanning the whole bookkeeping. + # `all_edges=True` is required — the default returns only out-edges, + # which would leave in-edge bookkeeping stale. + incident_local_edge_ids = list(self.rx_graph.incident_edges(local_node_id, all_edges=True)) + + with self.node_removed.blocked(): + super().remove_node(local_node_id) + + self._remove_id_mapping(external_id=node_id) + + for edge_id in incident_local_edge_ids: + self._edge_map_to_root.pop(edge_id, None) + def remove_node(self, node_id: int) -> None: """ Remove a node from the graph. @@ -463,23 +486,7 @@ def remove_node(self, node_id: int) -> None: self._root.remove_node(node_id) if self.sync: - # Get the local node ID and remove from local graph - local_node_id = self._external_to_local[node_id] - - # Capture incident edges BEFORE removal. rustworkx drops them along with - # the node; afterwards we'd have no way to identify which entries to - # clean from `_edge_map_to_root` without scanning the whole bookkeeping. - incident_local_edge_ids = list(self.rx_graph.incident_edges(local_node_id)) - - with self.node_removed.blocked(): - super().remove_node(local_node_id) - - # Remove the node mapping - self._remove_id_mapping(external_id=node_id) - - # Drop just the affected edges from the bookkeeping - for edge_id in incident_local_edge_ids: - self._edge_map_to_root.pop(edge_id, None) + self._remove_node_local(node_id) else: self._out_of_sync = True @@ -488,6 +495,46 @@ def remove_node(self, node_id: int) -> None: if view_signal_on: self.node_removed.emit(node_id, old_attrs) + def remove_node_from_view(self, node_id: int) -> None: + """ + Remove a node from this view only, leaving the root graph untouched. + + The view's local rx_graph and ID mappings are updated; the root is not + modified. After this call the view no longer represents a strict filter + of the root, but its internal state is consistent and traversals + (successors/predecessors) continue to work. + + Only the view's `node_removed` signal fires — the root signal does not, + because the root did not change. + + Parameters + ---------- + node_id : int + The ID of the node to remove from the view. + + Raises + ------ + ValueError + If the node_id does not exist in the view. + RuntimeError + If `sync=False` — view-only removal requires a maintained local view. + """ + if node_id not in self._external_to_local: + raise ValueError(f"Node {node_id} does not exist in the graph.") + if not self.sync: + raise RuntimeError( + "remove_node_from_view requires sync=True; the local view is not maintained otherwise." + ) + + view_signal_on = is_signal_on(self.node_removed) + if view_signal_on: + old_attrs = self.nodes[node_id].to_dict() + + self._remove_node_local(node_id) + + if view_signal_on: + self.node_removed.emit(node_id, old_attrs) + def add_edge( self, source_id: int, @@ -542,6 +589,18 @@ def bulk_add_edges(self, edges: list[dict[str, Any]], return_ids: bool = False) if return_ids: return parent_edge_ids + def _remove_edge_local(self, edge_id: int) -> None: + """ + Remove an edge from this view's local rx_graph and edge mapping only. + + No validation, no root call. Caller guarantees `edge_id` (root id) is + present in `self._edge_map_from_root`. + """ + local_edge_id = self._edge_map_from_root[edge_id] + src, tgt, _ = self.rx_graph.edge_index_map()[local_edge_id] + self.rx_graph.remove_edge(src, tgt) + del self._edge_map_to_root[local_edge_id] + def remove_edge( self, source_id: int | None = None, @@ -566,14 +625,59 @@ def remove_edge( # Remove from the local graph if synced if self.sync: if edge_id in self._edge_map_from_root: - local_edge_id = self._edge_map_from_root[edge_id] - edge_map = self.rx_graph.edge_index_map() - src, tgt, _ = edge_map[local_edge_id] - self.rx_graph.remove_edge(src, tgt) - del self._edge_map_to_root[local_edge_id] + self._remove_edge_local(edge_id) else: self._out_of_sync = True + def remove_edge_from_view( + self, + source_id: int | None = None, + target_id: int | None = None, + *, + edge_id: int | None = None, + ) -> None: + """ + Remove an edge from this view only, leaving the root graph untouched. + + Resolves the edge by `edge_id` or by `(source_id, target_id)`. The root + graph is not modified, so the view will diverge from a strict filter of + the root. + + Parameters + ---------- + source_id : int, optional + Source node id of the edge. Required if `edge_id` is not given. + target_id : int, optional + Target node id of the edge. Required if `edge_id` is not given. + edge_id : int, optional + Root edge id. If given, `source_id` and `target_id` are ignored. + + Raises + ------ + ValueError + If neither `edge_id` nor both endpoints are given, or the edge is + not in the view. + RuntimeError + If `sync=False` — view-only removal requires a maintained local view. + """ + if not self.sync: + raise RuntimeError( + "remove_edge_from_view requires sync=True; the local view is not maintained otherwise." + ) + + if edge_id is None: + if source_id is None or target_id is None: + raise ValueError("Provide either edge_id or both source_id and target_id.") + try: + edge_id = self._root.edge_id(source_id, target_id) + except rx.NoEdgeBetweenNodes as e: + raise ValueError(f"Edge {source_id}->{target_id} does not exist in the graph.") from e + + if edge_id not in self._edge_map_from_root: + raise ValueError(f"Edge {edge_id} does not exist in the view.") + + self._remove_edge_local(edge_id) + def _get_neighbors( self, neighbors_func: Callable[[rx.PyDiGraph, int], rx.NodeIndices], diff --git a/src/tracksdata/graph/_test/test_subgraph.py b/src/tracksdata/graph/_test/test_subgraph.py index f38e54c1..639cf356 100644 --- a/src/tracksdata/graph/_test/test_subgraph.py +++ b/src/tracksdata/graph/_test/test_subgraph.py @@ -1143,6 +1143,165 @@ def test_graph_view_remove_edge(graph_backend: BaseGraph) -> None: view.remove_edge() +def test_remove_node_from_view_basic(graph_backend: BaseGraph) -> None: + """`remove_node_from_view` drops the node from the view but leaves the root untouched.""" + graph_backend.add_node_attr_key("x", pl.Float64) + graph_backend.add_edge_attr_key("weight", pl.Float64) + + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + n1 = graph_backend.add_node({"t": 1, "x": 1.0}) + n2 = graph_backend.add_node({"t": 2, "x": 2.0}) + graph_backend.add_edge(n0, n1, {"weight": 0.2}) + graph_backend.add_edge(n1, n2, {"weight": 0.8}) + + view = graph_backend.filter().subgraph() + assert isinstance(view, GraphView) + + view.remove_node_from_view(n1) + + # View no longer has the node or its incident edges + assert n1 not in view.node_ids() + assert not view.has_node(n1) + assert not view.has_edge(n0, n1) + assert not view.has_edge(n1, n2) + + # Edge bookkeeping is cleaned in the view + view_edge_root_ids = set(view._edge_map_to_root.values()) + assert graph_backend.edge_id(n0, n1) not in view_edge_root_ids + assert graph_backend.edge_id(n1, n2) not in view_edge_root_ids + + # Root is untouched + assert n1 in graph_backend.node_ids() + assert graph_backend.has_node(n1) + assert graph_backend.has_edge(n0, n1) + assert graph_backend.has_edge(n1, n2) + + +def test_remove_node_from_view_traversals_still_work(graph_backend: BaseGraph) -> None: + """`_out_of_sync` is not set: successors/predecessors still work after view-only removal.""" + graph_backend.add_node_attr_key("x", pl.Float64) + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + n1 = graph_backend.add_node({"t": 1, "x": 1.0}) + n2 = graph_backend.add_node({"t": 2, "x": 2.0}) + graph_backend.add_edge(n0, n1, {}) + graph_backend.add_edge(n1, n2, {}) + + view = graph_backend.filter().subgraph() + view.remove_node_from_view(n1) + + assert view._out_of_sync is False + # Should not raise + view.successors(n0) + view.predecessors(n2) + + +def test_remove_node_from_view_signals(graph_backend: BaseGraph) -> None: + """Only the view's `node_removed` fires; the root's signal does not.""" + graph_backend.add_node_attr_key("x", pl.Float64) + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + n1 = graph_backend.add_node({"t": 1, "x": 1.0}) + + view = graph_backend.filter().subgraph() + + view_calls: list[tuple[int, dict]] = [] + root_calls: list[tuple[int, dict]] = [] + view.node_removed.connect(lambda nid, attrs: view_calls.append((nid, attrs))) + graph_backend.node_removed.connect(lambda nid, attrs: root_calls.append((nid, attrs))) + + view.remove_node_from_view(n1) + + assert len(root_calls) == 0 + assert len(view_calls) == 1 + assert view_calls[0][0] == n1 + assert view_calls[0][1]["x"] == 1.0 + # Root still has the node — the view-only removal did not touch it + assert graph_backend.has_node(n1) + # n0 is unaffected + _ = n0 + + +def test_remove_node_from_view_validation(graph_backend: BaseGraph) -> None: + """Missing node raises ValueError; sync=False raises RuntimeError.""" + graph_backend.add_node_attr_key("x", pl.Float64) + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + + view = graph_backend.filter().subgraph() + + with pytest.raises(ValueError, match=r"Node 999999 does not exist in the graph\."): + view.remove_node_from_view(999999) + + view.sync = False + with pytest.raises(RuntimeError, match=r"remove_node_from_view requires sync=True"): + view.remove_node_from_view(n0) + + +def test_remove_edge_from_view_basic(graph_backend: BaseGraph) -> None: + """`remove_edge_from_view` drops the edge from the view but leaves the root edge intact.""" + graph_backend.add_node_attr_key("x", pl.Float64) + graph_backend.add_edge_attr_key("weight", pl.Float64) + + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + n1 = graph_backend.add_node({"t": 1, "x": 1.0}) + n2 = graph_backend.add_node({"t": 2, "x": 2.0}) + graph_backend.add_edge(n0, n1, {"weight": 0.2}) + graph_backend.add_edge(n1, n2, {"weight": 0.8}) + + # Remove by endpoints + view = graph_backend.filter().subgraph() + view.remove_edge_from_view(n0, n1) + assert not view.has_edge(n0, n1) + assert graph_backend.has_edge(n0, n1) + # Other edge unaffected + assert view.has_edge(n1, n2) + + # Remove by edge_id on a fresh view + view2 = graph_backend.filter().subgraph() + eid = graph_backend.edge_id(n1, n2) + view2.remove_edge_from_view(edge_id=eid) + assert not view2.has_edge(n1, n2) + assert graph_backend.has_edge(n1, n2) + + +def test_remove_edge_from_view_traversals_still_work(graph_backend: BaseGraph) -> None: + """`_out_of_sync` stays False after view-only edge removal.""" + graph_backend.add_node_attr_key("x", pl.Float64) + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + n1 = graph_backend.add_node({"t": 1, "x": 1.0}) + graph_backend.add_edge(n0, n1, {}) + + view = graph_backend.filter().subgraph() + view.remove_edge_from_view(n0, n1) + + assert view._out_of_sync is False + view.successors(n0) + view.predecessors(n1) + + +def test_remove_edge_from_view_validation(graph_backend: BaseGraph) -> None: + """Bad inputs raise ValueError; sync=False raises RuntimeError.""" + graph_backend.add_node_attr_key("x", pl.Float64) + n0 = graph_backend.add_node({"t": 0, "x": 0.0}) + n1 = graph_backend.add_node({"t": 1, "x": 1.0}) + graph_backend.add_edge(n0, n1, {}) + + view = graph_backend.filter().subgraph() + + with pytest.raises(ValueError, match=r"Provide either edge_id or both source_id and target_id\."): + view.remove_edge_from_view() + + # Non-existent endpoints + with pytest.raises(ValueError, match=rf"Edge {n1}->{n0} does not exist in the graph\."): + view.remove_edge_from_view(n1, n0) + + # Edge id not in view + with pytest.raises(ValueError, match=r"Edge 999999 does not exist in the view\."): + view.remove_edge_from_view(edge_id=999999) + + view.sync = False + with pytest.raises(RuntimeError, match=r"remove_edge_from_view requires sync=True"): + view.remove_edge_from_view(n0, n1) + + @parametrize_subgraph_tests def test_has_node(graph_backend: BaseGraph, use_subgraph: bool) -> None: """Test has_node functionality on both original graphs and subgraphs.""" From 3750250bc1f0a21d08ec21f45aacec36b71c3347 Mon Sep 17 00:00:00 2001 From: Teun Huijben Date: Fri, 1 May 2026 15:06:40 -0700 Subject: [PATCH 5/5] precommit fix --- src/tracksdata/graph/_graph_view.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tracksdata/graph/_graph_view.py b/src/tracksdata/graph/_graph_view.py index 9ea28185..98183c19 100644 --- a/src/tracksdata/graph/_graph_view.py +++ b/src/tracksdata/graph/_graph_view.py @@ -522,9 +522,7 @@ def remove_node_from_view(self, node_id: int) -> None: if node_id not in self._external_to_local: raise ValueError(f"Node {node_id} does not exist in the graph.") if not self.sync: - raise RuntimeError( - "remove_node_from_view requires sync=True; the local view is not maintained otherwise." - ) + raise RuntimeError("remove_node_from_view requires sync=True; the local view is not maintained otherwise.") view_signal_on = is_signal_on(self.node_removed) if view_signal_on: @@ -661,9 +659,7 @@ def remove_edge_from_view( If `sync=False` — view-only removal requires a maintained local view. """ if not self.sync: - raise RuntimeError( - "remove_edge_from_view requires sync=True; the local view is not maintained otherwise." - ) + raise RuntimeError("remove_edge_from_view requires sync=True; the local view is not maintained otherwise.") if edge_id is None: if source_id is None or target_id is None: