Skip to content

Commit 9414be8

Browse files
committed
fix(langchain): harden langgraph resume detection
1 parent 12bc17b commit 9414be8

File tree

2 files changed

+185
-10
lines changed

2 files changed

+185
-10
lines changed

langfuse/langchain/CallbackHandler.py

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import OrderedDict
12
from contextvars import Token
23
from typing import (
34
Any,
@@ -85,12 +86,18 @@
8586
LANGSMITH_TAG_HIDDEN: str = "langsmith:hidden"
8687
CONTROL_FLOW_EXCEPTION_TYPES: Set[Type[BaseException]] = set()
8788
LANGGRAPH_COMMAND_TYPE: Optional[Type[Any]] = None
89+
MAX_PENDING_RESUME_TRACE_CONTEXTS = 1024
8890

8991
try:
9092
from langgraph.errors import GraphBubbleUp
91-
from langgraph.types import Command as LangGraphCommand
9293

9394
CONTROL_FLOW_EXCEPTION_TYPES.add(GraphBubbleUp)
95+
except ImportError:
96+
pass
97+
98+
try:
99+
from langgraph.types import Command as LangGraphCommand
100+
94101
LANGGRAPH_COMMAND_TYPE = LangGraphCommand
95102
except ImportError:
96103
pass
@@ -140,7 +147,9 @@ def __init__(
140147
self._trace_context = trace_context
141148
# LangGraph resumes as a fresh root callback run after interrupting, so we keep
142149
# pending resume contexts keyed by thread/session instead of a single shared slot.
143-
self._resume_trace_context_by_key: Dict[str, TraceContext] = {}
150+
self._resume_trace_context_by_key: OrderedDict[str, TraceContext] = (
151+
OrderedDict()
152+
)
144153
self._root_run_resume_key_map: Dict[UUID, str] = {}
145154
self._child_to_parent_run_id_map: Dict[UUID, Optional[UUID]] = {}
146155

@@ -190,10 +199,21 @@ def _pop_root_run_resume_key(self, run_id: UUID) -> Optional[str]:
190199
return self._root_run_resume_key_map.pop(run_id, None)
191200

192201
def _is_langgraph_resume(self, inputs: Any) -> bool:
193-
return LANGGRAPH_COMMAND_TYPE is not None and isinstance(
194-
inputs, LANGGRAPH_COMMAND_TYPE
202+
return (
203+
LANGGRAPH_COMMAND_TYPE is not None
204+
and isinstance(inputs, LANGGRAPH_COMMAND_TYPE)
205+
and getattr(inputs, "resume", None) is not None
195206
)
196207

208+
def _store_resume_trace_context(
209+
self, *, resume_key: str, trace_context: TraceContext
210+
) -> None:
211+
self._resume_trace_context_by_key[resume_key] = trace_context
212+
self._resume_trace_context_by_key.move_to_end(resume_key)
213+
214+
if len(self._resume_trace_context_by_key) > MAX_PENDING_RESUME_TRACE_CONTEXTS:
215+
self._resume_trace_context_by_key.popitem(last=False)
216+
197217
def _take_root_trace_context(
198218
self, *, inputs: Any, metadata: Optional[Dict[str, Any]]
199219
) -> tuple[Optional[str], Optional[TraceContext]]:
@@ -228,7 +248,9 @@ def _restore_root_trace_context(
228248

229249
# Span creation failed after we consumed the pending linkage, so put it
230250
# back and let the next retry resume the interrupted trace correctly.
231-
self._resume_trace_context_by_key.setdefault(resume_key, trace_context)
251+
self._store_resume_trace_context(
252+
resume_key=resume_key, trace_context=trace_context
253+
)
232254

233255
def _clear_root_run_resume_key(self, run_id: UUID) -> None:
234256
# Keep the pending interrupt context until an explicit Command(resume=...)
@@ -243,10 +265,13 @@ def _persist_resume_trace_context(self, *, run_id: UUID, observation: Any) -> No
243265
if resume_key is None:
244266
return
245267

246-
self._resume_trace_context_by_key[resume_key] = {
247-
"trace_id": observation.trace_id,
248-
"parent_span_id": observation.id,
249-
}
268+
self._store_resume_trace_context(
269+
resume_key=resume_key,
270+
trace_context={
271+
"trace_id": observation.trace_id,
272+
"parent_span_id": observation.id,
273+
},
274+
)
250275

251276
def _get_error_level_and_status_message(
252277
self, error: BaseException
@@ -534,8 +559,8 @@ def on_chain_start(
534559
resume_key=resume_key, trace_context=trace_context
535560
)
536561
if parent_run_id is None:
537-
self._clear_root_run_resume_key(run_id)
538562
self._exit_propagation_context()
563+
self._reset(run_id)
539564
langfuse_logger.exception(e)
540565

541566
def _register_langfuse_prompt(

tests/unit/test_langchain.py

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,7 @@ class DummyControlFlowError(RuntimeError):
494494

495495
assert "thread-1" in handler._resume_trace_context_by_key
496496
assert failed_resume_run_id not in handler._root_run_resume_key_map
497+
assert failed_resume_run_id not in handler._child_to_parent_run_id_map
497498
assert handler._propagation_context_manager is None
498499

499500
handler.on_chain_start(
@@ -525,6 +526,92 @@ class DummyControlFlowError(RuntimeError):
525526
otel_context.detach(context_token)
526527

527528

529+
def test_control_flow_resume_ignores_non_resume_commands(
530+
memory_exporter, langfuse_memory_client, monkeypatch
531+
):
532+
class DummyControlFlowError(RuntimeError):
533+
pass
534+
535+
Command = pytest.importorskip("langgraph.types").Command
536+
537+
context_token = otel_context.attach(otel_context.Context())
538+
monkeypatch.setattr(
539+
callback_handler_module,
540+
"CONTROL_FLOW_EXCEPTION_TYPES",
541+
{DummyControlFlowError},
542+
)
543+
544+
try:
545+
handler = CallbackHandler()
546+
547+
interrupt_run_id = uuid4()
548+
goto_run_id = uuid4()
549+
resume_run_id = uuid4()
550+
551+
handler.on_chain_start(
552+
{"name": "LangGraph"},
553+
{"messages": ["need approval"]},
554+
run_id=interrupt_run_id,
555+
metadata={"thread_id": "thread-1"},
556+
)
557+
handler.on_chain_error(
558+
DummyControlFlowError("graph interrupt"),
559+
run_id=interrupt_run_id,
560+
)
561+
562+
handler.on_chain_start(
563+
{"name": "LangGraph"},
564+
Command(goto="approval_node"),
565+
run_id=goto_run_id,
566+
metadata={"thread_id": "thread-1"},
567+
)
568+
handler.on_chain_end(
569+
{"messages": ["routed"]},
570+
run_id=goto_run_id,
571+
)
572+
573+
assert "thread-1" in handler._resume_trace_context_by_key
574+
575+
handler.on_chain_start(
576+
{"name": "LangGraph"},
577+
Command(resume={"approved": True}),
578+
run_id=resume_run_id,
579+
metadata={"thread_id": "thread-1"},
580+
)
581+
handler.on_chain_end(
582+
{"messages": ["approved"]},
583+
run_id=resume_run_id,
584+
)
585+
586+
handler._langfuse_client.flush()
587+
588+
root_spans = [
589+
span
590+
for span in memory_exporter.get_finished_spans()
591+
if span.name == "LangGraph"
592+
]
593+
594+
assert len(root_spans) == 3
595+
596+
spans_by_trace_id = {}
597+
for span in root_spans:
598+
spans_by_trace_id.setdefault(span.context.trace_id, []).append(span)
599+
600+
assert sorted(len(spans) for spans in spans_by_trace_id.values()) == [1, 2]
601+
602+
resumed_trace_spans = next(
603+
spans for spans in spans_by_trace_id.values() if len(spans) == 2
604+
)
605+
initial_span = next(span for span in resumed_trace_spans if span.parent is None)
606+
resumed_span = next(
607+
span for span in resumed_trace_spans if span.parent is not None
608+
)
609+
610+
assert resumed_span.parent.span_id == initial_span.context.span_id
611+
finally:
612+
otel_context.detach(context_token)
613+
614+
528615
def test_root_reset_preserves_other_inflight_resume_keys(
529616
memory_exporter, langfuse_memory_client, monkeypatch
530617
):
@@ -681,3 +768,66 @@ class DummyControlFlowError(RuntimeError):
681768
assert "retriever-thread" in handler._resume_trace_context_by_key
682769
assert retriever_run_id not in handler._root_run_resume_key_map
683770
assert retriever_run_id not in handler._child_to_parent_run_id_map
771+
772+
773+
def test_pending_resume_contexts_are_capped(langfuse_memory_client, monkeypatch):
774+
class DummyControlFlowError(RuntimeError):
775+
pass
776+
777+
monkeypatch.setattr(
778+
callback_handler_module,
779+
"CONTROL_FLOW_EXCEPTION_TYPES",
780+
{DummyControlFlowError},
781+
)
782+
monkeypatch.setattr(
783+
callback_handler_module,
784+
"MAX_PENDING_RESUME_TRACE_CONTEXTS",
785+
4,
786+
)
787+
788+
handler = CallbackHandler()
789+
790+
for index in range(5):
791+
run_id = uuid4()
792+
thread_id = f"thread-{index}"
793+
794+
handler.on_chain_start(
795+
{"name": "LangGraph"},
796+
{"messages": ["need approval"]},
797+
run_id=run_id,
798+
metadata={"thread_id": thread_id},
799+
)
800+
handler.on_chain_error(
801+
DummyControlFlowError(f"graph interrupt {index}"),
802+
run_id=run_id,
803+
)
804+
805+
assert len(handler._resume_trace_context_by_key) == 4
806+
assert list(handler._resume_trace_context_by_key) == [
807+
"thread-1",
808+
"thread-2",
809+
"thread-3",
810+
"thread-4",
811+
]
812+
813+
814+
def test_graphbubbleup_import_is_independent_from_command_import():
815+
real_import = __import__
816+
817+
def import_without_langgraph_command(
818+
name, globals=None, locals=None, fromlist=(), level=0
819+
):
820+
if name == "langgraph.types":
821+
raise ImportError("Command unavailable")
822+
823+
return real_import(name, globals, locals, fromlist, level)
824+
825+
with patch("builtins.__import__", side_effect=import_without_langgraph_command):
826+
reloaded_module = importlib.reload(callback_handler_module)
827+
assert reloaded_module.LANGGRAPH_COMMAND_TYPE is None
828+
assert any(
829+
exception_type.__name__ == "GraphBubbleUp"
830+
for exception_type in reloaded_module.CONTROL_FLOW_EXCEPTION_TYPES
831+
)
832+
833+
importlib.reload(callback_handler_module)

0 commit comments

Comments (0)