Closed
Changes from 41 commits
Commits (51)
321ea30
feat: update toolcall types to match semconv
keith-decker Feb 19, 2026
3de0e8d
update changelog
keith-decker Feb 19, 2026
b277b72
lint updates
keith-decker Feb 19, 2026
9b3f2a6
feat: refactor ToolCall to ToolCallRequest and enhance type definitions
keith-decker Feb 19, 2026
0f28588
test: point vertexai at current version of genai utils
keith-decker Feb 23, 2026
db3802a
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Feb 23, 2026
2f009f9
test: fix fileupload test to use ToolCallRequest
keith-decker Feb 23, 2026
5e8d489
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Feb 25, 2026
3064499
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Feb 26, 2026
1287d96
refactor(types): update inheritance structure
keith-decker Mar 2, 2026
865bdab
Merge remote-tracking branch 'origin/main' into pr1-enhance-toolcall-…
keith-decker Mar 2, 2026
1bdd5c4
fix: update opentelemetry-util-genai version constraints and introduc…
keith-decker Mar 2, 2026
57898bf
fix version mismatch
keith-decker Mar 2, 2026
432fc6c
Merge remote-tracking branch 'origin/main' into pr1-enhance-toolcall-…
keith-decker Mar 4, 2026
b11de5c
Convert GenericPart to a dataclass
keith-decker Mar 4, 2026
9bd35dc
patch openai to use toolcallrequest
keith-decker Mar 4, 2026
dc2b1b9
Refactor message part handling to use Blob and Uri types
keith-decker Mar 4, 2026
6512305
Merge remote-tracking branch 'origin/main' into pr1-enhance-toolcall-…
keith-decker Mar 4, 2026
124abd0
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 5, 2026
73b8369
Add ServerToolCall and ServerToolCallResponse models with tests
keith-decker Mar 9, 2026
24b1721
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 9, 2026
dc4f7c2
refactor: streamline Blob and Uri handling in message processing
keith-decker Mar 10, 2026
828e954
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 10, 2026
e1fdd08
convert anthropic to toolcallrequest and remove toolcall from message…
keith-decker Mar 11, 2026
978d837
Merge branch 'pr1-enhance-toolcall-type' of github.com:keith-decker/o…
keith-decker Mar 11, 2026
1f336d3
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 12, 2026
cdec2bc
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 17, 2026
e7f07f6
move error.type to base genai class
keith-decker Mar 17, 2026
3730a3e
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 19, 2026
dbac1b6
Merge branch 'main' into pr1-enhance-toolcall-type
xrmx Mar 20, 2026
ecfb339
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 24, 2026
d5997ec
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 25, 2026
70c7804
Merge branch 'main' into pr1-enhance-toolcall-type
keith-decker Mar 26, 2026
01ab8ae
update version requirements
keith-decker Mar 27, 2026
e1bc9a2
feat: add span handler for toolcall
keith-decker Feb 25, 2026
7969654
Fix merge conflicts, update tool start/stop/fail to match api
keith-decker Mar 30, 2026
a9f9eef
lint fixes
keith-decker Mar 30, 2026
effdcc7
Merge branch 'main' into toolcall-handler-and-spans
keith-decker Mar 30, 2026
7175e89
update changelog
keith-decker Mar 30, 2026
e86926d
fix leaky test
keith-decker Mar 30, 2026
c90424b
Add duration metric to tool calls
keith-decker Mar 30, 2026
7391f3b
lint updates
keith-decker Mar 31, 2026
8e6ce17
Merge branch 'main' into toolcall-handler-and-spans
keith-decker Mar 31, 2026
56d4373
move operation name to genaiinvocation, update tool span name
keith-decker Mar 31, 2026
e3bda93
Feedback and style/lint updates
keith-decker Apr 2, 2026
198a620
lint and feedback updates. clean up if statements
keith-decker Apr 2, 2026
e82b88f
Merge branch 'main' into toolcall-handler-and-spans
keith-decker Apr 2, 2026
1c18014
style fix
keith-decker Apr 2, 2026
2fd6b28
make toolcall required in context manager call
keith-decker Apr 7, 2026
d3b09a6
update span utils to require toolcall name
keith-decker Apr 7, 2026
30c5869
Merge branch 'main' into toolcall-handler-and-spans
keith-decker Apr 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions util/opentelemetry-util-genai/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- Add ToolCall span lifecycle support
([#4356](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4356))
- Enrich ToolCall type; breaking change: the ToolCall class is renamed to ToolCallRequest
([#4218](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4218))
- Add EmbeddingInvocation span lifecycle support
@@ -17,21 +17,17 @@
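The changelog calls the ToolCall → ToolCallRequest rename a breaking change. A hedged sketch of how a library can soften such a rename with a deprecated alias — the class fields and shim below are illustrative stand-ins, not the actual opentelemetry-util-genai code:

```python
import warnings
from dataclasses import dataclass, field
from typing import Any, Optional


@dataclass
class ToolCallRequest:
    """Stand-in for the renamed type (fields are illustrative)."""

    name: str
    arguments: dict = field(default_factory=dict)
    id: Optional[str] = None


def _deprecated_toolcall(*args: Any, **kwargs: Any) -> ToolCallRequest:
    # Backward-compat shim: warn once per call site, delegate to the new name.
    warnings.warn(
        "ToolCall is deprecated; use ToolCallRequest",
        DeprecationWarning,
        stacklevel=2,
    )
    return ToolCallRequest(*args, **kwargs)


ToolCall = _deprecated_toolcall  # old name still constructs the new type
```

This keeps old call sites working for one release cycle while steering users to the new name.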

This module exposes the `TelemetryHandler` class, which manages the lifecycle of
GenAI (Generative AI) invocations and emits telemetry data (spans and related attributes).
It supports starting, stopping, and failing LLM invocations.
It supports starting, stopping, and failing LLM invocations and tool call executions.

Classes:
- TelemetryHandler: Manages GenAI invocation lifecycles and emits telemetry.

Functions:
- get_telemetry_handler: Returns a singleton `TelemetryHandler` instance.

Usage:
Usage - LLM Invocations:
handler = get_telemetry_handler()

# Create an invocation object with your request data
# The span and context_token attributes are set by the TelemetryHandler, and
# managed by the TelemetryHandler during the lifecycle of the span.

# Use the context manager to manage the lifecycle of an LLM invocation.
with handler.llm(invocation) as invocation:
# Populate outputs and any additional attributes
@@ -45,17 +41,24 @@
provider="my-provider",
attributes={"custom": "attr"},
)

# Start the invocation (opens a span)
handler.start_llm(invocation)

# Populate outputs and any additional attributes, then stop (closes the span)
invocation.output_messages = [...]
invocation.attributes.update({"more": "attrs"})
handler.stop_llm(invocation)

# Or, in case of error
handler.fail_llm(invocation, Error(type="...", message="..."))
Usage - Tool Call Executions:
handler = get_telemetry_handler()

# Use the context manager to manage the lifecycle of a tool call.
tool = ToolCall(name="get_weather", arguments={"location": "Paris"}, id="call_123")
with handler.tool_call(tool) as tc:
# Execute tool logic
tc.tool_result = {"temp": 20, "condition": "sunny"}

# Or, manage the lifecycle manually
tool = ToolCall(name="get_weather", arguments={"location": "Paris"})
handler.start(tool)
tool.tool_result = {"temp": 20}
handler.stop(tool)
"""

from __future__ import annotations
@@ -78,20 +81,25 @@
get_tracer,
set_span_in_context,
)
from opentelemetry.trace.status import Status, StatusCode
from opentelemetry.util.genai.metrics import InvocationMetricsRecorder
from opentelemetry.util.genai.span_utils import (
_apply_embedding_finish_attributes,
_apply_error_attributes,
_apply_llm_finish_attributes,
_apply_tool_call_attributes,
_finish_tool_call_span,
_get_embedding_span_name,
_get_llm_span_name,
_get_tool_call_span_name,
_maybe_emit_llm_event,
)
from opentelemetry.util.genai.types import (
EmbeddingInvocation,
Error,
GenAIInvocation,
LLMInvocation,
ToolCall,
)
from opentelemetry.util.genai.version import __version__

@@ -129,46 +137,46 @@ def __init__(
schema_url=schema_url,
)

def _record_llm_metrics(
def _record_metrics(
self,
invocation: LLMInvocation,
invocation: GenAIInvocation,
span: Span | None = None,
*,
error_type: str | None = None,
) -> None:
"""Record metrics for an invocation."""
if self._metrics_recorder is None or span is None:
return
# Only LLMInvocation and ToolCall metrics are currently supported
if not isinstance(invocation, (LLMInvocation, ToolCall)):
return
self._metrics_recorder.record(
span,
invocation,
error_type=error_type,
)

@staticmethod
def _record_embedding_metrics(
invocation: EmbeddingInvocation,
span: Span | None = None,
*,
error_type: str | None = None,
) -> None:
# Metrics recorder currently supports LLMInvocation fields only.
# Keep embedding metrics as a no-op until dedicated embedding
# metric support is added.
return

def _start(self, invocation: _T) -> _T:
"""Start a GenAI invocation and create a pending span entry."""
span_kind = SpanKind.CLIENT
if isinstance(invocation, LLMInvocation):
span_name = _get_llm_span_name(invocation)
elif isinstance(invocation, EmbeddingInvocation):
span_name = _get_embedding_span_name(invocation)
elif isinstance(invocation, ToolCall):
span_name = _get_tool_call_span_name(invocation)
span_kind = SpanKind.INTERNAL
else:
span_name = ""

span = self._tracer.start_span(
name=span_name,
kind=SpanKind.CLIENT,
kind=span_kind,
)
if isinstance(invocation, ToolCall):
_apply_tool_call_attributes(
span, invocation, capture_content=False
)
# Record a monotonic start timestamp (seconds) for duration
# calculation using timeit.default_timer.
invocation.monotonic_start_s = timeit.default_timer()
invocation.span = span
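`_start` picks the span name and kind by invocation type, with tool calls getting INTERNAL rather than the default CLIENT. A sketch of that dispatch with stand-in types and string kinds (the real code uses `opentelemetry.trace.SpanKind` and the `_get_*_span_name` helpers; the name formats below follow the GenAI semconv pattern but are assumptions here):

```python
from dataclasses import dataclass


@dataclass
class LLMInvocation:  # stand-in
    request_model: str


@dataclass
class ToolCall:  # stand-in
    name: str


def span_name_and_kind(invocation: object) -> tuple[str, str]:
    # Mirrors the dispatch in _start: default CLIENT, INTERNAL for tools.
    kind = "CLIENT"
    if isinstance(invocation, LLMInvocation):
        name = f"chat {invocation.request_model}"
    elif isinstance(invocation, ToolCall):
        name = f"execute_tool {invocation.name}"
        kind = "INTERNAL"
    else:
        name = ""
    return name, kind
```

Tool execution happens in-process, which is why INTERNAL is the appropriate kind there, while LLM and embedding calls go over the network as CLIENT spans.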
@@ -187,11 +195,14 @@ def _stop(self, invocation: _T) -> _T:
try:
if isinstance(invocation, LLMInvocation):
_apply_llm_finish_attributes(span, invocation)
self._record_llm_metrics(invocation, span)
self._record_metrics(invocation, span)
_maybe_emit_llm_event(self._logger, span, invocation)
elif isinstance(invocation, EmbeddingInvocation):
_apply_embedding_finish_attributes(span, invocation)
self._record_embedding_metrics(invocation, span)
self._record_metrics(invocation, span)
elif isinstance(invocation, ToolCall):
_finish_tool_call_span(span, invocation, capture_content=True)
self._record_metrics(invocation, span)
finally:
# Detach context and end span even if finishing fails
otel_context.detach(invocation.context_token)
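The `try`/`finally` in `_stop` guarantees the context is detached and the span ended even when attribute finalization raises. The same pattern in isolation, with a minimal stand-in span (not the real OpenTelemetry Span API):

```python
from typing import Callable


class Span:
    """Minimal stand-in span: records whether end() was called."""

    def __init__(self) -> None:
        self.ended = False

    def end(self) -> None:
        self.ended = True


def finish(span: Span, apply_attributes: Callable[[Span], None]) -> None:
    # Mirrors _stop: the span is ended even when attribute
    # finalization raises (context detach would sit here too).
    try:
        apply_attributes(span)
    finally:
        span.end()


def broken_apply(s: Span) -> None:
    raise ValueError("boom")


span = Span()
try:
    finish(span, broken_apply)
except ValueError:
    pass  # the exception still propagates, but the span is ended
```

Without the `finally`, a bug in attribute serialization would leak an open span and a stale context token.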
@@ -210,18 +221,19 @@ def _fail(self, invocation: _T, error: Error) -> _T:
if isinstance(invocation, LLMInvocation):
_apply_llm_finish_attributes(span, invocation)
_apply_error_attributes(span, error, error_type)
self._record_llm_metrics(
invocation, span, error_type=error_type
)
self._record_metrics(invocation, span, error_type=error_type)
_maybe_emit_llm_event(
self._logger, span, invocation, error_type
)
elif isinstance(invocation, EmbeddingInvocation):
_apply_embedding_finish_attributes(span, invocation)
_apply_error_attributes(span, error, error_type)
self._record_embedding_metrics(
invocation, span, error_type=error_type
)
self._record_metrics(invocation, span, error_type=error_type)
elif isinstance(invocation, ToolCall):
invocation.error_type = error_type
_finish_tool_call_span(span, invocation, capture_content=True)
self._record_metrics(invocation, span, error_type=error_type)
span.set_status(Status(StatusCode.ERROR, error.message))
finally:
# Detach context and end span even if finishing fails
otel_context.detach(invocation.context_token)
@@ -258,6 +270,37 @@ def fail_llm(
"""Fail an LLM invocation and end its span with error status."""
return self._fail(invocation, error)

@contextmanager
Contributor — @DylanRussell, Apr 2, 2026:

In the google gen AI instrumentation we monkey patch the actual function call: https://github.com/open-telemetry/opentelemetry-python-contrib/blob/main/instrumentation-genai/opentelemetry-instrumentation-google-genai/src/opentelemetry/instrumentation/google_genai/tool_call_wrapper.py#L164

IDK if it makes sense to do something like that here or not (accept the actual Callable object)?

Either way I'll try to update that instrumentation to use this once we submit

def tool_call(
self, tool_call: ToolCall | None = None
) -> Iterator[ToolCall]:
"""Context manager for tool call invocations.

Callers should only set data attributes on the tool_call object; they must not modify the span or context.

Starts the span on entry. On normal exit, finalizes the tool call and ends the span.
If an exception occurs inside the context, marks the span as error, ends it, and
re-raises the original exception.

Example:
with handler.tool_call(ToolCall(name="get_weather", arguments={"location": "Paris"})) as tc:
# Execute tool logic
tc.tool_result = {"temp": 20, "condition": "sunny"}
"""
if tool_call is None:
tool_call = ToolCall(
name="",
arguments={},
id=None,
)
self.start(tool_call)
try:
yield tool_call
except Exception as exc:
self.fail(tool_call, Error(message=str(exc), type=type(exc)))
raise
self.stop(tool_call)
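The `tool_call` context manager above follows a start / yield / fail-on-exception / stop shape. The same shape in a self-contained sketch (stand-in types and state strings, not the real handler, which opens and ends spans at these points):

```python
from contextlib import contextmanager
from dataclasses import dataclass, field
from typing import Any, Iterator


@dataclass
class ToolCall:  # stand-in
    name: str
    arguments: dict = field(default_factory=dict)
    tool_result: Any = None
    state: str = "created"


@contextmanager
def tool_call(tc: ToolCall) -> Iterator[ToolCall]:
    tc.state = "started"  # start: the real handler opens a span here
    try:
        yield tc
    except Exception as exc:
        # fail: record the error, then re-raise the original exception
        tc.state = f"failed:{type(exc).__name__}"
        raise
    tc.state = "stopped"  # stop: normal exit ends the span


with tool_call(ToolCall(name="get_weather", arguments={"location": "Paris"})) as tc:
    tc.tool_result = {"temp": 20, "condition": "sunny"}
```

Note that the `stop` step sits after the `try`/`except` rather than in a `finally`, so a failed call is finalized exactly once, by the `fail` path.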

@contextmanager
def llm(
self, invocation: LLMInvocation | None = None
@@ -1,9 +1,9 @@
"""Helpers for emitting GenAI metrics from LLM invocations."""
"""Helpers for emitting GenAI metrics from invocations."""

from __future__ import annotations

import timeit
from typing import Dict, Optional
from typing import Dict, Optional, Union

from opentelemetry.metrics import Histogram, Meter
from opentelemetry.semconv._incubating.attributes import (
@@ -18,7 +18,7 @@
create_duration_histogram,
create_token_histogram,
)
from opentelemetry.util.genai.types import LLMInvocation
from opentelemetry.util.genai.types import LLMInvocation, ToolCall
from opentelemetry.util.types import AttributeValue


@@ -32,44 +32,41 @@ def __init__(self, meter: Meter):
def record(
self,
span: Optional[Span],
invocation: LLMInvocation,
invocation: Union[LLMInvocation, ToolCall],
*,
error_type: Optional[str] = None,
) -> None:
"""Record duration and token metrics for an invocation if possible."""
"""Record duration and token metrics for an invocation.

Supports LLMInvocation (with token metrics) and ToolCall (duration only).
"""
# pylint: disable=too-many-branches

if span is None:
return

token_counts: list[tuple[int, str]] = []
if invocation.input_tokens is not None:
token_counts.append(
(
invocation.input_tokens,
GenAI.GenAiTokenTypeValues.INPUT.value,
)
# Build attributes based on invocation type
attributes: Dict[str, AttributeValue] = {}

if isinstance(invocation, LLMInvocation):
attributes[GenAI.GEN_AI_OPERATION_NAME] = (
GenAI.GenAiOperationNameValues.CHAT.value
)
if invocation.output_tokens is not None:
token_counts.append(
(
invocation.output_tokens,
GenAI.GenAiTokenTypeValues.OUTPUT.value,
if invocation.request_model:
attributes[GenAI.GEN_AI_REQUEST_MODEL] = (
invocation.request_model
)
)
if invocation.response_model_name:
attributes[GenAI.GEN_AI_RESPONSE_MODEL] = (
invocation.response_model_name
)
else:
# ToolCall
attributes[GenAI.GEN_AI_OPERATION_NAME] = "execute_tool"

attributes: Dict[str, AttributeValue] = {
GenAI.GEN_AI_OPERATION_NAME: GenAI.GenAiOperationNameValues.CHAT.value
}
if invocation.request_model:
attributes[GenAI.GEN_AI_REQUEST_MODEL] = invocation.request_model
# Common attributes across invocation types
if invocation.provider:
attributes[GenAI.GEN_AI_PROVIDER_NAME] = invocation.provider
if invocation.response_model_name:
attributes[GenAI.GEN_AI_RESPONSE_MODEL] = (
invocation.response_model_name
)
if invocation.server_address:
attributes[server_attributes.SERVER_ADDRESS] = (
invocation.server_address
@@ -79,7 +76,7 @@ def record(
if invocation.metric_attributes:
attributes.update(invocation.metric_attributes)

# Calculate duration from span timing or invocation monotonic start
# Calculate duration from monotonic start time
duration_seconds: Optional[float] = None
if invocation.monotonic_start_s is not None:
duration_seconds = max(
@@ -98,12 +95,31 @@
context=span_context,
)

for token_count, token_type in token_counts:
self._token_histogram.record(
token_count,
attributes=attributes | {GenAI.GEN_AI_TOKEN_TYPE: token_type},
context=span_context,
)
# Token metrics only for LLMInvocation
if isinstance(invocation, LLMInvocation):
token_counts: list[tuple[int, str]] = []
if invocation.input_tokens is not None:
token_counts.append(
(
invocation.input_tokens,
GenAI.GenAiTokenTypeValues.INPUT.value,
)
)
if invocation.output_tokens is not None:
token_counts.append(
(
invocation.output_tokens,
GenAI.GenAiTokenTypeValues.OUTPUT.value,
)
)

for token_count, token_type in token_counts:
self._token_histogram.record(
token_count,
attributes=attributes
| {GenAI.GEN_AI_TOKEN_TYPE: token_type},
context=span_context,
)


__all__ = ["InvocationMetricsRecorder"]
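`record` builds one shared attribute dict, records duration for every invocation type, and emits token histograms only for LLM invocations, merging per-record attributes with the `|` operator. A sketch of that flow with a plain list standing in for the two histograms (attribute keys and the model name are illustrative, not the real semconv constants):

```python
import timeit
from dataclasses import dataclass
from typing import Optional


@dataclass
class LLMInvocation:  # stand-in
    request_model: str
    input_tokens: Optional[int] = None
    output_tokens: Optional[int] = None
    monotonic_start_s: Optional[float] = None


def record_metrics(invocation: LLMInvocation, records: list) -> None:
    # Shared attributes; keys mimic the semconv names, illustratively.
    attributes = {
        "gen_ai.operation.name": "chat",
        "gen_ai.request.model": invocation.request_model,
    }

    # Duration from a monotonic start timestamp, clamped at zero.
    if invocation.monotonic_start_s is not None:
        duration = max(
            timeit.default_timer() - invocation.monotonic_start_s, 0.0
        )
        records.append(("duration", duration, attributes))

    # Token metrics: one record per token type, attributes merged with |.
    for count, token_type in (
        (invocation.input_tokens, "input"),
        (invocation.output_tokens, "output"),
    ):
        if count is not None:
            records.append(
                ("tokens", count, attributes | {"gen_ai.token.type": token_type})
            )


records: list = []
inv = LLMInvocation(
    request_model="example-model",
    input_tokens=12,
    output_tokens=3,
    monotonic_start_s=timeit.default_timer(),
)
record_metrics(inv, records)
```

The `|` merge leaves the shared dict untouched, so each token record gets its own `gen_ai.token.type` without mutating the attributes used for the duration record.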