Skip to content

Commit 3b05b19

Browse files
committed
feat: add span handler for toolcall
1 parent e7f07f6 commit 3b05b19

6 files changed

Lines changed: 564 additions & 16 deletions

File tree

util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py

Lines changed: 153 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,17 @@
1717
1818
This module exposes the `TelemetryHandler` class, which manages the lifecycle of
1919
GenAI (Generative AI) invocations and emits telemetry data (spans and related attributes).
20-
It supports starting, stopping, and failing LLM invocations.
20+
It supports starting, stopping, and failing LLM invocations and tool call executions.
2121
2222
Classes:
2323
- TelemetryHandler: Manages GenAI invocation lifecycles and emits telemetry.
2424
2525
Functions:
2626
- get_telemetry_handler: Returns a singleton `TelemetryHandler` instance.
2727
28-
Usage:
28+
Usage - LLM Invocations:
2929
handler = get_telemetry_handler()
3030
31-
# Create an invocation object with your request data
32-
# The span and context_token attributes are set by the TelemetryHandler, and
33-
# managed by the TelemetryHandler during the lifecycle of the span.
34-
3531
# Use the context manager to manage the lifecycle of an LLM invocation.
3632
with handler.llm(invocation) as invocation:
3733
# Populate outputs and any additional attributes
@@ -45,17 +41,24 @@
4541
provider="my-provider",
4642
attributes={"custom": "attr"},
4743
)
48-
49-
# Start the invocation (opens a span)
5044
handler.start_llm(invocation)
51-
52-
# Populate outputs and any additional attributes, then stop (closes the span)
5345
invocation.output_messages = [...]
54-
invocation.attributes.update({"more": "attrs"})
5546
handler.stop_llm(invocation)
5647
57-
# Or, in case of error
58-
handler.fail_llm(invocation, Error(type="...", message="..."))
48+
Usage - Tool Call Executions:
49+
handler = get_telemetry_handler()
50+
51+
# Use the context manager to manage the lifecycle of a tool call.
52+
tool = ToolCall(name="get_weather", arguments={"location": "Paris"}, id="call_123")
53+
with handler.tool_call(tool) as tc:
54+
# Execute tool logic
55+
tc.tool_result = {"temp": 20, "condition": "sunny"}
56+
57+
# Or, manage the lifecycle manually
58+
tool = ToolCall(name="get_weather", arguments={"location": "Paris"})
59+
handler.start_tool_call(tool)
60+
tool.tool_result = {"temp": 20}
61+
handler.stop_tool_call(tool)
5962
"""
6063

6164
from __future__ import annotations
@@ -78,13 +81,17 @@
7881
get_tracer,
7982
set_span_in_context,
8083
)
84+
from opentelemetry.trace.status import Status, StatusCode
8185
from opentelemetry.util.genai.metrics import InvocationMetricsRecorder
8286
from opentelemetry.util.genai.span_utils import (
8387
_apply_error_attributes,
8488
_apply_llm_finish_attributes,
89+
_apply_tool_call_attributes,
90+
_finish_tool_call_span,
91+
_get_tool_call_span_name,
8592
_maybe_emit_llm_event,
8693
)
87-
from opentelemetry.util.genai.types import Error, LLMInvocation
94+
from opentelemetry.util.genai.types import Error, LLMInvocation, ToolCall
8895
from opentelemetry.util.genai.version import __version__
8996

9097

@@ -187,6 +194,138 @@ def fail_llm( # pylint: disable=no-self-use
187194
span.end()
188195
return invocation
189196

197+
def start_tool_call(
    self,
    tool_call: ToolCall,
) -> ToolCall:
    """Open an ``execute_tool`` span for a tool call and make it current.

    The span is created with kind ``INTERNAL`` and named by
    ``_get_tool_call_span_name``. Attributes are applied with
    ``capture_content=False``, so the opt-in argument/result attributes
    are not written at start time.

    Args:
        tool_call: The ToolCall to begin tracking. It is mutated in place.

    Returns:
        The same ToolCall, with ``span``, ``context_token`` and
        ``monotonic_start_s`` populated.
    """
    new_span = self._tracer.start_span(
        name=_get_tool_call_span_name(tool_call),
        kind=SpanKind.INTERNAL,
    )

    # Structural attributes only at this point; content is deferred to finish.
    _apply_tool_call_attributes(new_span, tool_call, capture_content=False)

    # Monotonic timestamp taken at start of the tool call.
    tool_call.monotonic_start_s = timeit.default_timer()

    # Remember the span and attach it to the current context; the returned
    # token is what stop/fail use to detach again.
    tool_call.span = new_span
    span_context = set_span_in_context(new_span)
    tool_call.context_token = otel_context.attach(span_context)

    return tool_call
234+
235+
def stop_tool_call(self, tool_call: ToolCall) -> ToolCall:  # pylint: disable=no-self-use
    """Finalize a tool call execution successfully.

    Applies the final attributes (including ``tool_result`` when content
    capture permits) and the final status via ``_finish_tool_call_span``,
    then detaches the context token and ends the span.

    Args:
        tool_call: ToolCall instance whose span should be finalized.

    Returns:
        The same ToolCall. If the tool call was never started (no span or
        no context token), it is returned untouched.
    """
    if tool_call.context_token is None or tool_call.span is None:
        # TODO: Provide feedback that this tool call was not started
        return tool_call

    span = tool_call.span

    try:
        # capture_content=True allows the result to be recorded if the
        # configured capture mode permits it.
        _finish_tool_call_span(span, tool_call, capture_content=True)
    finally:
        # Always detach and end, even if finalizing attributes raised,
        # so the context is not left attached and the span is not leaked.
        otel_context.detach(tool_call.context_token)
        span.end()

    return tool_call
260+
261+
def fail_tool_call(  # pylint: disable=no-self-use
    self, tool_call: ToolCall, error: Error
) -> ToolCall:
    """Fail a tool call execution with an error.

    Records ``error.type.__qualname__`` on the tool call, finalizes the
    span via ``_finish_tool_call_span``, sets an ERROR status carrying
    ``error.message``, then detaches the context token and ends the span.

    Args:
        tool_call: ToolCall instance whose span should be failed.
        error: Error details (type and message).

    Returns:
        The same ToolCall. If the tool call was never started (no span or
        no context token), it is returned untouched.
    """
    if tool_call.context_token is None or tool_call.span is None:
        # TODO: Provide feedback that this tool call was not started
        return tool_call

    span = tool_call.span

    try:
        # Store error_type on the tool call so attribute application
        # includes it on the span.
        tool_call.error_type = error.type.__qualname__

        _finish_tool_call_span(span, tool_call, capture_content=True)

        # Re-apply ERROR status, this time with the error message attached.
        span.set_status(Status(StatusCode.ERROR, error.message))
    finally:
        # Always detach and end, even if recording the failure raised,
        # so the context is not left attached and the span is not leaked.
        otel_context.detach(tool_call.context_token)
        span.end()

    return tool_call
295+
296+
@contextmanager
def tool_call(
    self, tool_call: ToolCall | None = None
) -> Iterator[ToolCall]:
    """Track a tool call for the duration of a ``with`` block.

    Callers should only set data attributes on the yielded ToolCall and
    leave its span and context token alone.

    The span is started on entry. If the block finishes normally the tool
    call is stopped; if it raises an ``Exception`` the tool call is failed
    with that exception's type and message and the exception is re-raised.
    When no ToolCall is supplied, an empty one (blank name, no arguments,
    no id) is created.

    Example:
        with handler.tool_call(ToolCall(name="get_weather", arguments={"location": "Paris"})) as tc:
            # Execute tool logic
            tc.tool_result = {"temp": 20, "condition": "sunny"}
    """
    tracked = (
        tool_call
        if tool_call is not None
        else ToolCall(name="", arguments={}, id=None)
    )
    self.start_tool_call(tracked)
    try:
        yield tracked
    except Exception as exc:
        failure = Error(message=str(exc), type=type(exc))
        self.fail_tool_call(tracked, failure)
        raise
    else:
        # Reached only when the with-block completed without raising.
        self.stop_tool_call(tracked)
328+
190329
@contextmanager
191330
def llm(
192331
self, invocation: LLMInvocation | None = None

util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
LLMInvocation,
3838
MessagePart,
3939
OutputMessage,
40+
ToolCall,
4041
)
4142
from opentelemetry.util.genai.utils import (
4243
ContentCapturingMode,
@@ -279,6 +280,106 @@ def _get_llm_response_attributes(
279280
return {key: value for key, value in optional_attrs if value is not None}
280281

281282

283+
def _get_tool_call_span_name(tool_call: ToolCall) -> str:
284+
"""Get span name for tool call execution per semantic convention.
285+
286+
Format: "execute_tool {gen_ai.tool.name}"
287+
"""
288+
return f"execute_tool {tool_call.name}".strip()
289+
290+
291+
def _apply_tool_call_attributes(
    span: Span,
    tool_call: ToolCall,
    capture_content: bool = False,
) -> None:
    """Write execute_tool semantic-convention attributes onto ``span``.

    Follows the span.gen_ai.execute_tool.internal specification:
    https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md#execute-tool-span

    Always set:
    - gen_ai.operation.name = "execute_tool"

    Set when the corresponding ToolCall field is truthy:
    - gen_ai.tool.name
    - gen_ai.tool.call.id
    - gen_ai.tool.type
    - gen_ai.tool.description

    Set only when ``capture_content`` is True, experimental mode is on, and
    the content capturing mode is SPAN_ONLY or SPAN_AND_EVENT (both may hold
    sensitive data and are JSON-serialized):
    - gen_ai.tool.call.arguments (when not None)
    - gen_ai.tool.call.result (when not None)

    Set when ``tool_call.error_type`` is truthy:
    - error.type, and the span status is set to ERROR
    """
    set_attr = span.set_attribute

    # Required attribute.
    set_attr(GenAI.GEN_AI_OPERATION_NAME, "execute_tool")

    # Recommended attributes, each only when a value is present.
    if tool_call.name:
        set_attr(GenAI.GEN_AI_TOOL_NAME, tool_call.name)
    if tool_call.id:
        set_attr(GenAI.GEN_AI_TOOL_CALL_ID, tool_call.id)
    if tool_call.tool_type:
        set_attr(GenAI.GEN_AI_TOOL_TYPE, tool_call.tool_type)
    if tool_call.tool_description:
        set_attr(GenAI.GEN_AI_TOOL_DESCRIPTION, tool_call.tool_description)

    # Opt-in content attributes. Short-circuiting keeps the capture mode
    # lookup from running unless content capture was requested and
    # experimental mode is enabled.
    content_allowed = (
        capture_content
        and is_experimental_mode()
        and get_content_capturing_mode()
        in (
            ContentCapturingMode.SPAN_ONLY,
            ContentCapturingMode.SPAN_AND_EVENT,
        )
    )
    if content_allowed:
        if tool_call.arguments is not None:
            # Serialized to a JSON string per spec.
            set_attr(
                GenAI.GEN_AI_TOOL_CALL_ARGUMENTS,
                gen_ai_json_dumps(tool_call.arguments),
            )
        if tool_call.tool_result is not None:
            set_attr(
                GenAI.GEN_AI_TOOL_CALL_RESULT,
                gen_ai_json_dumps(tool_call.tool_result),
            )

    # Conditionally required: error type plus ERROR status.
    if tool_call.error_type:
        set_attr(error_attributes.ERROR_TYPE, tool_call.error_type)
        span.set_status(Status(StatusCode.ERROR))
359+
360+
361+
def _finish_tool_call_span(
    span: Span,
    tool_call: ToolCall,
    capture_content: bool = False,
) -> None:
    """Apply the final name, attributes and status to a tool call span.

    The span is renamed from the ToolCall's current name, attributes are
    re-applied (including the result when ``capture_content`` permits), and
    the status is set to ERROR when ``tool_call.error_type`` is truthy,
    otherwise OK. The span itself is not ended here.
    """
    # The tool name may have been filled in after the span was started.
    span.update_name(_get_tool_call_span_name(tool_call))

    _apply_tool_call_attributes(span, tool_call, capture_content)

    # NOTE(review): the OpenTelemetry tracing API spec advises instrumentation
    # libraries to generally leave status unset on success; confirm that
    # setting OK explicitly here is intended.
    final_code = StatusCode.ERROR if tool_call.error_type else StatusCode.OK
    span.set_status(Status(final_code))
381+
382+
282383
__all__ = [
283384
"_apply_llm_finish_attributes",
284385
"_apply_error_attributes",
@@ -287,4 +388,7 @@ def _get_llm_response_attributes(
287388
"_get_llm_response_attributes",
288389
"_get_llm_span_name",
289390
"_maybe_emit_llm_event",
391+
"_apply_tool_call_attributes",
392+
"_finish_tool_call_span",
393+
"_get_tool_call_span_name",
290394
]

util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,45 @@ class ToolCallRequest:
6969
type: Literal["tool_call"] = "tool_call"
7070

7171

72+
@dataclass()
class ToolCall(ToolCallRequest):
    """A tool call whose execution is tracked by a span.

    Builds on ToolCallRequest by adding the fields needed to describe and
    track one tool execution according to the execute_tool span semantic
    conventions.

    Reference: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md#execute-tool-span

    When all that is needed is a message part describing a tool call the
    model requested, prefer ToolCallRequest, which does not carry these
    execution-tracking fields.

    Semantic convention attributes for execute_tool spans:
    - gen_ai.operation.name: "execute_tool" (Required)
    - gen_ai.tool.name: Name of the tool (Recommended)
    - gen_ai.tool.call.id: Tool call identifier (Recommended if available)
    - gen_ai.tool.type: Type classification - "function", "extension", or "datastore" (Recommended if available)
    - gen_ai.tool.description: Tool description (Recommended if available)
    - gen_ai.tool.call.arguments: Parameters passed to tool (Opt-In, may contain sensitive data)
    - gen_ai.tool.call.result: Result returned by tool (Opt-In, may contain sensitive data)
    - error.type: Error type if operation failed (Conditionally Required)
    """

    # --- Execution description (maps to execute_tool span attributes) ---
    # gen_ai.tool.type: "function", "extension", or "datastore".
    tool_type: str | None = None
    # gen_ai.tool.description: what the tool does.
    tool_description: str | None = None
    # gen_ai.tool.call.result: value returned by the tool (Opt-In, may
    # contain sensitive data).
    tool_result: Any = None
    # error.type: set when the tool call failed.
    error_type: str | None = None

    # --- Lifecycle state (populated by TelemetryHandler) ---
    # Token returned when the span was attached to the context.
    context_token: ContextToken | None = None
    # Span representing this tool execution.
    span: Span | None = None
    # Monotonic timestamp taken when the tool call was started.
    monotonic_start_s: float | None = None
109+
110+
72111
@dataclass()
73112
class ToolCallResponse:
74113
"""Represents a tool call result sent to the model or a built-in tool call outcome and details

0 commit comments

Comments
 (0)