Skip to content

Commit e1bc9a2

Browse files
committed
feat: add span handler for toolcall
1 parent 01ab8ae commit e1bc9a2

6 files changed

Lines changed: 564 additions & 15 deletions

File tree

util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py

Lines changed: 153 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,21 +17,17 @@
1717
1818
This module exposes the `TelemetryHandler` class, which manages the lifecycle of
1919
GenAI (Generative AI) invocations and emits telemetry data (spans and related attributes).
20-
It supports starting, stopping, and failing LLM invocations.
20+
It supports starting, stopping, and failing LLM invocations and tool call executions.
2121
2222
Classes:
2323
- TelemetryHandler: Manages GenAI invocation lifecycles and emits telemetry.
2424
2525
Functions:
2626
- get_telemetry_handler: Returns a singleton `TelemetryHandler` instance.
2727
28-
Usage:
28+
Usage - LLM Invocations:
2929
handler = get_telemetry_handler()
3030
31-
# Create an invocation object with your request data
32-
# The span and context_token attributes are set by the TelemetryHandler, and
33-
# managed by the TelemetryHandler during the lifecycle of the span.
34-
3531
# Use the context manager to manage the lifecycle of an LLM invocation.
3632
with handler.llm(invocation) as invocation:
3733
# Populate outputs and any additional attributes
@@ -45,17 +41,24 @@
4541
provider="my-provider",
4642
attributes={"custom": "attr"},
4743
)
48-
49-
# Start the invocation (opens a span)
5044
handler.start_llm(invocation)
51-
52-
# Populate outputs and any additional attributes, then stop (closes the span)
5345
invocation.output_messages = [...]
54-
invocation.attributes.update({"more": "attrs"})
5546
handler.stop_llm(invocation)
5647
57-
# Or, in case of error
58-
handler.fail_llm(invocation, Error(type="...", message="..."))
48+
Usage - Tool Call Executions:
49+
handler = get_telemetry_handler()
50+
51+
# Use the context manager to manage the lifecycle of a tool call.
52+
tool = ToolCall(name="get_weather", arguments={"location": "Paris"}, id="call_123")
53+
with handler.tool_call(tool) as tc:
54+
# Execute tool logic
55+
tc.tool_result = {"temp": 20, "condition": "sunny"}
56+
57+
# Or, manage the lifecycle manually
58+
tool = ToolCall(name="get_weather", arguments={"location": "Paris"})
59+
handler.start_tool_call(tool)
60+
tool.tool_result = {"temp": 20}
61+
handler.stop_tool_call(tool)
5962
"""
6063

6164
from __future__ import annotations
@@ -78,20 +81,25 @@
7881
get_tracer,
7982
set_span_in_context,
8083
)
84+
from opentelemetry.trace.status import Status, StatusCode
8185
from opentelemetry.util.genai.metrics import InvocationMetricsRecorder
8286
from opentelemetry.util.genai.span_utils import (
8387
_apply_embedding_finish_attributes,
8488
_apply_error_attributes,
8589
_apply_llm_finish_attributes,
8690
_get_embedding_span_name,
8791
_get_llm_span_name,
92+
_apply_tool_call_attributes,
93+
_finish_tool_call_span,
94+
_get_tool_call_span_name,
8895
_maybe_emit_llm_event,
8996
)
9097
from opentelemetry.util.genai.types import (
9198
EmbeddingInvocation,
9299
Error,
93100
GenAIInvocation,
94101
LLMInvocation,
102+
ToolCall,
95103
)
96104
from opentelemetry.util.genai.version import __version__
97105

@@ -258,6 +266,138 @@ def fail_llm(
258266
"""Fail an LLM invocation and end its span with error status."""
259267
return self._fail(invocation, error)
260268

269+
def start_tool_call(
    self,
    tool_call: ToolCall,
) -> ToolCall:
    """Open an ``execute_tool`` span for ``tool_call`` and make it current.

    The span is created with kind INTERNAL and named by
    ``_get_tool_call_span_name``. Attributes are applied immediately with
    ``capture_content=False``, so the opt-in arguments/result attributes
    are not written at start time.

    Args:
        tool_call: The tool call to begin tracking. Its ``span``,
            ``context_token`` and ``monotonic_start_s`` fields are
            overwritten here.

    Returns:
        The same ``tool_call`` object, now carrying the started span and
        the context token needed to stop or fail it later.
    """
    tracer = self._tracer
    span_name = _get_tool_call_span_name(tool_call)
    new_span = tracer.start_span(name=span_name, kind=SpanKind.INTERNAL)

    # Structure-only attributes at start; content capture is left to the
    # finalization step.
    _apply_tool_call_attributes(new_span, tool_call, capture_content=False)

    # Monotonic start timestamp, recorded on the tool call itself.
    tool_call.monotonic_start_s = timeit.default_timer()

    # Store the span and attach it to the current context; the returned
    # token is what stop/fail use to detach again.
    tool_call.span = new_span
    span_context = set_span_in_context(new_span)
    tool_call.context_token = otel_context.attach(span_context)
    return tool_call
306+
307+
def stop_tool_call(self, tool_call: ToolCall) -> ToolCall:  # pylint: disable=no-self-use
    """Finalize a tool call execution successfully and end its span.

    Finalization (span name, attributes, status) is delegated to
    ``_finish_tool_call_span`` with ``capture_content=True``. The context
    token is then detached and the span ended.

    Cleanup runs in a ``finally`` block: if finalization raises, the
    context is still detached and the span still ended, so a failure while
    writing attributes cannot leave a span attached to the current context.

    Args:
        tool_call: ToolCall instance previously passed to
            ``start_tool_call``.

    Returns:
        The same ToolCall. If it was never started (no span or no context
        token), it is returned untouched.
    """
    token = tool_call.context_token
    span = tool_call.span
    if token is None or span is None:
        # TODO: Provide feedback that this invocation was not started
        return tool_call

    try:
        # capture_content=True lets the result be recorded if the
        # configured capture mode permits it.
        _finish_tool_call_span(span, tool_call, capture_content=True)
    finally:
        # Always release the context and close the span, even when
        # finalization fails.
        otel_context.detach(token)
        span.end()

    return tool_call
332+
333+
def fail_tool_call(  # pylint: disable=no-self-use
    self, tool_call: ToolCall, error: Error
) -> ToolCall:
    """Fail a tool call execution and end its span with ERROR status.

    The error type's qualified name is stored on ``tool_call.error_type``
    so that finalization writes it as an attribute. After finalization the
    status is set again with ``error.message`` as the description.

    Cleanup runs in a ``finally`` block: if recording the failure itself
    raises, the context is still detached and the span still ended.

    Args:
        tool_call: ToolCall instance previously passed to
            ``start_tool_call``.
        error: Error details; ``error.type`` supplies the recorded type
            name and ``error.message`` the status description.

    Returns:
        The same ToolCall. If it was never started (no span or no context
        token), it is returned untouched.
    """
    token = tool_call.context_token
    span = tool_call.span
    if token is None or span is None:
        # TODO: Provide feedback that this invocation was not started
        return tool_call

    try:
        # Set error_type on tool_call so it's included in attributes
        tool_call.error_type = error.type.__qualname__

        # Finalize span with error
        _finish_tool_call_span(span, tool_call, capture_content=True)

        # Finalization sets ERROR without a description; add the message.
        span.set_status(Status(StatusCode.ERROR, error.message))
    finally:
        # Always release the context and close the span, even when
        # recording the failure raises.
        otel_context.detach(token)
        span.end()

    return tool_call
367+
368+
@contextmanager
def tool_call(
    self, tool_call: ToolCall | None = None
) -> Iterator[ToolCall]:
    """Context manager wrapping the full lifecycle of a tool call.

    Callers should only set data attributes on the yielded object and leave
    its span and context token alone.

    The span is started on entry. If the body raises an ``Exception``, the
    tool call is failed with an ``Error`` built from that exception and the
    exception is re-raised. Otherwise the tool call is stopped normally.
    When no ``tool_call`` is supplied, an empty one (blank name, no
    arguments, no id) is created.

    Example:
        with handler.tool_call(ToolCall(name="get_weather", arguments={"location": "Paris"})) as tc:
            # Execute tool logic
            tc.tool_result = {"temp": 20, "condition": "sunny"}
    """
    if tool_call is None:
        tool_call = ToolCall(name="", arguments={}, id=None)

    self.start_tool_call(tool_call)
    try:
        yield tool_call
    except Exception as exc:
        failure = Error(message=str(exc), type=type(exc))
        self.fail_tool_call(tool_call, failure)
        raise
    else:
        self.stop_tool_call(tool_call)
400+
261401
@contextmanager
262402
def llm(
263403
self, invocation: LLMInvocation | None = None

util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
LLMInvocation,
4040
MessagePart,
4141
OutputMessage,
42+
ToolCall,
4243
)
4344
from opentelemetry.util.genai.utils import (
4445
ContentCapturingMode,
@@ -357,6 +358,106 @@ def _get_embedding_response_attributes(
357358
return {key: value for key, value in optional_attrs if value is not None}
358359

359360

361+
def _get_tool_call_span_name(tool_call: ToolCall) -> str:
    """Build the span name for a tool call execution.

    The name has the form ``"execute_tool {tool name}"``. Surrounding
    whitespace is stripped, so an empty tool name yields just
    ``"execute_tool"``.
    """
    operation = "execute_tool"
    return f"{operation} {tool_call.name}".strip()
367+
368+
369+
def _apply_tool_call_attributes(
    span: Span,
    tool_call: ToolCall,
    capture_content: bool = False,
) -> None:
    """Copy the fields of a ToolCall onto ``span`` as execute_tool attributes.

    Reference:
    https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md#execute-tool-span

    Always set:
    - gen_ai.operation.name = "execute_tool"

    Set when the corresponding field is truthy:
    - gen_ai.tool.name
    - gen_ai.tool.call.id
    - gen_ai.tool.type
    - gen_ai.tool.description

    Set only when ``capture_content`` is true, experimental mode is on and
    the content capturing mode is SPAN_ONLY or SPAN_AND_EVENT (both may
    contain sensitive data):
    - gen_ai.tool.call.arguments (JSON string)
    - gen_ai.tool.call.result (JSON string)

    Set when ``tool_call.error_type`` is truthy:
    - error.type, together with an ERROR span status
    """
    # Required attribute
    span.set_attribute(GenAI.GEN_AI_OPERATION_NAME, "execute_tool")

    # Recommended attributes, each skipped when its field is empty
    if tool_call.name:
        span.set_attribute(GenAI.GEN_AI_TOOL_NAME, tool_call.name)
    if tool_call.id:
        span.set_attribute(GenAI.GEN_AI_TOOL_CALL_ID, tool_call.id)
    if tool_call.tool_type:
        span.set_attribute(GenAI.GEN_AI_TOOL_TYPE, tool_call.tool_type)
    if tool_call.tool_description:
        span.set_attribute(
            GenAI.GEN_AI_TOOL_DESCRIPTION, tool_call.tool_description
        )

    # Opt-in content: the caller must ask for it AND the global
    # configuration must allow content on spans. Short-circuiting keeps the
    # configuration lookups from running when capture_content is false.
    content_allowed = (
        capture_content
        and is_experimental_mode()
        and get_content_capturing_mode()
        in (
            ContentCapturingMode.SPAN_ONLY,
            ContentCapturingMode.SPAN_AND_EVENT,
        )
    )
    if content_allowed:
        call_arguments = tool_call.arguments
        if call_arguments is not None:
            # Serialized to a JSON string per spec
            span.set_attribute(
                GenAI.GEN_AI_TOOL_CALL_ARGUMENTS,
                gen_ai_json_dumps(call_arguments),
            )

        call_result = tool_call.tool_result
        if call_result is not None:
            span.set_attribute(
                GenAI.GEN_AI_TOOL_CALL_RESULT,
                gen_ai_json_dumps(call_result),
            )

    # Conditionally required: nothing more to do unless an error was recorded
    failure_type = tool_call.error_type
    if not failure_type:
        return
    span.set_attribute(error_attributes.ERROR_TYPE, failure_type)
    span.set_status(Status(StatusCode.ERROR))
437+
438+
439+
def _finish_tool_call_span(
    span: Span,
    tool_call: ToolCall,
    capture_content: bool = False,
) -> None:
    """Write the final name, attributes and status of a tool call span.

    The span is renamed from the tool call's current name, all attributes
    are re-applied (including the result when ``capture_content`` allows
    it), and the status is set to ERROR if ``tool_call.error_type`` is
    truthy, otherwise OK. The span is not ended here.
    """
    # The tool name may have been filled in after the span was started.
    final_name = _get_tool_call_span_name(tool_call)
    span.update_name(final_name)

    _apply_tool_call_attributes(span, tool_call, capture_content)

    # NOTE(review): OK is set explicitly on success; confirm this is the
    # intended policy for this instrumentation rather than leaving UNSET.
    final_code = StatusCode.ERROR if tool_call.error_type else StatusCode.OK
    span.set_status(Status(final_code))
459+
460+
360461
__all__ = [
361462
"_apply_llm_finish_attributes",
362463
"_apply_error_attributes",
@@ -370,4 +471,7 @@ def _get_embedding_response_attributes(
370471
"_get_embedding_request_attributes",
371472
"_get_embedding_response_attributes",
372473
"_get_embedding_span_name",
474+
"_apply_tool_call_attributes",
475+
"_finish_tool_call_span",
476+
"_get_tool_call_span_name",
373477
]

util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,45 @@ class ToolCallRequest:
6969
type: Literal["tool_call"] = "tool_call"
7070

7171

72+
@dataclass()
class ToolCall(ToolCallRequest):
    """A tool call whose execution is tracked with an ``execute_tool`` span.

    Extends ToolCallRequest with the extra data recorded on the span and
    with the bookkeeping fields TelemetryHandler uses to manage the span's
    lifecycle. For tool calls that merely appear as message parts
    (requested by the model, not executed here), prefer ToolCallRequest so
    the execution-tracking fields are not carried around.

    Reference: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-spans.md#execute-tool-span

    Attributes emitted on execute_tool spans:
    - gen_ai.operation.name: "execute_tool" (Required)
    - gen_ai.tool.name: Name of the tool (Recommended)
    - gen_ai.tool.call.id: Tool call identifier (Recommended if available)
    - gen_ai.tool.type: "function", "extension", or "datastore" (Recommended if available)
    - gen_ai.tool.description: Tool description (Recommended if available)
    - gen_ai.tool.call.arguments: Parameters passed to the tool (Opt-In, may contain sensitive data)
    - gen_ai.tool.call.result: Result returned by the tool (Opt-In, may contain sensitive data)
    - error.type: Error type if the operation failed (Conditionally Required)
    """

    # --- Execution data (recorded on the execute_tool span) ---
    # gen_ai.tool.type: "function", "extension", or "datastore"
    tool_type: str | None = None
    # gen_ai.tool.description: what the tool does
    tool_description: str | None = None
    # gen_ai.tool.call.result: value returned by the tool (Opt-In, may
    # contain sensitive data)
    tool_result: Any = None
    # error.type: set when the tool call failed
    error_type: str | None = None

    # --- Lifecycle bookkeeping (set and read by TelemetryHandler) ---
    # Token returned when the span was attached to the current context
    context_token: ContextToken | None = None
    # The span opened for this tool call
    span: Span | None = None
    # Monotonic clock reading taken when the tool call was started
    monotonic_start_s: float | None = None
109+
110+
72111
@dataclass()
73112
class ToolCallResponse:
74113
"""Represents a tool call result sent to the model or a built-in tool call outcome and details

0 commit comments

Comments
 (0)