Skip to content
1 change: 1 addition & 0 deletions util/opentelemetry-util-genai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
of repeatedly failing on every upload ([#4390](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4390)).
- Refactor public API: add factory methods (`start_inference`, `start_embedding`, `start_tool`, `start_workflow`) and invocation-owned lifecycle (`invocation.stop()` / `invocation.fail(exc)`); rename `LLMInvocation` → `InferenceInvocation` and `ToolCall` → `ToolInvocation`. Existing usages remain fully functional via deprecated aliases.
([#4391](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4391))
- Add metrics to ToolInvocations ([#4443](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4443))


## Version 0.3b0 (2026-02-20)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAI,
)
from opentelemetry.semconv.attributes import server_attributes
from opentelemetry.trace import Tracer
from opentelemetry.util.genai._invocation import Error, GenAIInvocation
from opentelemetry.util.genai.metrics import InvocationMetricsRecorder
Expand All @@ -45,7 +46,7 @@ class ToolInvocation(GenAIInvocation):
- error.type: Error type if operation failed (Conditionally Required)
"""

def __init__(
def __init__( # pylint: disable=too-many-locals
Comment thread
keith-decker marked this conversation as resolved.
Outdated
self,
tracer: Tracer,
metrics_recorder: InvocationMetricsRecorder,
Expand All @@ -57,6 +58,9 @@ def __init__(
tool_type: str | None = None,
tool_description: str | None = None,
tool_result: Any = None,
provider: str | None = None,
server_address: str | None = None,
Comment thread
lmolkova marked this conversation as resolved.
Outdated
server_port: int | None = None,
attributes: dict[str, Any] | None = None,
metric_attributes: dict[str, Any] | None = None,
) -> None:
Expand All @@ -77,8 +81,24 @@ def __init__(
self.tool_type = tool_type
self.tool_description = tool_description
self.tool_result = tool_result
self.provider = provider
self.server_address = server_address
self.server_port = server_port
self._start()

def _get_metric_attributes(self) -> dict[str, Any]:
    """Build the attributes attached to metrics for this tool invocation.

    The operation name is always included. Provider name, server address
    and server port are included only when their value is not ``None``.
    Entries from ``self.metric_attributes`` are merged last, so they win
    over any of the keys above on collision.

    Returns:
        A new dict of metric attribute names to values.
    """
    optional_pairs = (
        (GenAI.GEN_AI_PROVIDER_NAME, self.provider),
        (server_attributes.SERVER_ADDRESS, self.server_address),
        (server_attributes.SERVER_PORT, self.server_port),
    )
    result: dict[str, Any] = {
        GenAI.GEN_AI_OPERATION_NAME: self._operation_name,
    }
    for key, value in optional_pairs:
        # Unset optional fields are omitted rather than emitted as None.
        if value is not None:
            result[key] = value
    result.update(self.metric_attributes)
    return result

def _apply_finish(self, error: Error | None = None) -> None:
if error is not None:
self._apply_error_attributes(error)
Expand All @@ -94,3 +114,4 @@ def _apply_finish(self, error: Error | None = None) -> None:
}
attributes.update(self.attributes)
self.span.set_attributes(attributes)
self._metrics_recorder.record(self)
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,9 @@ def start_tool(
tool_call_id: str | None = None,
tool_type: str | None = None,
tool_description: str | None = None,
provider: str | None = None,
server_address: str | None = None,
server_port: int | None = None,
) -> ToolInvocation:
"""Create and start a tool invocation.

Expand All @@ -186,6 +189,9 @@ def start_tool(
tool_call_id=tool_call_id,
tool_type=tool_type,
tool_description=tool_description,
provider=provider,
server_address=server_address,
server_port=server_port,
)

def start_workflow(
Expand Down Expand Up @@ -282,6 +288,9 @@ def tool(
tool_call_id: str | None = None,
tool_type: str | None = None,
tool_description: str | None = None,
provider: str | None = None,
server_address: str | None = None,
server_port: int | None = None,
) -> AbstractContextManager[ToolInvocation]:
"""Context manager for Tool invocations.

Expand All @@ -297,6 +306,9 @@ def tool(
tool_call_id=tool_call_id,
tool_type=tool_type,
tool_description=tool_description,
provider=provider,
server_address=server_address,
server_port=server_port,
)._managed()

def workflow(
Expand Down
81 changes: 81 additions & 0 deletions util/opentelemetry-util-genai/tests/test_handler_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,84 @@ def _assert_metric_scope_schema_urls(
self.assertEqual(
scope_metric.scope.schema_url, expected_schema_url
)


class TelemetryHandlerToolMetricsTest(TestBase):
    """Metric tests for tool invocations started through TelemetryHandler."""

    def _harvest_metrics(self) -> Dict[str, List[Any]]:
        """Return every collected data point, grouped by metric name."""
        grouped: Dict[str, List[Any]] = {}
        for metric in self.get_sorted_metrics() or []:
            data_points = metric.data.data_points or []
            grouped.setdefault(metric.name, []).extend(data_points)
        return grouped

    def test_stop_tool_records_duration(self) -> None:
        """Stopping a tool yields one duration point with its attributes."""
        duration_metric = "gen_ai.client.operation.duration"
        telemetry = TelemetryHandler(
            tracer_provider=self.tracer_provider,
            meter_provider=self.meter_provider,
        )
        # The timer is patched to 1000.0 at start and 1002.5 at stop, so
        # the expected recorded duration is 2.5.
        with patch("timeit.default_timer", return_value=1000.0):
            tool_call = telemetry.start_tool(
                "get_weather",
                provider="test-provider",
                server_address="api.example.com",
                server_port=443,
            )
        tool_call.metric_attributes = {"custom.key": "custom_value"}

        with patch("timeit.default_timer", return_value=1002.5):
            tool_call.stop()

        harvested = self._harvest_metrics()
        self.assertIn(duration_metric, harvested)
        points = harvested[duration_metric]
        self.assertEqual(len(points), 1)
        point = points[0]
        point_attrs = point.attributes

        self.assertEqual(
            point_attrs[GenAI.GEN_AI_OPERATION_NAME],
            "execute_tool",
        )
        self.assertEqual(
            point_attrs[GenAI.GEN_AI_PROVIDER_NAME],
            "test-provider",
        )
        self.assertEqual(point_attrs["server.address"], "api.example.com")
        self.assertEqual(point_attrs["server.port"], 443)
        self.assertEqual(point_attrs["custom.key"], "custom_value")
        self.assertAlmostEqual(point.sum, 2.5, places=3)
        # Tool invocations must not emit a token-usage metric.
        self.assertNotIn("gen_ai.client.token.usage", harvested)

    def test_fail_tool_records_duration_with_error(self) -> None:
        """Failing a tool yields one duration point tagged with error.type."""
        duration_metric = "gen_ai.client.operation.duration"
        telemetry = TelemetryHandler(
            tracer_provider=self.tracer_provider,
            meter_provider=self.meter_provider,
        )
        # The timer is patched to 500.0 at start and 501.5 at failure, so
        # the expected recorded duration is 1.5.
        with patch("timeit.default_timer", return_value=500.0):
            tool_call = telemetry.start_tool(
                "failing_tool", provider="err-provider"
            )

        failure = Error(message="Tool execution failed", type=RuntimeError)
        with patch("timeit.default_timer", return_value=501.5):
            tool_call.fail(failure)

        harvested = self._harvest_metrics()
        self.assertIn(duration_metric, harvested)
        points = harvested[duration_metric]
        self.assertEqual(len(points), 1)
        point = points[0]
        point_attrs = point.attributes

        self.assertEqual(point_attrs["error.type"], "RuntimeError")
        self.assertEqual(
            point_attrs[GenAI.GEN_AI_OPERATION_NAME],
            "execute_tool",
        )
        self.assertAlmostEqual(point.sum, 1.5, places=3)
        # Tool invocations must not emit a token-usage metric.
        self.assertNotIn("gen_ai.client.token.usage", harvested)
Loading