Skip to content
1 change: 1 addition & 0 deletions util/opentelemetry-util-genai/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
of repeatedly failing on every upload ([#4390](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4390)).
- Refactor public API: add factory methods (`start_inference`, `start_embedding`, `start_tool`, `start_workflow`) and invocation-owned lifecycle (`invocation.stop()` / `invocation.fail(exc)`); rename `LLMInvocation` → `InferenceInvocation` and `ToolCall` → `ToolInvocation`. Existing usages remain fully functional via deprecated aliases.
([#4391](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4391))
- Add metrics to ToolInvocations ([#4443](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4443))


## Version 0.3b0 (2026-02-20)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class ToolInvocation(GenAIInvocation):
- error.type: Error type if operation failed (Conditionally Required)
"""

def __init__(
def __init__( # pylint: disable=too-many-locals
Comment thread
keith-decker marked this conversation as resolved.
Outdated
self,
tracer: Tracer,
metrics_recorder: InvocationMetricsRecorder,
Expand Down Expand Up @@ -79,6 +79,13 @@ def __init__(
self.tool_result = tool_result
self._start()

def _get_metric_attributes(self) -> dict[str, Any]:
    """Return the attributes attached to metrics for this invocation.

    The operation name is placed first and the entries of
    ``self.metric_attributes`` are merged on top of it, so on a key
    collision the value from ``self.metric_attributes`` wins.
    """
    return {
        GenAI.GEN_AI_OPERATION_NAME: self._operation_name,
        **self.metric_attributes,
    }

def _apply_finish(self, error: Error | None = None) -> None:
if error is not None:
self._apply_error_attributes(error)
Expand All @@ -94,3 +101,4 @@ def _apply_finish(self, error: Error | None = None) -> None:
}
attributes.update(self.attributes)
self.span.set_attributes(attributes)
self._metrics_recorder.record(self)
66 changes: 66 additions & 0 deletions util/opentelemetry-util-genai/tests/test_handler_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,69 @@ def _assert_metric_scope_schema_urls(
self.assertEqual(
scope_metric.scope.schema_url, expected_schema_url
)


class TelemetryHandlerToolMetricsTest(TestBase):
    """Metrics recorded when a tool invocation is stopped or failed.

    Each test patches ``timeit.default_timer`` once around the start call and
    once around the finish call; the asserted duration is the difference
    between the two patched return values.
    """

    _DURATION_METRIC = "gen_ai.client.operation.duration"
    _TOKEN_USAGE_METRIC = "gen_ai.client.token.usage"

    def _harvest_metrics(self) -> Dict[str, List[Any]]:
        """Return all collected data points grouped by metric name."""
        by_name: Dict[str, List[Any]] = {}
        for metric in self.get_sorted_metrics() or []:
            new_points = metric.data.data_points or []
            if metric.name not in by_name:
                by_name[metric.name] = []
            by_name[metric.name].extend(new_points)
        return by_name

    def _build_handler(self) -> "TelemetryHandler":
        """Create a handler wired to this test's tracer and meter providers."""
        return TelemetryHandler(
            tracer_provider=self.tracer_provider,
            meter_provider=self.meter_provider,
        )

    def _sole_duration_point(self, harvested: Dict[str, List[Any]]) -> Any:
        """Assert exactly one duration data point exists and return it."""
        self.assertIn(self._DURATION_METRIC, harvested)
        points = harvested[self._DURATION_METRIC]
        self.assertEqual(len(points), 1)
        return points[0]

    def test_stop_tool_records_duration(self) -> None:
        telemetry = self._build_handler()
        with patch("timeit.default_timer", return_value=1000.0):
            tool_call = telemetry.start_tool("get_weather")
            tool_call.metric_attributes = {"custom.key": "custom_value"}

        with patch("timeit.default_timer", return_value=1002.5):
            tool_call.stop()

        harvested = self._harvest_metrics()
        point = self._sole_duration_point(harvested)
        point_attrs = point.attributes

        self.assertEqual(
            point_attrs[GenAI.GEN_AI_OPERATION_NAME], "execute_tool"
        )
        # Caller-supplied metric attributes must reach the data point.
        self.assertEqual(point_attrs["custom.key"], "custom_value")
        self.assertAlmostEqual(point.sum, 2.5, places=3)
        # Only the duration metric is expected here, not token usage.
        self.assertNotIn(self._TOKEN_USAGE_METRIC, harvested)

    def test_fail_tool_records_duration_with_error(self) -> None:
        telemetry = self._build_handler()
        with patch("timeit.default_timer", return_value=500.0):
            tool_call = telemetry.start_tool("failing_tool")

        failure = Error(message="Tool execution failed", type=RuntimeError)
        with patch("timeit.default_timer", return_value=501.5):
            tool_call.fail(failure)

        harvested = self._harvest_metrics()
        point = self._sole_duration_point(harvested)
        point_attrs = point.attributes

        # The error type is reported by class name on the duration point.
        self.assertEqual(point_attrs["error.type"], "RuntimeError")
        self.assertEqual(
            point_attrs[GenAI.GEN_AI_OPERATION_NAME], "execute_tool"
        )
        self.assertAlmostEqual(point.sum, 1.5, places=3)
        self.assertNotIn(self._TOKEN_USAGE_METRIC, harvested)
Loading