def _get_metric_attributes(self) -> dict[str, Any]:
    """Build the attribute set attached to this invocation's metrics.

    The result always starts with the GenAI operation name; every entry
    of ``self.metric_attributes`` is merged in afterwards, so an entry
    that reuses the operation-name key replaces the default value.

    Returns:
        A newly created dict; ``self.metric_attributes`` is not modified.
    """
    operation_key = GenAI.GEN_AI_OPERATION_NAME
    merged: dict[str, Any] = {operation_key: self._operation_name}
    # Merge last so explicitly supplied metric attributes take precedence.
    merged.update(self.metric_attributes)
    return merged
class TelemetryHandlerToolMetricsTest(TestBase):
    """Metric checks for tool invocations started via ``start_tool``.

    Both tests patch ``timeit.default_timer`` around the start and the
    finish of the invocation and then assert on the recorded
    ``gen_ai.client.operation.duration`` data point.
    """

    def _harvest_metrics(self) -> Dict[str, List[Any]]:
        """Return the collected data points grouped by metric name."""
        grouped: Dict[str, List[Any]] = {}
        for metric in self.get_sorted_metrics() or []:
            data_points = metric.data.data_points or []
            grouped.setdefault(metric.name, []).extend(data_points)
        return grouped

    def test_stop_tool_records_duration(self) -> None:
        # Patched clock: start at 1000.0, stop at 1002.5. Expect exactly
        # one duration point with sum 2.5 that carries the operation name
        # and the custom metric attribute, and no token-usage metric.
        duration_metric = "gen_ai.client.operation.duration"
        token_metric = "gen_ai.client.token.usage"

        telemetry = TelemetryHandler(
            tracer_provider=self.tracer_provider,
            meter_provider=self.meter_provider,
        )
        with patch("timeit.default_timer", return_value=1000.0):
            tool_call = telemetry.start_tool("get_weather")
            tool_call.metric_attributes = {"custom.key": "custom_value"}

        with patch("timeit.default_timer", return_value=1002.5):
            tool_call.stop()

        harvested = self._harvest_metrics()
        self.assertIn(duration_metric, harvested)
        points = harvested[duration_metric]
        self.assertEqual(len(points), 1)
        point = points[0]

        operation_name = point.attributes[GenAI.GEN_AI_OPERATION_NAME]
        self.assertEqual(operation_name, "execute_tool")
        custom_value = point.attributes["custom.key"]
        self.assertEqual(custom_value, "custom_value")
        self.assertAlmostEqual(point.sum, 2.5, places=3)
        self.assertNotIn(token_metric, harvested)

    def test_fail_tool_records_duration_with_error(self) -> None:
        # Patched clock: start at 500.0, fail at 501.5. Expect exactly one
        # duration point with sum 1.5 that carries ``error.type`` and the
        # operation name, and no token-usage metric.
        duration_metric = "gen_ai.client.operation.duration"
        token_metric = "gen_ai.client.token.usage"

        telemetry = TelemetryHandler(
            tracer_provider=self.tracer_provider,
            meter_provider=self.meter_provider,
        )
        with patch("timeit.default_timer", return_value=500.0):
            tool_call = telemetry.start_tool("failing_tool")

        failure = Error(message="Tool execution failed", type=RuntimeError)
        with patch("timeit.default_timer", return_value=501.5):
            tool_call.fail(failure)

        harvested = self._harvest_metrics()
        self.assertIn(duration_metric, harvested)
        points = harvested[duration_metric]
        self.assertEqual(len(points), 1)
        point = points[0]

        error_type = point.attributes["error.type"]
        self.assertEqual(error_type, "RuntimeError")
        operation_name = point.attributes[GenAI.GEN_AI_OPERATION_NAME]
        self.assertEqual(operation_name, "execute_tool")
        self.assertAlmostEqual(point.sum, 1.5, places=3)
        self.assertNotIn(token_metric, harvested)