-
Notifications
You must be signed in to change notification settings - Fork 260
fix(openai): preserve native v1 stream contract #1627
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
b818885
2dcc02b
514171b
b5a7071
95c933d
bdf0d17
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,7 +21,7 @@ | |
| from collections import defaultdict | ||
| from dataclasses import dataclass | ||
| from datetime import datetime | ||
| from inspect import isclass | ||
| from inspect import isawaitable, isclass | ||
| from typing import Any, Optional, cast | ||
|
|
||
| from openai._types import NotGiven | ||
|
|
@@ -830,6 +830,191 @@ | |
| ) | ||
|
|
||
|
|
||
| _openai_stream_iter_hook_installed = False | ||
|
|
||
|
|
||
| def _install_openai_stream_iteration_hooks() -> None: | ||
| global _openai_stream_iter_hook_installed | ||
|
|
||
| if not _is_openai_v1(): | ||
| return | ||
|
|
||
| if not _openai_stream_iter_hook_installed: | ||
| original_iter = openai.Stream.__iter__ | ||
| original_aiter = openai.AsyncStream.__aiter__ | ||
|
|
||
| def traced_iter(self: Any) -> Any: | ||
| try: | ||
| yield from original_iter(self) | ||
| finally: | ||
| finalize_once = getattr(self, "_langfuse_finalize_once", None) | ||
| if finalize_once is not None: | ||
| finalize_once() | ||
|
|
||
| async def traced_aiter(self: Any) -> Any: | ||
| try: | ||
| async for item in original_aiter(self): | ||
| yield item | ||
| finally: | ||
| finalize_once = getattr(self, "_langfuse_finalize_once", None) | ||
| if finalize_once is not None: | ||
| await finalize_once() | ||
|
|
||
|
Check failure on line 862 in langfuse/openai.py
|
||
| setattr(openai.Stream, "__iter__", traced_iter) | ||
| setattr(openai.AsyncStream, "__aiter__", traced_aiter) | ||
| _openai_stream_iter_hook_installed = True | ||
|
claude[bot] marked this conversation as resolved.
|
||
|
|
||
|
|
||
| def _finalize_stream_response( | ||
| *, | ||
| resource: OpenAiDefinition, | ||
| items: list[Any], | ||
| generation: LangfuseGeneration, | ||
| completion_start_time: Optional[datetime], | ||
| ) -> None: | ||
| try: | ||
| model, completion, usage, metadata = ( | ||
| _extract_streamed_response_api_response(items) | ||
| if resource.object == "Responses" or resource.object == "AsyncResponses" | ||
| else _extract_streamed_openai_response(resource, items) | ||
| ) | ||
|
|
||
| _create_langfuse_update( | ||
| completion, | ||
| generation, | ||
| completion_start_time, | ||
| model=model, | ||
| usage=usage, | ||
| metadata=metadata, | ||
| ) | ||
| except Exception: | ||
| pass | ||
| finally: | ||
| generation.end() | ||
|
|
||
|
|
||
| def _instrument_openai_stream( | ||
| *, | ||
| resource: OpenAiDefinition, | ||
| response: Any, | ||
| generation: LangfuseGeneration, | ||
| ) -> Any: | ||
| if not hasattr(response, "_iterator"): | ||
| return LangfuseResponseGeneratorSync( | ||
| resource=resource, | ||
| response=response, | ||
| generation=generation, | ||
| ) | ||
|
|
||
| items: list[Any] = [] | ||
| raw_iterator = response._iterator | ||
| completion_start_time: Optional[datetime] = None | ||
| is_finalized = False | ||
| close = response.close | ||
|
|
||
| def finalize_once() -> None: | ||
| nonlocal is_finalized | ||
| if is_finalized: | ||
| return | ||
|
|
||
| is_finalized = True | ||
| _finalize_stream_response( | ||
| resource=resource, | ||
| items=items, | ||
| generation=generation, | ||
| completion_start_time=completion_start_time, | ||
| ) | ||
|
|
||
| response._langfuse_finalize_once = finalize_once # type: ignore[attr-defined] | ||
|
|
||
| def traced_iterator() -> Any: | ||
| nonlocal completion_start_time | ||
| try: | ||
| for item in raw_iterator: | ||
| items.append(item) | ||
|
|
||
| if completion_start_time is None: | ||
| completion_start_time = _get_timestamp() | ||
|
|
||
| yield item | ||
| finally: | ||
| finalize_once() | ||
|
|
||
| def traced_close() -> Any: | ||
| try: | ||
| return close() | ||
| finally: | ||
| finalize_once() | ||
|
|
||
| response._iterator = traced_iterator() | ||
| response.close = traced_close | ||
|
claude[bot] marked this conversation as resolved.
|
||
|
|
||
| return response | ||
|
|
||
|
|
||
| def _instrument_openai_async_stream( | ||
| *, | ||
| resource: OpenAiDefinition, | ||
| response: Any, | ||
| generation: LangfuseGeneration, | ||
| ) -> Any: | ||
| if not hasattr(response, "_iterator"): | ||
| return LangfuseResponseGeneratorAsync( | ||
| resource=resource, | ||
| response=response, | ||
| generation=generation, | ||
| ) | ||
|
|
||
| items: list[Any] = [] | ||
| raw_iterator = response._iterator | ||
| completion_start_time: Optional[datetime] = None | ||
| is_finalized = False | ||
| close = response.close | ||
|
|
||
| async def finalize_once() -> None: | ||
| nonlocal is_finalized | ||
| if is_finalized: | ||
| return | ||
|
|
||
| is_finalized = True | ||
| _finalize_stream_response( | ||
| resource=resource, | ||
| items=items, | ||
| generation=generation, | ||
| completion_start_time=completion_start_time, | ||
| ) | ||
|
|
||
| response._langfuse_finalize_once = finalize_once # type: ignore[attr-defined] | ||
|
|
||
| async def traced_iterator() -> Any: | ||
| nonlocal completion_start_time | ||
| try: | ||
| async for item in raw_iterator: | ||
| items.append(item) | ||
|
|
||
| if completion_start_time is None: | ||
| completion_start_time = _get_timestamp() | ||
|
|
||
| yield item | ||
| finally: | ||
| await finalize_once() | ||
|
|
||
| async def traced_close() -> Any: | ||
| try: | ||
| return await close() | ||
| finally: | ||
| await finalize_once() | ||
|
|
||
| async def traced_aclose() -> Any: | ||
| return await traced_close() | ||
|
|
||
| response._iterator = traced_iterator() | ||
| response.close = traced_close | ||
| response.aclose = traced_aclose | ||
|
hassiebp marked this conversation as resolved.
|
||
|
|
||
| return response | ||
|
|
||
|
|
||
| @_langfuse_wrapper | ||
| def _wrap( | ||
| open_ai_resource: OpenAiDefinition, wrapped: Any, args: Any, kwargs: Any | ||
|
|
@@ -860,13 +1045,19 @@ | |
| prompt=langfuse_data.get("prompt", None), | ||
| ) | ||
|
|
||
| try: | ||
| openai_response = wrapped(**arg_extractor.get_openai_args()) | ||
|
|
||
| if _is_streaming_response(openai_response): | ||
| if _is_openai_v1() and isinstance(openai_response, openai.Stream): | ||
| return _instrument_openai_stream( | ||
| resource=open_ai_resource, | ||
| response=openai_response, | ||
| generation=generation, | ||
| ) | ||
| elif _is_streaming_response(openai_response): | ||
| return LangfuseResponseGeneratorSync( | ||
| resource=open_ai_resource, | ||
| response=openai_response, | ||
|
Check warning on line 1060 in langfuse/openai.py
|
||
|
Comment on lines
1048
to
1060
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.

🟡 Extended reasoning — what the dead code is and how it manifests

The specific code path that makes them unreachable: in […].

Why existing code does not prevent the confusion: there is no comment or refactoring to signal that the Stream/AsyncStream branches inside […].

What the impact would be: there is no runtime impact today. The dead conditions never fire, and the correct instrumentation paths (…).

Addressing the refutation: one verifier noted this is benign dead code with no incorrect runtime behavior. That is accurate, and severity […].

How to fix it: remove the last two conditions from `_is_streaming_response`:

def _is_streaming_response(response: Any) -> bool:
    return (
        isinstance(response, types.GeneratorType)
        or isinstance(response, types.AsyncGeneratorType)
    )

This makes the function's actual contract explicit: it only handles legacy generator-style streams. Native OpenAI v1 streams are handled by the explicit `isinstance` checks in […]. |
||
| generation=generation, | ||
| ) | ||
|
|
||
|
|
@@ -934,7 +1125,13 @@ | |
| try: | ||
| openai_response = await wrapped(**arg_extractor.get_openai_args()) | ||
|
|
||
| if _is_streaming_response(openai_response): | ||
| if _is_openai_v1() and isinstance(openai_response, openai.AsyncStream): | ||
| return _instrument_openai_async_stream( | ||
| resource=open_ai_resource, | ||
| response=openai_response, | ||
| generation=generation, | ||
| ) | ||
| elif _is_streaming_response(openai_response): | ||
| return LangfuseResponseGeneratorAsync( | ||
| resource=open_ai_resource, | ||
| response=openai_response, | ||
|
|
@@ -994,6 +1191,7 @@ | |
|
|
||
|
|
||
| register_tracing() | ||
| _install_openai_stream_iteration_hooks() | ||
|
|
||
|
|
||
| class LangfuseResponseGeneratorSync: | ||
|
|
@@ -1010,6 +1208,7 @@ | |
| self.response = response | ||
| self.generation = generation | ||
| self.completion_start_time: Optional[datetime] = None | ||
| self._is_finalized = False | ||
|
|
||
| def __iter__(self) -> Any: | ||
| try: | ||
|
|
@@ -1042,29 +1241,28 @@ | |
| return self.__iter__() | ||
|
|
||
| def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: | ||
| pass | ||
| self.close() | ||
|
|
||
| def _finalize(self) -> None: | ||
| try: | ||
| model, completion, usage, metadata = ( | ||
| _extract_streamed_response_api_response(self.items) | ||
| if self.resource.object == "Responses" | ||
| or self.resource.object == "AsyncResponses" | ||
| else _extract_streamed_openai_response(self.resource, self.items) | ||
| ) | ||
| def close(self) -> None: | ||
| close = getattr(self.response, "close", None) | ||
|
|
||
| _create_langfuse_update( | ||
| completion, | ||
| self.generation, | ||
| self.completion_start_time, | ||
| model=model, | ||
| usage=usage, | ||
| metadata=metadata, | ||
| ) | ||
| except Exception: | ||
| pass | ||
| try: | ||
| if callable(close): | ||
| close() | ||
| finally: | ||
| self.generation.end() | ||
| self._finalize() | ||
|
|
||
| def _finalize(self) -> None: | ||
| if self._is_finalized: | ||
| return | ||
|
|
||
| self._is_finalized = True | ||
| _finalize_stream_response( | ||
| resource=self.resource, | ||
| items=self.items, | ||
| generation=self.generation, | ||
| completion_start_time=self.completion_start_time, | ||
| ) | ||
|
|
||
|
|
||
| class LangfuseResponseGeneratorAsync: | ||
|
|
@@ -1081,6 +1279,7 @@ | |
| self.response = response | ||
| self.generation = generation | ||
| self.completion_start_time: Optional[datetime] = None | ||
| self._is_finalized = False | ||
|
|
||
| async def __aiter__(self) -> Any: | ||
| try: | ||
|
|
@@ -1113,40 +1312,56 @@ | |
| return self.__aiter__() | ||
|
|
||
| async def __aexit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: | ||
| pass | ||
| await self.aclose() | ||
|
|
||
| async def _finalize(self) -> None: | ||
| try: | ||
| model, completion, usage, metadata = ( | ||
| _extract_streamed_response_api_response(self.items) | ||
| if self.resource.object == "Responses" | ||
| or self.resource.object == "AsyncResponses" | ||
| else _extract_streamed_openai_response(self.resource, self.items) | ||
| ) | ||
|
|
||
| _create_langfuse_update( | ||
| completion, | ||
| self.generation, | ||
| self.completion_start_time, | ||
| model=model, | ||
| usage=usage, | ||
| metadata=metadata, | ||
| ) | ||
| except Exception: | ||
| pass | ||
| finally: | ||
| self.generation.end() | ||
| if self._is_finalized: | ||
| return | ||
|
|
||
| self._is_finalized = True | ||
| _finalize_stream_response( | ||
| resource=self.resource, | ||
| items=self.items, | ||
| generation=self.generation, | ||
| completion_start_time=self.completion_start_time, | ||
| ) | ||
|
|
||
| async def close(self) -> None: | ||
| """Close the response and release the connection. | ||
|
|
||
| Automatically called if the response body is read to completion. | ||
| """ | ||
| await self.response.close() | ||
| close = getattr(self.response, "close", None) | ||
| aclose = getattr(self.response, "aclose", None) | ||
|
|
||
| try: | ||
| if callable(close): | ||
| result = close() | ||
| if isawaitable(result): | ||
| await result | ||
| elif callable(aclose): | ||
| result = aclose() | ||
| if isawaitable(result): | ||
| await result | ||
| finally: | ||
| await self._finalize() | ||
|
|
||
| async def aclose(self) -> None: | ||
| """Close the response and release the connection. | ||
|
|
||
| Automatically called if the response body is read to completion. | ||
| """ | ||
| await self.response.aclose() | ||
| aclose = getattr(self.response, "aclose", None) | ||
| close = getattr(self.response, "close", None) | ||
|
|
||
| try: | ||
| if callable(aclose): | ||
| result = aclose() | ||
| if isawaitable(result): | ||
| await result | ||
| elif callable(close): | ||
| result = close() | ||
| if isawaitable(result): | ||
| await result | ||
| finally: | ||
| await self._finalize() | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔴 The `traced_aiter` hook in `_install_openai_stream_iteration_hooks` is an async generator (`async for item in original_aiter(self): yield item`), so its `finally` block does not run synchronously when the caller does `async for chunk in stream: break` — Python defers async generator finalization to asyncio's `sys.set_asyncgen_hooks` mechanism (PEP 525), meaning `generation.end()` is called only after multiple event loop turns, or never in non-asyncio environments. By contrast, the sync `traced_iter` uses `yield from original_iter(self)`, which propagates `GeneratorExit` synchronously through `yield from` and runs the `finally` block immediately on break. The test at lines 507-509 acknowledges this with a 5-turn `asyncio.sleep(0)` workaround that has no correctness guarantee; a `langfuse.flush()` call immediately after `async for chunk in stream: break` without that workaround will silently miss the generation end event.

Extended reasoning

What the bug is and how it manifests

`_install_openai_stream_iteration_hooks` installs both a sync hook on `openai.Stream.__iter__` and an async hook on `openai.AsyncStream.__aiter__`. The sync hook uses `yield from original_iter(self)` inside a `try/finally` block, which means when the outer generator is closed (due to `for ... break`), Python synchronously propagates `GeneratorExit` through `yield from` into the sub-generator, and the `finally` block calls `finalize_once()` deterministically. The async hook is implemented as an async generator with `async for item in original_aiter(self): yield item`, making the `finally` block's execution dependent on async generator finalization semantics defined by PEP 525 — which are non-deterministic.

The specific code path that triggers it

When a user does `async for chunk in stream: break`, Python calls `stream.__aiter__()`, which returns the async generator object from `traced_aiter`. After the first chunk is yielded and `break` fires, Python needs to close this async generator. Unlike synchronous generators (where `for ... break` synchronously calls `.close()`), Python does not synchronously call `.aclose()` on abandoned async generators. Instead, PEP 525 specifies that async generator finalization is handled by `sys.set_asyncgen_hooks`, which asyncio registers to schedule `.aclose()` on future event loop iterations. The `finally` block in `traced_aiter` — which calls `await finalize_once()` — is therefore deferred.

Why existing code does not prevent it

The `is_finalized` guard and `response._langfuse_finalize_once` are correctly set up, but they only matter once `finalize_once` is actually called. The problem is that the async `finally` block that calls `finalize_once` is not guaranteed to run promptly. In the sync path, `yield from` provides a deterministic propagation guarantee — `GeneratorExit` travels synchronously through the `yield from` chain and the `finally` block executes before the `break` statement returns control to the caller. No equivalent exists for async generators: there is no `async yield from` in Python.

What the impact would be

In asyncio environments, finalization is deferred by at least one event loop turn (and requires asyncio's asyncgen hooks to be registered). A user who calls `langfuse.flush()` immediately after `async for chunk in stream: break` without awaiting multiple event loop turns will miss the generation end event — the span will appear to be incomplete or missing. In non-asyncio environments (trio, anyio with a non-asyncio backend, or any context where asyncio hasn't registered its asyncgen hooks), finalization may never occur at all. The test itself documents this fragility at lines 507-509 with the comment "Async generator finalizers are scheduled across event-loop turns" and the 5x `asyncio.sleep(0)` heuristic, which has no formal correctness guarantee.

Addressing the refutation

One verifier argues this is a pre-existing limitation because the old `LangfuseResponseGeneratorAsync.__aiter__` also used an async generator with the same `try/finally` pattern. This is accurate: the behavior predates this PR. However, the PR explicitly fixed the sync path (via `yield from`) to be deterministic and acknowledges the break-early use case with a dedicated test (`test_openai_stream_break_still_finalizes_generation`). The async counterpart test (`test_openai_async_stream_break_still_finalizes_generation`) requires the 5x sleep workaround, confirming the asymmetry was introduced consciously but not resolved. The correctness gap is real and the workaround is fragile.

How to fix it

A true fix requires replacing the async generator approach in `traced_aiter` with one that provides deterministic finalization on break. One approach is to implement a custom async iterator class (not an async generator) with an `__anext__` method that detects exhaustion/cancellation and calls `finalize_once` directly, and an `aclose` method that also calls `finalize_once`. Since there is no async equivalent of `yield from` that propagates `GeneratorExit` synchronously, any solution based on async generators will have this limitation.

Step-by-step proof

1. `_instrument_openai_async_stream` sets `response._langfuse_finalize_once = finalize_once` and replaces `response._iterator` with an async generator `traced_iterator()`.
2. `_install_openai_stream_iteration_hooks` replaces `openai.AsyncStream.__aiter__` with the async generator `traced_aiter`.
3. User code runs `async for chunk in stream: break`.
4. `stream.__aiter__()` returns async generator object G from `traced_aiter(stream)`.
5. G iterates `original_aiter`, which reads from `response._iterator = traced_iterator()`. First chunk yielded.
6. `break` fires. Python needs to close G (the async generator from `traced_aiter`).
7. Python does not synchronously call `G.aclose()`. Instead asyncio's asyncgen finalizer hook is registered and `aclose` is scheduled for a future event loop turn.
8. At this point `finalize_once` has NOT been called, so `generation.end()` has NOT been called.
9. The test must `await asyncio.sleep(0)` before `langfuse.flush()` to give asyncio's hook time to schedule `G.aclose()`, which then runs the `finally` block. Without these yields, `generation.end()` would not be called.