@@ -170,6 +170,17 @@ def get_langfuse_args(self):
170170 return {** self .args , ** self .kwargs }
171171
def get_openai_args(self):
    """Return the keyword arguments to forward to the OpenAI client.

    For streaming calls (``stream`` is truthy) this makes sure
    ``stream_options["include_usage"]`` is present, because OpenAI only
    reports token usage on a stream when that flag is requested. An
    ``include_usage`` value already supplied by the caller is kept as is.

    Returns:
        ``self.kwargs`` (the same dict object). For streaming calls its
        ``stream_options`` entry may have been added or replaced.
    """
    from collections.abc import Mapping

    if not self.kwargs.get("stream"):
        return self.kwargs

    stream_options = self.kwargs.get("stream_options")

    if stream_options is None:
        # Key missing or explicitly None: there are no caller options to
        # preserve, so request usage reporting with a fresh dict.
        self.kwargs["stream_options"] = {"include_usage": True}
    elif isinstance(stream_options, Mapping):
        if "include_usage" not in stream_options:
            # Build a new dict instead of writing into the caller's
            # object, so the options they passed in are not mutated.
            self.kwargs["stream_options"] = dict(
                stream_options, include_usage=True
            )
    # Anything else (e.g. a "not given" sentinel object) is left untouched
    # rather than raising on the membership test.

    return self.kwargs
174185
175186
@@ -371,7 +382,11 @@ def _get_langfuse_data_from_kwargs(
371382
372383
373384def _create_langfuse_update (
374- completion , generation : StatefulGenerationClient , completion_start_time , model = None
385+ completion ,
386+ generation : StatefulGenerationClient ,
387+ completion_start_time ,
388+ model = None ,
389+ usage = None ,
375390):
376391 update = {
377392 "end_time" : _get_timestamp (),
@@ -381,6 +396,9 @@ def _create_langfuse_update(
381396 if model is not None :
382397 update ["model" ] = model
383398
399+ if usage is not None :
400+ update ["usage" ] = usage
401+
384402 generation .update (** update )
385403
386404
@@ -393,6 +411,7 @@ def _extract_streamed_openai_response(resource, chunks):
393411 chunk = chunk .__dict__
394412
395413 model = model or chunk .get ("model" , None ) or None
414+ usage = chunk .get ("usage" , None )
396415
397416 choices = chunk .get ("choices" , [])
398417
@@ -491,6 +510,7 @@ def get_response_for_chat():
491510 return (
492511 model ,
493512 get_response_for_chat () if resource .type == "chat" else completion ,
513+ usage .__dict__ if _is_openai_v1 () and usage is not None else usage ,
494514 )
495515
496516
@@ -519,7 +539,11 @@ def _get_langfuse_data_from_default_response(resource: OpenAiDefinition, respons
519539
520540 usage = response .get ("usage" , None )
521541
522- return model , completion , usage .__dict__ if _is_openai_v1 () and usage is not None else usage
542+ return (
543+ model ,
544+ completion ,
545+ usage .__dict__ if _is_openai_v1 () and usage is not None else usage ,
546+ )
523547
524548
525549def _is_openai_v1 ():
@@ -793,14 +817,20 @@ def __exit__(self, exc_type, exc_value, traceback):
793817 pass
794818
def _finalize(self):
    """Report the finished stream to Langfuse.

    Extracts the model, completion and usage from ``self.items`` via
    ``_extract_streamed_openai_response`` and passes them on to the
    generation update. The trace output is also set, unless this
    generation belongs to a nested trace.
    """
    extracted = _extract_streamed_openai_response(self.resource, self.items)
    model, completion, usage = extracted

    # A trace id provided by the user marks a nested trace; that trace is
    # not updated from here.
    if not self.is_nested_trace:
        trace_id = self.generation.trace_id
        self.langfuse.trace(id=trace_id, output=completion)

    _create_langfuse_update(
        completion,
        self.generation,
        self.completion_start_time,
        model=model,
        usage=usage,
    )
805835
806836
@@ -857,14 +887,20 @@ async def __aexit__(self, exc_type, exc_value, traceback):
857887 pass
858888
async def _finalize(self):
    """Report the finished async stream to Langfuse.

    Extracts the model, completion and usage from ``self.items`` via
    ``_extract_streamed_openai_response`` and passes them on to the
    generation update. The trace output is also set, unless this
    generation belongs to a nested trace.
    """
    extracted = _extract_streamed_openai_response(self.resource, self.items)
    model, completion, usage = extracted

    # A trace id provided by the user marks a nested trace; that trace is
    # not updated from here.
    if not self.is_nested_trace:
        trace_id = self.generation.trace_id
        self.langfuse.trace(id=trace_id, output=completion)

    _create_langfuse_update(
        completion,
        self.generation,
        self.completion_start_time,
        model=model,
        usage=usage,
    )
869905
870906 async def close (self ) -> None :
0 commit comments