-
Notifications
You must be signed in to change notification settings - Fork 843
fix(model): add DataBlock handling in OpenAIMessageConverter #1744
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
46292a8
6af54c7
5782e49
bf1fc7a
0af6b61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,6 +15,7 @@ | |
| */ | ||
| package io.agentscope.core.formatter.openai; | ||
|
|
||
| import io.agentscope.core.formatter.MediaUtils; | ||
| import io.agentscope.core.formatter.openai.dto.OpenAIContentPart; | ||
| import io.agentscope.core.formatter.openai.dto.OpenAIFunction; | ||
| import io.agentscope.core.formatter.openai.dto.OpenAIMessage; | ||
|
|
@@ -23,6 +24,7 @@ | |
| import io.agentscope.core.message.AudioBlock; | ||
| import io.agentscope.core.message.Base64Source; | ||
| import io.agentscope.core.message.ContentBlock; | ||
| import io.agentscope.core.message.DataBlock; | ||
| import io.agentscope.core.message.ImageBlock; | ||
| import io.agentscope.core.message.MessageMetadataKeys; | ||
| import io.agentscope.core.message.Msg; | ||
|
|
@@ -161,91 +163,17 @@ private List<OpenAIContentPart> convertContentBlocks(List<ContentBlock> blocks) | |
|
|
||
| for (ContentBlock block : blocks) { | ||
| if (block instanceof TextBlock tb) { | ||
| contentParts.add(OpenAIContentPart.text(tb.getText())); | ||
| addTextPart(tb.getText(), contentParts); | ||
| } else if (block instanceof ImageBlock ib) { | ||
| try { | ||
| Source source = ib.getSource(); | ||
| if (source == null) { | ||
| log.warn("ImageBlock has null source, skipping"); | ||
| continue; | ||
| } | ||
| String imageUrl = convertImageSourceToUrl(source); | ||
| contentParts.add(OpenAIContentPart.imageUrl(imageUrl)); | ||
| } catch (Exception e) { | ||
| String errorMsg = | ||
| e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); | ||
| log.warn("Failed to process ImageBlock: {}", errorMsg); | ||
| contentParts.add( | ||
| OpenAIContentPart.text( | ||
| "[Image - processing failed: " + errorMsg + "]")); | ||
| } | ||
| addImagePart(ib.getSource(), contentParts); | ||
| } else if (block instanceof AudioBlock ab) { | ||
| try { | ||
| // OpenAI expects base64 audio in input_audio format | ||
| Source source = ab.getSource(); | ||
| if (source == null) { | ||
| log.warn("AudioBlock has null source, using placeholder"); | ||
| contentParts.add(OpenAIContentPart.text("[Audio - source missing]")); | ||
| continue; | ||
| } | ||
| if (source instanceof Base64Source b64) { | ||
| String audioData = b64.getData(); | ||
| if (audioData == null || audioData.isEmpty()) { | ||
| log.warn("Base64Source has null or empty data, using placeholder"); | ||
| contentParts.add(OpenAIContentPart.text("[Audio - data missing]")); | ||
| continue; | ||
| } | ||
| String mediaType = b64.getMediaType(); | ||
| String format = mediaType != null ? detectAudioFormat(mediaType) : "wav"; | ||
| if (format == null) { | ||
| log.debug("Audio format detection returned null, defaulting to wav"); | ||
| format = "wav"; | ||
| } | ||
| contentParts.add(OpenAIContentPart.inputAudio(audioData, format)); | ||
| } else if (source instanceof URLSource urlSource) { | ||
| // For URL-based audio, we need to add as text since OpenAI | ||
| // input_audio requires base64 | ||
| String url = urlSource.getUrl(); | ||
| if (url == null || url.isEmpty()) { | ||
| log.warn("URLSource has null or empty URL, using placeholder"); | ||
| contentParts.add(OpenAIContentPart.text("[Audio URL - missing]")); | ||
| continue; | ||
| } | ||
| log.warn("URL-based audio not directly supported, using text reference"); | ||
| contentParts.add(OpenAIContentPart.text("[Audio URL: " + url + "]")); | ||
| } else { | ||
| log.warn( | ||
| "Unknown audio source type: {}", source.getClass().getSimpleName()); | ||
| contentParts.add( | ||
| OpenAIContentPart.text("[Audio - unsupported source type]")); | ||
| } | ||
| } catch (Exception e) { | ||
| String errorMsg = | ||
| e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); | ||
| log.warn("Failed to process AudioBlock: {}", errorMsg, e); | ||
| contentParts.add( | ||
| OpenAIContentPart.text( | ||
| "[Audio - processing failed: " + errorMsg + "]")); | ||
| } | ||
| addAudioPart(ab.getSource(), contentParts); | ||
| } else if (block instanceof ThinkingBlock) { | ||
| log.debug("Skipping ThinkingBlock when formatting for OpenAI"); | ||
| } else if (block instanceof VideoBlock vb) { | ||
| try { | ||
| Source source = vb.getSource(); | ||
| if (source == null) { | ||
| log.warn("VideoBlock has null source, skipping"); | ||
| continue; | ||
| } | ||
| String videoUrl = convertVideoSourceToUrl(source); | ||
| contentParts.add(OpenAIContentPart.videoUrl(videoUrl)); | ||
| } catch (Exception e) { | ||
| String errorMsg = | ||
| e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); | ||
| log.warn("Failed to process VideoBlock: {}", errorMsg); | ||
| contentParts.add( | ||
| OpenAIContentPart.text( | ||
| "[Video - processing failed: " + errorMsg + "]")); | ||
| } | ||
| addVideoPart(vb.getSource(), contentParts); | ||
| } else if (block instanceof DataBlock db) { | ||
| addDataPart(db.getSource(), contentParts); | ||
| } else if (block instanceof ToolUseBlock) { | ||
| log.warn("ToolUseBlock is not supported in user messages"); | ||
| } else if (block instanceof ToolResultBlock) { | ||
|
|
@@ -429,13 +357,117 @@ private boolean hasMediaContent(List<ContentBlock> blocks) { | |
| for (ContentBlock block : blocks) { | ||
| if (block instanceof ImageBlock | ||
| || block instanceof AudioBlock | ||
| || block instanceof VideoBlock) { | ||
| || block instanceof VideoBlock | ||
| || block instanceof DataBlock) { | ||
| return true; | ||
| } | ||
| } | ||
| return false; | ||
| } | ||
|
|
||
| private void addTextPart(String text, List<OpenAIContentPart> parts) { | ||
| parts.add(OpenAIContentPart.text(text)); | ||
| } | ||
|
|
||
| private void addDataPart(Source source, List<OpenAIContentPart> parts) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [nit] |
||
| if (source == null) { | ||
| log.warn("DataBlock has null source, skipping"); | ||
| return; | ||
| } | ||
| String mimeType; | ||
| if (source instanceof Base64Source b64) { | ||
| mimeType = b64.getMediaType(); | ||
| } else if (source instanceof URLSource u) { | ||
| mimeType = MediaUtils.determineMediaType(u.getUrl()); | ||
| } else { | ||
| log.warn("DataBlock has unknown source type: {}", source.getClass().getSimpleName()); | ||
| return; | ||
| } | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [minor]
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Acknowledged. This is an inherent limitation of extension-based MIME detection. Added a comment in addDataPart noting that URLSource-based DataBlocks require URLs with recognizable file extensions.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [minor] |
||
| if (mimeType.startsWith("image/")) { | ||
| addImagePart(source, parts); | ||
| } else if (mimeType.startsWith("audio/")) { | ||
| addAudioPart(source, parts); | ||
| } else if (mimeType.startsWith("video/")) { | ||
| addVideoPart(source, parts); | ||
| } else { | ||
| log.warn("DataBlock has unrecognized MIME type '{}', skipping", mimeType); | ||
| addTextPart("[Data - unrecognized type: " + mimeType + "]", parts); | ||
| } | ||
| } | ||
|
|
||
| private void addImagePart(Source source, List<OpenAIContentPart> parts) { | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [major] When } else {
log.warn("DataBlock has unrecognized MIME type '{}', skipping", mimeType);
addTextPart("[Data - unrecognized type: " + mimeType + "]", parts);
}
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed: unrecognized MIME types now produce a text placeholder like [Data - unrecognized type: ...] instead of being silently dropped, consistent with how ImageBlock/AudioBlock failures are handled.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✅ Verified as addressed in
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [major] When } else {
log.warn("DataBlock has unrecognized MIME type '{}', skipping", mimeType);
addTextPart("[Data - unrecognized type: " + mimeType + "]", parts);
}
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ✅ Verified as addressed in |
||
| if (source == null) { | ||
| log.warn("Image source is null, skipping"); | ||
| return; | ||
| } | ||
| try { | ||
| String imageUrl = convertImageSourceToUrl(source); | ||
| parts.add(OpenAIContentPart.imageUrl(imageUrl)); | ||
| } catch (Exception e) { | ||
| String errorMsg = | ||
| e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); | ||
| log.warn("Failed to process image: {}", errorMsg); | ||
| addTextPart("[Image - processing failed: " + errorMsg + "]", parts); | ||
| } | ||
| } | ||
|
|
||
| private void addAudioPart(Source source, List<OpenAIContentPart> parts) { | ||
| if (source == null) { | ||
| log.warn("Audio source is null, using placeholder"); | ||
| addTextPart("[Audio - source missing]", parts); | ||
| return; | ||
| } | ||
| try { | ||
| if (source instanceof Base64Source b64) { | ||
| String audioData = b64.getData(); | ||
| if (audioData == null || audioData.isEmpty()) { | ||
| log.warn("Base64Source has null or empty data, using placeholder"); | ||
| addTextPart("[Audio - data missing]", parts); | ||
| return; | ||
| } | ||
| String mediaType = b64.getMediaType(); | ||
| String format = mediaType != null ? detectAudioFormat(mediaType) : "wav"; | ||
| if (format == null) { | ||
| format = "wav"; | ||
| } | ||
| parts.add(OpenAIContentPart.inputAudio(audioData, format)); | ||
| } else if (source instanceof URLSource u) { | ||
| String url = u.getUrl(); | ||
| if (url == null || url.isEmpty()) { | ||
| log.warn("URLSource has null or empty URL, using placeholder"); | ||
| addTextPart("[Audio URL - missing]", parts); | ||
| return; | ||
| } | ||
| log.warn("URL-based audio not directly supported, using text reference"); | ||
| addTextPart("[Audio URL: " + url + "]", parts); | ||
| } else { | ||
| log.warn("Unknown audio source type: {}", source.getClass().getSimpleName()); | ||
| addTextPart("[Audio - unsupported source type]", parts); | ||
| } | ||
| } catch (Exception e) { | ||
| String errorMsg = | ||
| e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); | ||
| log.warn("Failed to process audio: {}", errorMsg, e); | ||
| addTextPart("[Audio - processing failed: " + errorMsg + "]", parts); | ||
| } | ||
| } | ||
|
|
||
| private void addVideoPart(Source source, List<OpenAIContentPart> parts) { | ||
| if (source == null) { | ||
| log.warn("Video source is null, skipping"); | ||
| return; | ||
| } | ||
| try { | ||
| String videoUrl = convertVideoSourceToUrl(source); | ||
| parts.add(OpenAIContentPart.videoUrl(videoUrl)); | ||
| } catch (Exception e) { | ||
| String errorMsg = | ||
| e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName(); | ||
| log.warn("Failed to process video: {}", errorMsg); | ||
| addTextPart("[Video - processing failed: " + errorMsg + "]", parts); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Convert image Source to URL string for OpenAI API. | ||
| * | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nit]
addTextPartis a trivial one-liner wrapper (parts.add(OpenAIContentPart.text(text))) that adds indirection without the null-check / try-catch / error-handling value that the otheraddXxxPartmethods provide. It's not wrong, but it's asymmetric. Consider either inlining it back (as it was before), or adding a null-check ontextto make it consistent with the defensive style of the other helpers.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fair point. Kept as-is for now — the wrapper provides consistency with the other addXxxPart methods at the call site (every branch uses the same pattern).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
✅ Verified as addressed in
0af6b61: Nit explicitly acknowledged and deferred by developer; bot itself stated the current if-chain works fine, so conscious deferral is a valid resolution.