diff --git a/agentscope-core/src/main/java/io/agentscope/core/formatter/openai/OpenAIMessageConverter.java b/agentscope-core/src/main/java/io/agentscope/core/formatter/openai/OpenAIMessageConverter.java
index 47f8584fdb..47768ee493 100644
--- a/agentscope-core/src/main/java/io/agentscope/core/formatter/openai/OpenAIMessageConverter.java
+++ b/agentscope-core/src/main/java/io/agentscope/core/formatter/openai/OpenAIMessageConverter.java
@@ -15,6 +15,7 @@
  */
 package io.agentscope.core.formatter.openai;
 
+import io.agentscope.core.formatter.MediaUtils;
 import io.agentscope.core.formatter.openai.dto.OpenAIContentPart;
 import io.agentscope.core.formatter.openai.dto.OpenAIFunction;
 import io.agentscope.core.formatter.openai.dto.OpenAIMessage;
@@ -23,6 +24,7 @@
 import io.agentscope.core.message.AudioBlock;
 import io.agentscope.core.message.Base64Source;
 import io.agentscope.core.message.ContentBlock;
+import io.agentscope.core.message.DataBlock;
 import io.agentscope.core.message.ImageBlock;
 import io.agentscope.core.message.MessageMetadataKeys;
 import io.agentscope.core.message.Msg;
@@ -161,91 +163,17 @@ private List<OpenAIContentPart> convertContentBlocks(List<ContentBlock> blocks)
 
         for (ContentBlock block : blocks) {
             if (block instanceof TextBlock tb) {
-                contentParts.add(OpenAIContentPart.text(tb.getText()));
+                addTextPart(tb.getText(), contentParts);
             } else if (block instanceof ImageBlock ib) {
-                try {
-                    Source source = ib.getSource();
-                    if (source == null) {
-                        log.warn("ImageBlock has null source, skipping");
-                        continue;
-                    }
-                    String imageUrl = convertImageSourceToUrl(source);
-                    contentParts.add(OpenAIContentPart.imageUrl(imageUrl));
-                } catch (Exception e) {
-                    String errorMsg =
-                            e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
-                    log.warn("Failed to process ImageBlock: {}", errorMsg);
-                    contentParts.add(
-                            OpenAIContentPart.text(
-                                    "[Image - processing failed: " + errorMsg + "]"));
-                }
+                addImagePart(ib.getSource(), contentParts);
             } else if (block instanceof AudioBlock ab) {
-                try {
-                    // OpenAI expects base64 audio in input_audio format
-                    Source source = ab.getSource();
-                    if (source == null) {
-                        log.warn("AudioBlock has null source, using placeholder");
-                        contentParts.add(OpenAIContentPart.text("[Audio - source missing]"));
-                        continue;
-                    }
-                    if (source instanceof Base64Source b64) {
-                        String audioData = b64.getData();
-                        if (audioData == null || audioData.isEmpty()) {
-                            log.warn("Base64Source has null or empty data, using placeholder");
-                            contentParts.add(OpenAIContentPart.text("[Audio - data missing]"));
-                            continue;
-                        }
-                        String mediaType = b64.getMediaType();
-                        String format = mediaType != null ? detectAudioFormat(mediaType) : "wav";
-                        if (format == null) {
-                            log.debug("Audio format detection returned null, defaulting to wav");
-                            format = "wav";
-                        }
-                        contentParts.add(OpenAIContentPart.inputAudio(audioData, format));
-                    } else if (source instanceof URLSource urlSource) {
-                        // For URL-based audio, we need to add as text since OpenAI
-                        // input_audio requires base64
-                        String url = urlSource.getUrl();
-                        if (url == null || url.isEmpty()) {
-                            log.warn("URLSource has null or empty URL, using placeholder");
-                            contentParts.add(OpenAIContentPart.text("[Audio URL - missing]"));
-                            continue;
-                        }
-                        log.warn("URL-based audio not directly supported, using text reference");
-                        contentParts.add(OpenAIContentPart.text("[Audio URL: " + url + "]"));
-                    } else {
-                        log.warn(
-                                "Unknown audio source type: {}", source.getClass().getSimpleName());
-                        contentParts.add(
-                                OpenAIContentPart.text("[Audio - unsupported source type]"));
-                    }
-                } catch (Exception e) {
-                    String errorMsg =
-                            e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
-                    log.warn("Failed to process AudioBlock: {}", errorMsg, e);
-                    contentParts.add(
-                            OpenAIContentPart.text(
-                                    "[Audio - processing failed: " + errorMsg + "]"));
-                }
+                addAudioPart(ab.getSource(), contentParts);
             } else if (block instanceof ThinkingBlock) {
                 log.debug("Skipping ThinkingBlock when formatting for OpenAI");
             } else if (block instanceof VideoBlock vb) {
-                try {
-                    Source source = vb.getSource();
-                    if (source == null) {
-                        log.warn("VideoBlock has null source, skipping");
-                        continue;
-                    }
-                    String videoUrl = convertVideoSourceToUrl(source);
-                    contentParts.add(OpenAIContentPart.videoUrl(videoUrl));
-                } catch (Exception e) {
-                    String errorMsg =
-                            e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
-                    log.warn("Failed to process VideoBlock: {}", errorMsg);
-                    contentParts.add(
-                            OpenAIContentPart.text(
-                                    "[Video - processing failed: " + errorMsg + "]"));
-                }
+                addVideoPart(vb.getSource(), contentParts);
+            } else if (block instanceof DataBlock db) {
+                addDataPart(db.getSource(), contentParts);
             } else if (block instanceof ToolUseBlock) {
                 log.warn("ToolUseBlock is not supported in user messages");
             } else if (block instanceof ToolResultBlock) {
@@ -429,13 +357,140 @@ private boolean hasMediaContent(List<ContentBlock> blocks) {
         for (ContentBlock block : blocks) {
             if (block instanceof ImageBlock
                     || block instanceof AudioBlock
-                    || block instanceof VideoBlock) {
+                    || block instanceof VideoBlock
+                    || block instanceof DataBlock) {
                 return true;
             }
         }
         return false;
     }
 
+    /** Appends a plain text part to {@code parts}. */
+    private void addTextPart(String text, List<OpenAIContentPart> parts) {
+        parts.add(OpenAIContentPart.text(text));
+    }
+
+    /**
+     * Dispatches a {@code DataBlock} source to the image, audio or video handler by MIME type.
+     *
+     * <p>The MIME type is the media type declared by a {@link Base64Source}, or whatever
+     * {@link MediaUtils#determineMediaType} reports for the URL of a {@link URLSource}. A null
+     * source or any other {@link Source} subtype is logged and dropped; a missing or
+     * unrecognized MIME type produces a text placeholder.
+     */
+    private void addDataPart(Source source, List<OpenAIContentPart> parts) {
+        if (source == null) {
+            log.warn("DataBlock has null source, skipping");
+            return;
+        }
+        String mimeType;
+        if (source instanceof Base64Source b64) {
+            mimeType = b64.getMediaType();
+        } else if (source instanceof URLSource u) {
+            mimeType = MediaUtils.determineMediaType(u.getUrl());
+        } else {
+            log.warn("DataBlock has unknown source type: {}", source.getClass().getSimpleName());
+            return;
+        }
+        // The media type may be null (addAudioPart guards the same getter), so every prefix
+        // check is null-safe and a missing type falls through to the placeholder branch.
+        if (mimeType != null && mimeType.startsWith("image/")) {
+            addImagePart(source, parts);
+        } else if (mimeType != null && mimeType.startsWith("audio/")) {
+            addAudioPart(source, parts);
+        } else if (mimeType != null && mimeType.startsWith("video/")) {
+            addVideoPart(source, parts);
+        } else {
+            log.warn("DataBlock has unrecognized MIME type '{}', using placeholder", mimeType);
+            addTextPart("[Data - unrecognized type: " + mimeType + "]", parts);
+        }
+    }
+
+    /**
+     * Appends an image part for {@code source}; a null source is skipped and a conversion
+     * failure is replaced by a text placeholder.
+     */
+    private void addImagePart(Source source, List<OpenAIContentPart> parts) {
+        if (source == null) {
+            log.warn("Image source is null, skipping");
+            return;
+        }
+        try {
+            String imageUrl = convertImageSourceToUrl(source);
+            parts.add(OpenAIContentPart.imageUrl(imageUrl));
+        } catch (Exception e) {
+            String errorMsg =
+                    e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
+            log.warn("Failed to process image: {}", errorMsg);
+            addTextPart("[Image - processing failed: " + errorMsg + "]", parts);
+        }
+    }
+
+    /**
+     * Appends an {@code input_audio} part for Base64 audio. URL-based audio, missing data and
+     * unsupported sources degrade to text placeholders.
+     */
+    private void addAudioPart(Source source, List<OpenAIContentPart> parts) {
+        if (source == null) {
+            log.warn("Audio source is null, using placeholder");
+            addTextPart("[Audio - source missing]", parts);
+            return;
+        }
+        try {
+            if (source instanceof Base64Source b64) {
+                String audioData = b64.getData();
+                if (audioData == null || audioData.isEmpty()) {
+                    log.warn("Base64Source has null or empty data, using placeholder");
+                    addTextPart("[Audio - data missing]", parts);
+                    return;
+                }
+                String mediaType = b64.getMediaType();
+                String format = mediaType != null ? detectAudioFormat(mediaType) : "wav";
+                if (format == null) {
+                    format = "wav";
+                }
+                parts.add(OpenAIContentPart.inputAudio(audioData, format));
+            } else if (source instanceof URLSource u) {
+                String url = u.getUrl();
+                if (url == null || url.isEmpty()) {
+                    log.warn("URLSource has null or empty URL, using placeholder");
+                    addTextPart("[Audio URL - missing]", parts);
+                    return;
+                }
+                log.warn("URL-based audio not directly supported, using text reference");
+                addTextPart("[Audio URL: " + url + "]", parts);
+            } else {
+                log.warn("Unknown audio source type: {}", source.getClass().getSimpleName());
+                addTextPart("[Audio - unsupported source type]", parts);
+            }
+        } catch (Exception e) {
+            String errorMsg =
+                    e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
+            log.warn("Failed to process audio: {}", errorMsg, e);
+            addTextPart("[Audio - processing failed: " + errorMsg + "]", parts);
+        }
+    }
+
+    /**
+     * Appends a video part for {@code source}; a null source is skipped and a conversion
+     * failure is replaced by a text placeholder.
+     */
+    private void addVideoPart(Source source, List<OpenAIContentPart> parts) {
+        if (source == null) {
+            log.warn("Video source is null, skipping");
+            return;
+        }
+        try {
+            String videoUrl = convertVideoSourceToUrl(source);
+            parts.add(OpenAIContentPart.videoUrl(videoUrl));
+        } catch (Exception e) {
+            String errorMsg =
+                    e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
+            log.warn("Failed to process video: {}", errorMsg);
+            addTextPart("[Video - processing failed: " + errorMsg + "]", parts);
+        }
+    }
+
     /**
      * Convert image Source to URL string for OpenAI API.
      *
diff --git a/agentscope-core/src/test/java/io/agentscope/core/formatter/openai/DataBlockConverterTest.java b/agentscope-core/src/test/java/io/agentscope/core/formatter/openai/DataBlockConverterTest.java
new file mode 100644
index 0000000000..475f1f240e
--- /dev/null
+++ b/agentscope-core/src/test/java/io/agentscope/core/formatter/openai/DataBlockConverterTest.java
@@ -0,0 +1,255 @@
+/*
+ * Copyright 2024-2026 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package io.agentscope.core.formatter.openai;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import io.agentscope.core.formatter.openai.dto.OpenAIContentPart;
+import io.agentscope.core.formatter.openai.dto.OpenAIMessage;
+import io.agentscope.core.message.Base64Source;
+import io.agentscope.core.message.DataBlock;
+import io.agentscope.core.message.TextBlock;
+import io.agentscope.core.message.URLSource;
+import io.agentscope.core.message.UserMessage;
+import java.util.List;
+import org.junit.jupiter.api.Test;
+
+class DataBlockConverterTest {
+
+    @Test
+    void testDataBlockImageUrlNotDropped() {
+        var formatter = new OpenAIChatFormatter();
+
+        var msg =
+                new UserMessage(
+                        List.of(
+                                TextBlock.builder().text("analyze this").build(),
+                                DataBlock.builder()
+                                        .source(
+                                                URLSource.builder()
+                                                        .url("https://example.com/photo.png")
+                                                        .build())
+                                        .build()));
+
+        List<OpenAIMessage> result = formatter.format(List.of(msg));
+        assertEquals(1, result.size());
+
+        Object content = result.get(0).getContent();
+        assertTrue(
+                content instanceof List<?>,
+                "expected List, got " + (content == null ? null : content.getClass()));
+
+        List<?> rawParts = (List<?>) content;
+        @SuppressWarnings("unchecked")
+        List<OpenAIContentPart> parts = (List<OpenAIContentPart>) rawParts;
+
+        boolean hasText =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "text".equals(p.getType())
+                                                && "analyze this".equals(p.getText()));
+        boolean hasImage =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "image_url".equals(p.getType())
+                                                && p.getImageUrl() != null
+                                                && "https://example.com/photo.png"
+                                                        .equals(p.getImageUrl().getUrl()));
+
+        assertTrue(hasText, "text block should be present in multimodal message");
+        assertTrue(hasImage, "image_url from DataBlock should be present in multimodal message");
+    }
+
+    @Test
+    void testDataBlockBase64Image() {
+        var formatter = new OpenAIChatFormatter();
+
+        var msg =
+                new UserMessage(
+                        List.of(
+                                DataBlock.builder()
+                                        .source(new Base64Source("image/png", "iVBORw0KGgo="))
+                                        .build()));
+
+        List<OpenAIMessage> result = formatter.format(List.of(msg));
+        assertEquals(1, result.size());
+
+        Object content = result.get(0).getContent();
+        assertTrue(
+                content instanceof List<?>,
+                "expected List, got " + (content == null ? null : content.getClass()));
+
+        List<?> rawParts = (List<?>) content;
+        @SuppressWarnings("unchecked")
+        List<OpenAIContentPart> parts = (List<OpenAIContentPart>) rawParts;
+
+        boolean hasImage =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "image_url".equals(p.getType())
+                                                && p.getImageUrl() != null
+                                                && p.getImageUrl()
+                                                        .getUrl()
+                                                        .startsWith("data:image/png;base64"));
+
+        assertTrue(hasImage, "base64 image from DataBlock should be present");
+    }
+
+    @Test
+    void testDataBlockVideoUrl() {
+        var formatter = new OpenAIChatFormatter();
+
+        var msg =
+                new UserMessage(
+                        List.of(
+                                DataBlock.builder()
+                                        .source(
+                                                URLSource.builder()
+                                                        .url("https://example.com/movie.mp4")
+                                                        .build())
+                                        .build()));
+
+        List<OpenAIMessage> result = formatter.format(List.of(msg));
+        assertEquals(1, result.size());
+
+        Object content = result.get(0).getContent();
+        assertTrue(
+                content instanceof List<?>,
+                "expected List, got " + (content == null ? null : content.getClass()));
+
+        @SuppressWarnings("unchecked")
+        List<OpenAIContentPart> parts = (List<OpenAIContentPart>) (List<?>) content;
+
+        boolean hasVideo =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "video_url".equals(p.getType())
+                                                && p.getVideoUrl() != null
+                                                && "https://example.com/movie.mp4"
+                                                        .equals(p.getVideoUrl().getUrl()));
+        assertTrue(hasVideo, "video_url from DataBlock should be present");
+    }
+
+    @Test
+    void testDataBlockAudioUrlFallback() {
+        var formatter = new OpenAIChatFormatter();
+
+        var msg =
+                new UserMessage(
+                        List.of(
+                                DataBlock.builder()
+                                        .source(
+                                                URLSource.builder()
+                                                        .url("https://example.com/song.mp3")
+                                                        .build())
+                                        .build()));
+
+        List<OpenAIMessage> result = formatter.format(List.of(msg));
+        assertEquals(1, result.size());
+
+        Object content = result.get(0).getContent();
+        assertTrue(
+                content instanceof List<?>,
+                "expected List, got " + (content == null ? null : content.getClass()));
+
+        @SuppressWarnings("unchecked")
+        List<OpenAIContentPart> parts = (List<OpenAIContentPart>) (List<?>) content;
+
+        boolean hasAudioText =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "text".equals(p.getType())
+                                                && p.getText() != null
+                                                && p.getText()
+                                                        .contains("https://example.com/song.mp3"));
+        assertTrue(hasAudioText, "URL-based audio should produce a text placeholder");
+    }
+
+    @Test
+    void testDataBlockUnknownMimeType() {
+        var formatter = new OpenAIChatFormatter();
+
+        var msg =
+                new UserMessage(
+                        List.of(
+                                DataBlock.builder()
+                                        .source(
+                                                URLSource.builder()
+                                                        .url("https://example.com/file.unknown")
+                                                        .build())
+                                        .build()));
+
+        List<OpenAIMessage> result = formatter.format(List.of(msg));
+        assertEquals(1, result.size());
+
+        Object content = result.get(0).getContent();
+        assertTrue(
+                content instanceof List<?>,
+                "expected List, got " + (content == null ? null : content.getClass()));
+
+        @SuppressWarnings("unchecked")
+        List<OpenAIContentPart> parts = (List<OpenAIContentPart>) (List<?>) content;
+
+        boolean hasFallback =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "text".equals(p.getType())
+                                                && p.getText() != null
+                                                && p.getText()
+                                                        .startsWith("[Data - unrecognized type:"));
+        assertTrue(hasFallback, "unknown MIME should produce a text placeholder");
+    }
+
+    @Test
+    void testDataBlockBase64Audio() {
+        var formatter = new OpenAIChatFormatter();
+
+        var msg =
+                new UserMessage(
+                        List.of(
+                                DataBlock.builder()
+                                        .source(new Base64Source("audio/wav", "base64audiodata"))
+                                        .build()));
+
+        List<OpenAIMessage> result = formatter.format(List.of(msg));
+        assertEquals(1, result.size());
+
+        Object content = result.get(0).getContent();
+        assertTrue(
+                content instanceof List<?>,
+                "expected List, got " + (content == null ? null : content.getClass()));
+
+        @SuppressWarnings("unchecked")
+        List<OpenAIContentPart> parts = (List<OpenAIContentPart>) (List<?>) content;
+
+        boolean hasAudio =
+                parts.stream()
+                        .anyMatch(
+                                p ->
+                                        "input_audio".equals(p.getType())
+                                                && p.getInputAudio() != null
+                                                && "base64audiodata"
+                                                        .equals(p.getInputAudio().getData()));
+        assertTrue(hasAudio, "Base64 audio from DataBlock should produce input_audio part");
+    }
+}