Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 116 additions & 20 deletions lib/ruby_llm/providers/gemini/images.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,140 @@
module RubyLLM
module Providers
class Gemini
# Image generation methods for the Gemini API implementation.
#
# Routes to one of two protocols depending on the model:
# - Imagen models (imagen-*): :predict endpoint with instances/parameters payload.
# - Gemini Image models (everything else, e.g. gemini-2.5-flash-image, nano-banana-*):
# :generateContent endpoint with contents/parts payload, same protocol as chat.
module Images
# Maps "WIDTHxHEIGHT" size strings to the aspect-ratio strings sent as
# generationConfig.imageConfig.aspectRatio on the Gemini Image path.
# Sizes not listed here fall back to '1:1' in aspect_ratio_for.
# NOTE(review): some entries look like nearest-ratio matches rather than exact
# ones (e.g. 1408x1024 -> 4:3) — confirm that this is intentional.
SIZE_TO_ASPECT_RATIO = {
'1024x1024' => '1:1',
'1792x1024' => '16:9',
'1024x1792' => '9:16',
'1408x1024' => '4:3',
'1024x1408' => '3:4'
}.freeze

# Returns the model-relative endpoint path used for image generation.
#
# Imagen models are served by the :predict action; every other model goes
# through :generateContent. The model id is read from @model, which
# render_image_payload assigns before the URL is built.
#
# with/mask are accepted for interface compatibility and are not used here.
def images_url(with: nil, mask: nil) # rubocop:disable Lint/UnusedMethodArgument
  action = imagen?(@model) ? 'predict' : 'generateContent'
  "models/#{@model}:#{action}"
end

# Builds the request body for an image generation call.
#
# Side effect: stores the model id on @model so images_url can pick the
# matching endpoint afterwards.
#
# prompt - text prompt for the image
# model  - model id; ids starting with "imagen" use the Imagen payload shape,
#          everything else uses the Gemini Image (generateContent) shape
# size   - requested size string, forwarded to the payload builder
# with   - optional reference image source(s); rejected for Imagen models
# mask   - rejected whenever it is not nil
# params - extra provider parameters deep-merged over the generated payload
#
# Raises UnsupportedAttachmentError (via reject_unsupported_inputs!) for
# unsupported with/mask combinations.
def render_image_payload(prompt, model:, size:, with: nil, mask: nil, params: {}) # rubocop:disable Metrics/ParameterLists
  @model = model
  reject_unsupported_inputs!(model, with: with, mask: mask)

  payload = if imagen?(model)
              imagen_payload(prompt, size: size)
            else
              gemini_image_payload(prompt, size: size, with: with)
            end
  Utils.deep_merge(payload, params)
end

# Turns a raw image generation response into an Image, delegating to the
# parser that matches the protocol used for the given model id.
def parse_image_response(response, model:)
  return parse_imagen_response(response, model: model) if imagen?(model)

  parse_gemini_image_response(response, model: model)
end

# Replaces the base provider's blanket rejection of `with:`/`mask:` with a no-op.
def validate_paint_inputs!(with:, mask:)
  # Intentionally empty. The model-aware checks live in render_image_payload:
  # the base flow calls validate first, before render, and at that point the
  # model id is not yet on @model.
end

private

# True when the model id (compared via its string form) begins with "imagen".
def imagen?(model)
  model_id = model.to_s
  model_id.start_with?('imagen')
end

# Guards against attachment inputs that the selected model cannot accept.
#
# Raises UnsupportedAttachmentError when a mask is supplied (any model), or
# when reference images are supplied for an Imagen model. Returns nil otherwise.
def reject_unsupported_inputs!(model, with:, mask:)
  mask_given = !mask.nil?
  raise UnsupportedAttachmentError, 'Gemini image generation does not support masks' if mask_given

  # Reference images are only a problem on the Imagen (:predict) path.
  return if with.nil? || !imagen?(model)

  raise UnsupportedAttachmentError, 'Imagen does not support image references in paint'
end

# --- Imagen path (unchanged behavior) ---

# Builds the :predict request body for Imagen models.
#
# The requested size is not sent; it is only mentioned in a debug log entry.
# Exactly one sample is requested.
def imagen_payload(prompt, size:)
  RubyLLM.logger.debug { "Ignoring size #{size}. Imagen does not support image size customization." }
  {
    instances: [{ prompt: prompt }],
    parameters: { sampleCount: 1 }
  }
end

# Parses an Imagen :predict response into an Image.
#
# Only the first entry of "predictions" is used. The MIME type defaults to
# image/png when the prediction does not carry one.
#
# Raises Error when there is no first prediction or it lacks the
# "bytesBase64Encoded" field.
def parse_imagen_response(response, model:)
  image_data = response.body['predictions']&.first

  unless image_data&.key?('bytesBase64Encoded')
    raise Error, 'Unexpected response format from Gemini image generation API'
  end

  Image.new(
    data: image_data['bytesBase64Encoded'],
    mime_type: image_data['mimeType'] || 'image/png',
    model_id: model
  )
end

# --- Gemini Image path ---

# Builds the :generateContent request body for Gemini Image models: a single
# user turn holding any reference image parts followed by the text prompt.
def gemini_image_payload(prompt, size:, with:)
  user_turn = { role: 'user', parts: [*build_image_parts(with), { text: prompt }] }
  image_config = { aspectRatio: aspect_ratio_for(size), imageSize: '1K' }

  {
    contents: [user_turn],
    # Gemini Image models require both modalities in the response config
    # even when only the IMAGE part is consumed.
    generationConfig: { responseModalities: %w[IMAGE TEXT], imageConfig: image_config }
  }
end

# Converts the `with:` reference source(s) into request parts.
#
# Accepts a single source or a list. nil entries and blank strings are
# skipped. Each remaining source is wrapped in a RubyLLM::Attachment and
# formatted with format_attachment.
#
# Raises UnsupportedAttachmentError when an attachment's type is :unknown.
def build_image_parts(with)
  usable = Array(with).reject { |src| src.nil? || (src.is_a?(String) && src.strip.empty?) }

  usable.filter_map do |src|
    attachment = RubyLLM::Attachment.new(src)
    next format_attachment(attachment) unless attachment.type == :unknown

    raise UnsupportedAttachmentError,
          "Gemini image generation does not support attachment type: #{attachment.mime_type}"
  end
end

# Looks up the aspect ratio for a size string in SIZE_TO_ASPECT_RATIO.
# Sizes without an entry are logged at debug level and fall back to '1:1'.
def aspect_ratio_for(size)
  SIZE_TO_ASPECT_RATIO.fetch(size) do
    RubyLLM.logger.debug { "Unmapped size #{size}; defaulting Gemini aspectRatio to 1:1" }
    '1:1'
  end
end

# Parses a :generateContent response from a Gemini Image model into an Image.
#
# Looks at the parts of the first candidate and accepts both camelCase
# ("inlineData"/"mimeType") and snake_case ("inline_data"/"mime_type") keys.
# Parts flagged with "thought" are interim images, so the first non-thought
# inline part is preferred; a thought part is only used when nothing else
# carries inline data. The MIME type defaults to image/png and usage defaults
# to an empty hash when "usageMetadata" is absent.
#
# Raises Error when no part carries inline image data.
def parse_gemini_image_response(response, model:)
  body = response.body
  parts = body.dig('candidates', 0, 'content', 'parts') || []

  inline_parts = parts.select { |part| part['inlineData'] || part['inline_data'] }
  chosen = inline_parts.find { |part| !part['thought'] } || inline_parts.first
  inline = chosen && (chosen['inlineData'] || chosen['inline_data'])
  # Mirror the Imagen path: a part without actual bytes is treated as a malformed response.
  data = inline && inline['data']

  raise Error, 'No inlineData image part in Gemini image generation response' unless data

  Image.new(
    data: data,
    mime_type: inline['mimeType'] || inline['mime_type'] || 'image/png',
    model_id: model,
    usage: body['usageMetadata'] || {}
  )
end
end
end
end
Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions spec/ruby_llm/image_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,22 @@ def missing_remote_image_url

save_and_verify_image image
end

it 'gemini-2.5-flash-image supports image edits with a local file' do
  model_id = 'gemini-2.5-flash-image'

  # `with:` supplies the image at image_path as the reference for the edit.
  image = RubyLLM.paint(prompt, model: model_id, provider: :gemini, with: image_path)

  expect(image.base64?).to be(true)
  expect(image.data).to be_present
  expect(image.mime_type).to include('image')
  expect(image.model_id).to eq(model_id)

  save_and_verify_image image
end
end

describe 'edit functionality' do
Expand Down
74 changes: 74 additions & 0 deletions spec/ruby_llm/providers/gemini/images_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
# frozen_string_literal: true

require 'spec_helper'

RSpec.describe RubyLLM::Providers::Gemini::Images do
  include_context 'with configured RubyLLM'

  let(:imagen_model) { 'imagen-4.0-generate-001' }
  let(:gemini_image_model) { 'gemini-2.5-flash-image' }
  let(:reference_image) { 'spec/fixtures/ruby.png' }

  # A bare object extended with Media first and Images second, so that
  # format_attachment is reachable the same way it is when the real Gemini
  # provider class composes its modules.
  let(:host) { Object.new.extend(RubyLLM::Providers::Gemini::Media).extend(described_class) }

  # Renders a payload through the host, defaulting to a 1024x1024 size.
  def render_payload(prompt, model:, size: '1024x1024', **options)
    host.render_image_payload(prompt, model: model, size: size, **options)
  end

  describe '#render_image_payload (Imagen with `with:`)' do
    it 'rejects image references for Imagen models' do
      expect { render_payload('a cat', model: imagen_model, with: reference_image) }
        .to raise_error(RubyLLM::UnsupportedAttachmentError, /Imagen does not support image references/)
    end
  end

  describe '#parse_image_response (Imagen)' do
    it 'raises when the predictions entry has no bytesBase64Encoded field' do
      empty_prediction = instance_double(Faraday::Response, body: { 'predictions' => [{}] })

      expect { host.parse_image_response(empty_prediction, model: imagen_model) }
        .to raise_error(RubyLLM::Error, /Unexpected response format/)
    end
  end

  describe '#render_image_payload (Gemini Image with unknown attachment type)' do
    it 'raises when an attachment has an unknown mime type' do
      # Every Attachment.new call returns this double, so the source path is irrelevant.
      unknown_attachment = instance_double(
        RubyLLM::Attachment,
        type: :unknown,
        mime_type: 'application/x-unrecognized'
      )
      allow(RubyLLM::Attachment).to receive(:new).and_return(unknown_attachment)

      expect { render_payload('edit this', model: gemini_image_model, with: reference_image) }
        .to raise_error(RubyLLM::UnsupportedAttachmentError, /does not support attachment type/)
    end
  end

  describe '#render_image_payload (Gemini Image with unmapped size)' do
    it 'defaults aspectRatio to 1:1 and logs a debug message for an unknown size string' do
      # and_yield runs the block passed to debug, so the log message is actually built.
      allow(RubyLLM.logger).to receive(:debug).and_yield

      aspect_ratio = render_payload('a panda', model: gemini_image_model, size: '999x999')
                     .dig(:generationConfig, :imageConfig, :aspectRatio)

      expect(aspect_ratio).to eq('1:1')
      expect(RubyLLM.logger).to have_received(:debug).at_least(:once)
    end
  end
end
2 changes: 2 additions & 0 deletions spec/support/models_to_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ def filter_local_providers(models)
{ provider: :openai, model: 'dall-e-3', supports_size: true },
{ provider: :openai, model: 'gpt-image-1', supports_size: false },
{ provider: :gemini, model: 'imagen-4.0-generate-001', supports_size: false },
{ provider: :gemini, model: 'gemini-2.5-flash-image', supports_size: false },
{ provider: :gemini, model: 'gemini-3.1-flash-image-preview', supports_size: false },
{ provider: :openrouter, model: 'google/gemini-2.5-flash-image', supports_size: false }
].freeze
IMAGE_GENERATION_MODELS = filter_local_providers(image_generation_models).freeze
Loading