diff --git a/docs/_core_features/chat.md b/docs/_core_features/chat.md index c7b71ee4b..0dd6ee2fc 100644 --- a/docs/_core_features/chat.md +++ b/docs/_core_features/chat.md @@ -306,6 +306,22 @@ response = chat.ask "What is the square root of 64? Answer with a JSON object wi puts JSON.parse(response.content) ``` +OpenAI can route chat requests through either Chat Completions or Responses while keeping the same `RubyLLM.chat` interface. The default `:auto` mode keeps existing Chat Completions behavior unless the request uses a Responses-only feature, such as a native `web_search` tool, a deep-research model, GPT-5 tool calls with reasoning enabled, or params like `previous_response_id`, `include`, `background`, `conversation`, `max_tool_calls`, `truncation`, or `text`. + +```ruby +RubyLLM.configure do |config| + config.openai_api_mode = :auto # :auto, :chat_completions, or :responses +end + +chat = RubyLLM.chat(model: "gpt-5.5") + .with_params( + openai_api_mode: :responses, + tools: [{ type: "web_search", search_context_size: "low" }] + ) +``` + +The `openai_api_mode` key is consumed by RubyLLM and is not sent to the API. See OpenAI's [migration guide](https://platform.openai.com/docs/guides/migrate-to-responses), [web search guide](https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses), and [streaming guide](https://platform.openai.com/docs/guides/streaming-responses) for provider-specific behavior. + > **With great power comes great responsibility:** The `with_params` method can override any part of the request payload, including critical parameters like model, max_tokens, or tools. Use it carefully to avoid unintended behavior. Always verify that your overrides are compatible with the provider's API. To debug and see the exact request being sent, set the environment variable `RUBYLLM_DEBUG=true`. 
{: .warning } diff --git a/docs/_core_features/tools.md b/docs/_core_features/tools.md index 8da85632b..418b3446b 100644 --- a/docs/_core_features/tools.md +++ b/docs/_core_features/tools.md @@ -471,6 +471,22 @@ end Provider metadata is passed through verbatim—turn on `RUBYLLM_DEBUG=true` if you want to inspect the final payload while experimenting. +### OpenAI Native Tools + +For OpenAI Responses features such as hosted web search, pass the native tool definitions through `with_params(tools: ...)`. RubyLLM appends those native tools to any Ruby tool classes registered with `with_tool` when the request routes through Responses. + +```ruby +chat = RubyLLM.chat(model: "gpt-5.5") + .with_tool(Weather) + .with_params( + tools: [{ type: "web_search", search_context_size: "low" }] + ) + +response = chat.ask("Find today's weather context and compare it with our local forecast.") +``` + +Ruby tool classes still use `with_tool`; `with_params(tools: ...)` is only for OpenAI-native tools such as `web_search`, `file_search`, or `code_interpreter`. See OpenAI's [Responses migration guide](https://platform.openai.com/docs/guides/migrate-to-responses) and [web search guide](https://platform.openai.com/docs/guides/tools-web-search?api-mode=responses) for the provider-specific tool shapes. + ## Advanced: Halting Tool Continuation After a tool executes, the LLM normally continues the conversation to explain what happened. In rare cases, you might want to skip this and return the tool result directly. diff --git a/lib/ruby_llm/configuration.rb b/lib/ruby_llm/configuration.rb index de6202686..02c347f44 100644 --- a/lib/ruby_llm/configuration.rb +++ b/lib/ruby_llm/configuration.rb @@ -3,6 +3,8 @@ module RubyLLM # Global configuration for RubyLLM class Configuration + OPENAI_API_MODES = %i[auto chat_completions responses].freeze + class << self # Declare a single configuration option. 
def option(key, default = nil) @@ -37,6 +39,7 @@ def defaults = @defaults ||= {} option :default_moderation_model, 'omni-moderation-latest' option :default_image_model, 'gpt-image-1.5' option :default_transcription_model, 'whisper-1' + option :openai_api_mode, :auto option :model_registry_file, -> { File.expand_path('models.json', __dir__) } option :model_registry_class, 'Model' @@ -77,5 +80,15 @@ def log_regexp_timeout=(value) @log_regexp_timeout = value end end + + def openai_api_mode=(value) + mode = (value || :auto).to_sym + unless OPENAI_API_MODES.include?(mode) + raise ArgumentError, + "Invalid openai_api_mode: #{value.inspect}. Valid values are: #{OPENAI_API_MODES.join(', ')}" + end + + @openai_api_mode = mode + end end end diff --git a/lib/ruby_llm/providers/openai.rb b/lib/ruby_llm/providers/openai.rb index 4e36b2668..8d42c5455 100644 --- a/lib/ruby_llm/providers/openai.rb +++ b/lib/ruby_llm/providers/openai.rb @@ -4,7 +4,29 @@ module RubyLLM module Providers # OpenAI API integration. class OpenAI < Provider + RESPONSE_ONLY_PARAMS = %i[ + previous_response_id + include + background + conversation + max_tool_calls + truncation + text + ].freeze + + RESPONSE_TOOL_TYPES = %w[ + code_interpreter + computer_use_preview + file_search + image_generation + local_shell + mcp + web_search + web_search_preview + ].freeze + include OpenAI::Chat + include OpenAI::Responses include OpenAI::Embeddings include OpenAI::Models include OpenAI::Moderation @@ -30,6 +52,83 @@ def maybe_normalize_temperature(temperature, model) OpenAI::Temperature.normalize(temperature, model.id) end + # rubocop:disable Metrics/ParameterLists + def complete(messages, tools:, temperature:, model:, params: {}, headers: {}, schema: nil, thinking: nil, + tool_prefs: nil, &) + request_params = params.dup + requested_mode = extract_openai_api_mode!(request_params) + routing_context = { messages:, model:, params: request_params, tools:, thinking: } + @using_responses_api = native_openai_provider? 
&& use_responses_api?(requested_mode, routing_context) + validate_responses_attachments!(messages, requested_mode) + + native_tools = @using_responses_api ? extract_native_response_tools!(request_params) : nil + normalized_temperature = maybe_normalize_temperature(temperature, model) + payload_options = { + tools: tools, + tool_prefs: tool_prefs, + temperature: normalized_temperature, + model: model, + stream: block_given?, + schema: schema, + thinking: thinking + } + payload_options[:native_tools] = native_tools if @using_responses_api + + payload = Utils.deep_merge( + render_payload(messages, **payload_options), + request_params + ) + + if block_given? + stream_response @connection, payload, headers, & + else + sync_response @connection, payload, headers + end + end + # rubocop:enable Metrics/ParameterLists + + def completion_url + @using_responses_api ? responses_url : chat_completions_url + end + + # rubocop:disable Metrics/ParameterLists + def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, thinking: nil, + tool_prefs: nil, native_tools: nil) + if @using_responses_api + render_response_payload( + messages, + tools: tools, + native_tools: native_tools, + tool_prefs: tool_prefs, + temperature: temperature, + model: model, + stream: stream, + schema: schema, + thinking: thinking + ) + else + render_chat_payload( + messages, + tools: tools, + tool_prefs: tool_prefs, + temperature: temperature, + model: model, + stream: stream, + schema: schema, + thinking: thinking + ) + end + end + # rubocop:enable Metrics/ParameterLists + + def parse_completion_response(response) + if @using_responses_api + parse_response_response(response) + else + parse_chat_completion_response(response) + end + end + class << self def capabilities OpenAI::Capabilities @@ -39,6 +138,7 @@ def configuration_options %i[ openai_api_key openai_api_base + openai_api_mode openai_organization_id openai_project_id openai_use_system_role @@ -49,6 +149,90 @@ def 
configuration_requirements %i[openai_api_key] end end + + private + + def extract_openai_api_mode!(params) + overrides = [params.delete(:openai_api_mode), params.delete('openai_api_mode')] + normalize_openai_api_mode(overrides.compact.first || @config.openai_api_mode) + end + + def normalize_openai_api_mode(value) + mode = (value || :auto).to_sym + return mode if Configuration::OPENAI_API_MODES.include?(mode) + + raise ArgumentError, + "Invalid openai_api_mode: #{value.inspect}. " \ + "Valid values are: #{Configuration::OPENAI_API_MODES.join(', ')}" + end + + def use_responses_api?(requested_mode, routing_context) + return true if requested_mode == :responses + return false if requested_mode == :chat_completions + return false if audio_input?(routing_context[:messages]) + + responses_model?(routing_context[:model]) || + native_response_tools?(routing_context[:params]) || + responses_only_params?(routing_context[:params]) || + responses_required_for_reasoning_tools?( + routing_context[:model], + routing_context[:tools], + routing_context[:thinking] + ) + end + + def native_openai_provider? + instance_of?(OpenAI) + end + + def responses_model?(model) + model.id.to_s.include?('deep-research') + end + + def responses_required_for_reasoning_tools?(model, tools, thinking) + return false unless tools.any? && resolve_effort(thinking) + + model.id.to_s.start_with?('gpt-5') + end + + def responses_only_params?(params) + RESPONSE_ONLY_PARAMS.any? { |key| params.key?(key) || params.key?(key.to_s) } + end + + def native_response_tools?(params) + tools = params[:tools] || params['tools'] + Utils.to_safe_array(tools).any?
{ |tool| native_response_tool?(tool) } + end + + def native_response_tool?(tool) + return false unless tool.is_a?(Hash) + + type = (tool[:type] || tool['type']).to_s + return true if RESPONSE_TOOL_TYPES.include?(type) + + type == 'function' && + (tool.key?(:name) || tool.key?('name')) && + !(tool.key?(:function) || tool.key?('function')) + end + + def extract_native_response_tools!(params) + tools = params.delete(:tools) || params.delete('tools') + Utils.to_safe_array(tools).select { |tool| native_response_tool?(tool) } + end + + def validate_responses_attachments!(messages, requested_mode) + return unless @using_responses_api && audio_input?(messages) + return unless requested_mode == :responses + + raise UnsupportedAttachmentError, 'OpenAI Responses API does not support audio inputs yet' + end + + def audio_input?(messages) + messages.any? do |message| + content = message.content + content.respond_to?(:attachments) && content.attachments.any? { |attachment| attachment.type == :audio } + end + end end end end diff --git a/lib/ruby_llm/providers/openai/chat.rb b/lib/ruby_llm/providers/openai/chat.rb index 141cd0790..956377f59 100644 --- a/lib/ruby_llm/providers/openai/chat.rb +++ b/lib/ruby_llm/providers/openai/chat.rb @@ -5,15 +5,29 @@ module Providers class OpenAI # Chat methods of the OpenAI API integration module Chat - def completion_url + def chat_completions_url 'chat/completions' end module_function - # rubocop:disable Metrics/ParameterLists,Metrics/PerceivedComplexity + # rubocop:disable Metrics/ParameterLists,Metrics/PerceivedComplexity,Lint/UnusedMethodArgument def render_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, - thinking: nil, tool_prefs: nil) + thinking: nil, tool_prefs: nil, native_tools: nil) + render_chat_payload( + messages, + tools: tools, + temperature: temperature, + model: model, + stream: stream, + schema: schema, + thinking: thinking, + tool_prefs: tool_prefs + ) + end + + def render_chat_payload(messages, 
tools:, temperature:, model:, stream: false, schema: nil, + thinking: nil, tool_prefs: nil) tool_prefs ||= {} payload = { model: model.id, @@ -49,9 +63,13 @@ def render_payload(messages, tools:, temperature:, model:, stream: false, schema payload[:stream_options] = { include_usage: true } if stream payload end - # rubocop:enable Metrics/ParameterLists,Metrics/PerceivedComplexity + # rubocop:enable Metrics/ParameterLists,Metrics/PerceivedComplexity,Lint/UnusedMethodArgument def parse_completion_response(response) + parse_chat_completion_response(response) + end + + def parse_chat_completion_response(response) data = response.body return if data.empty? diff --git a/lib/ruby_llm/providers/openai/responses.rb b/lib/ruby_llm/providers/openai/responses.rb new file mode 100644 index 000000000..011b12e13 --- /dev/null +++ b/lib/ruby_llm/providers/openai/responses.rb @@ -0,0 +1,207 @@ +# frozen_string_literal: true + +module RubyLLM + module Providers + class OpenAI + # Responses API support for the OpenAI provider. + module Responses + RESPONSE_REASONING_TEXT_TYPES = %w[summary_text output_text].freeze + + def responses_url + 'responses' + end + + module_function + + # rubocop:disable Metrics/ParameterLists + def render_response_payload(messages, tools:, temperature:, model:, stream: false, schema: nil, + thinking: nil, tool_prefs: nil, native_tools: nil) + tool_prefs ||= {} + payload = { + model: model.id, + input: format_response_input(messages), + stream: stream, + store: false + } + + payload[:temperature] = temperature unless temperature.nil? + apply_response_tools(payload, tools, native_tools, tool_prefs) + apply_response_schema(payload, schema) if schema + apply_response_thinking(payload, thinking) + payload + end + # rubocop:enable Metrics/ParameterLists + + def format_response_input(messages) + messages.flat_map do |message| + if message.tool_call? 
+ format_response_tool_calls(message.tool_calls) + elsif message.role == :tool + format_response_tool_result(message) + else + format_response_message(message) + end + end + end + + def parse_response_response(response) + data = response.body + return if data.empty? + + raise Error.new(response, data.dig('error', 'message')) if data.dig('error', 'message') + + outputs = data['output'] || [] + return if outputs.empty? + + usage = data['usage'] || {} + + Message.new( + role: :assistant, + content: response_output_text(data), + thinking: Thinking.build(text: response_reasoning_text(outputs)), + tool_calls: parse_response_tool_calls(outputs), + input_tokens: usage['input_tokens'], + output_tokens: usage['output_tokens'], + cached_tokens: usage.dig('input_tokens_details', 'cached_tokens'), + cache_creation_tokens: usage.dig('input_tokens_details', 'cache_write_tokens') || 0, + thinking_tokens: usage.dig('output_tokens_details', 'reasoning_tokens'), + model_id: data['model'], + raw: response + ) + end + + def format_response_message(message) + { + type: 'message', + role: format_role(message.role), + content: format_response_content(message.content) + }.compact + end + + def format_response_tool_calls(tool_calls) + tool_calls.map do |_, tool_call| + { + type: 'function_call', + call_id: tool_call.id, + name: tool_call.name, + arguments: JSON.generate(tool_call.arguments || {}) + } + end + end + + def format_response_tool_result(message) + { + type: 'function_call_output', + call_id: message.tool_call_id, + output: response_tool_output(message.content) + } + end + + def apply_response_tools(payload, tools, native_tools, tool_prefs) + response_tools = tools.map { |_, tool| response_tool_for(tool) } + response_tools.concat(Utils.to_safe_array(native_tools)) + payload[:tools] = response_tools if response_tools.any? + payload[:tool_choice] = build_response_tool_choice(tool_prefs[:choice]) unless tool_prefs[:choice].nil? 
+ payload[:parallel_tool_calls] = tool_prefs[:calls] == :many unless tool_prefs[:calls].nil? + end + + def apply_response_schema(payload, schema) + payload[:text] = { + format: { + type: 'json_schema', + name: schema[:name], + schema: schema[:schema], + strict: schema[:strict] + } + } + end + + def apply_response_thinking(payload, thinking) + effort = resolve_effort(thinking) + payload[:reasoning] = { effort: effort } if effort + end + + def format_response_content(content) + return content.value if content.is_a?(RubyLLM::Content::Raw) + return content.to_json if content.is_a?(Hash) || content.is_a?(Array) + return content unless content.is_a?(Content) + + parts = [] + parts << format_response_text(content.text) if content.text + + content.attachments.each do |attachment| + parts << format_response_attachment(attachment) + end + + parts + end + + def format_response_attachment(attachment) + case attachment.type + when :image + { + type: 'input_image', + image_url: attachment.url? ? attachment.source.to_s : attachment.for_llm + } + when :pdf + { + type: 'input_file', + filename: attachment.filename, + file_data: attachment.for_llm + } + when :text + format_response_text(attachment.for_llm) + when :audio + raise UnsupportedAttachmentError, 'OpenAI Responses API does not support audio inputs yet' + else + raise UnsupportedAttachmentError, attachment.type + end + end + + def format_response_text(text) + { + type: 'input_text', + text: text + } + end + + def response_tool_output(content) + return JSON.generate(content.value) if content.is_a?(RubyLLM::Content::Raw) + return content.text.to_s if content.is_a?(RubyLLM::Content) && content.text + return JSON.generate(content.to_h) if content.is_a?(RubyLLM::Content) + return JSON.generate(content) if content.is_a?(Hash) || content.is_a?(Array) + + content.to_s + end + + def response_output_text(data) + output_text = data['output_text'] + return output_text if output_text.is_a?(String) && !output_text.empty? 
+ + text = response_output_text_parts(data['output']).join + text.empty? ? nil : text + end + + def response_output_text_parts(outputs) + Utils.to_safe_array(outputs).select { |output| output['type'] == 'message' }.flat_map do |output| + Utils.to_safe_array(output['content']).filter_map do |content| + content['text'] if content['type'] == 'output_text' && content['text'].is_a?(String) + end + end + end + + def response_reasoning_text(outputs) + text = outputs.select { |output| output['type'] == 'reasoning' }.flat_map do |output| + Utils.to_safe_array(output['summary'] || output['content']).filter_map do |content| + if RESPONSE_REASONING_TEXT_TYPES.include?(content['type']) && content['text'].is_a?(String) + content['text'] + end + end + end.join + + text.empty? ? nil : text + end + end + end + end +end diff --git a/lib/ruby_llm/providers/openai/streaming.rb b/lib/ruby_llm/providers/openai/streaming.rb index 890818d97..99975048c 100644 --- a/lib/ruby_llm/providers/openai/streaming.rb +++ b/lib/ruby_llm/providers/openai/streaming.rb @@ -12,6 +12,12 @@ def stream_url end def build_chunk(data) + return build_responses_chunk(data) if responses_event?(data) + + build_chat_completions_chunk(data) + end + + def build_chat_completions_chunk(data) usage = data['usage'] || {} delta = data.dig('choices', 0, 'delta') || {} content_source = delta['content'] || data.dig('choices', 0, 'message', 'content') @@ -34,19 +40,124 @@ def build_chunk(data) ) end + def build_responses_chunk(data) + case data['type'] + when 'response.output_text.delta' + response_text_delta(data) + when 'response.reasoning_summary_text.delta', 'response.reasoning_text.delta' + response_thinking_delta(data) + when 'response.output_item.added', 'response.output_item.done' + response_output_item_chunk(data) + when 'response.function_call_arguments.delta' + response_function_call_delta(data) + when 'response.function_call_arguments.done' + response_function_call_done(data) + when 'response.completed' + 
response_completed_chunk(data) + when 'response.failed' + raise Error.new(nil, response_event_error_message(data)) # Error.new takes (response, message) + else + Chunk.new(role: :assistant, content: '') + end + end + def parse_streaming_error(data) error_data = JSON.parse(data) - return unless error_data['error'] + error = error_data['error'] || error_data.dig('response', 'error') + return unless error - case error_data.dig('error', 'type') + case error['type'] || error['code'] when 'server_error' - [500, error_data['error']['message']] + [500, error['message']] when 'rate_limit_exceeded', 'insufficient_quota' - [429, error_data['error']['message']] + [429, error['message']] else - [400, error_data['error']['message']] + [400, error['message']] end end + + def responses_event?(data) + data['type'].to_s.start_with?('response.') + end + + def response_text_delta(data) + Chunk.new( + role: :assistant, + content: data['delta'], + model_id: data.dig('response', 'model') + ) + end + + def response_thinking_delta(data) + Chunk.new( + role: :assistant, + content: '', + thinking: Thinking.build(text: data['delta']), + model_id: data.dig('response', 'model') + ) + end + + def response_output_item_chunk(data) + item = data['item'] || {} + return Chunk.new(role: :assistant, content: '') unless item['type'] == 'function_call' + + response_function_call_chunk( + stream_key: item['id'] || data['item_id'], + call_id: item['call_id'] || data['call_id'], + name: item['name'], + arguments: item['arguments'] + ) + end + + def response_function_call_delta(data) + response_function_call_chunk( + stream_key: data['item_id'], + call_id: nil, + name: nil, + arguments: data['delta'] + ) + end + + def response_function_call_done(data) + response_function_call_chunk( + stream_key: data['item_id'], + call_id: data['call_id'], + name: data['name'], + arguments: data['arguments'] + ) + end + + def response_function_call_chunk(stream_key:, call_id:, name:, arguments:) + key = stream_key || call_id + + Chunk.new( + role: :assistant, + content:
nil, + tool_calls: { + key => ToolCall.new(id: call_id, name: name, arguments: arguments || '') + } + ) + end + + def response_completed_chunk(data) + response = data['response'] || {} + usage = response['usage'] || {} + + Chunk.new( + role: :assistant, + content: '', + model_id: response['model'], + input_tokens: usage['input_tokens'], + output_tokens: usage['output_tokens'], + cached_tokens: usage.dig('input_tokens_details', 'cached_tokens'), + cache_creation_tokens: usage.dig('input_tokens_details', 'cache_write_tokens') || 0, + thinking_tokens: usage.dig('output_tokens_details', 'reasoning_tokens') + ) + end + + def response_event_error_message(data) + data.dig('response', 'error', 'message') || data.dig('error', 'message') || 'OpenAI response failed' + end end end end diff --git a/lib/ruby_llm/providers/openai/tools.rb b/lib/ruby_llm/providers/openai/tools.rb index 7db984cb2..f79e6372c 100644 --- a/lib/ruby_llm/providers/openai/tools.rb +++ b/lib/ruby_llm/providers/openai/tools.rb @@ -42,6 +42,19 @@ def tool_for(tool) RubyLLM::Utils.deep_merge(definition, tool.provider_params) end + def response_tool_for(tool) + definition = { + type: 'function', + name: tool.name, + description: tool.description, + parameters: parameters_schema_for(tool) + } + + return definition if tool.provider_params.empty? + + RubyLLM::Utils.deep_merge(definition, tool.provider_params) + end + def param_schema(param) { type: param.type, @@ -100,6 +113,30 @@ def parse_tool_calls(tool_calls, parse_arguments: true) end end + def parse_response_tool_calls(outputs) + function_calls = Utils.to_safe_array(outputs).select { |output| output['type'] == 'function_call' } + return nil if function_calls.empty? 
+ + function_calls.to_h do |output| + id = output['call_id'] || output['id'] + [ + id, + ToolCall.new( + id: id, + name: output['name'], + arguments: parse_response_tool_call_arguments(output) + ) + ] + end + end + + def parse_response_tool_call_arguments(output) + arguments = output['arguments'] + return {} if arguments.nil? || arguments.empty? + + JSON.parse(arguments) + end + def build_tool_choice(tool_choice) case tool_choice when :auto, :none, :required @@ -114,6 +151,18 @@ def build_tool_choice(tool_choice) end end + def build_response_tool_choice(tool_choice) + case tool_choice + when :auto, :none, :required + tool_choice + else + { + type: 'function', + name: tool_choice + } + end + end + def extract_tool_call_thought_signature(tool_call) tool_call.dig('extra_content', 'google', 'thought_signature') end diff --git a/lib/ruby_llm/stream_accumulator.rb b/lib/ruby_llm/stream_accumulator.rb index 0ae83bb15..f4ce2c06b 100644 --- a/lib/ruby_llm/stream_accumulator.rb +++ b/lib/ruby_llm/stream_accumulator.rb @@ -74,22 +74,23 @@ def tool_calls_from_stream def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedComplexity RubyLLM.logger.debug { "Accumulating tool calls: #{new_tool_calls}" } if RubyLLM.config.log_stream_debug - new_tool_calls.each_value do |tool_call| + new_tool_calls.each do |stream_key, tool_call| if tool_call.id tool_call_id = tool_call.id.empty? ? SecureRandom.uuid : tool_call.id tool_call_arguments = tool_call.arguments if tool_call_arguments.nil? || (tool_call_arguments.respond_to?(:empty?) && tool_call_arguments.empty?) 
tool_call_arguments = +'' end - @tool_calls[tool_call.id] = ToolCall.new( + accumulator_key = stream_key || tool_call.id + @tool_calls[accumulator_key] = ToolCall.new( id: tool_call_id, name: tool_call.name, arguments: tool_call_arguments, thought_signature: tool_call.thought_signature ) - @latest_tool_call_id = tool_call.id + @latest_tool_call_id = accumulator_key else - existing = @tool_calls[@latest_tool_call_id] + existing = @tool_calls[stream_key || @latest_tool_call_id] if existing fragment = tool_call.arguments fragment = '' if fragment.nil? @@ -104,7 +105,7 @@ def accumulate_tool_calls(new_tool_calls) # rubocop:disable Metrics/PerceivedCom def find_tool_call(tool_call_id) if tool_call_id.nil? - @tool_calls[@latest_tool_call] + @tool_calls[@latest_tool_call_id] else @latest_tool_call_id = tool_call_id @tool_calls[tool_call_id] diff --git a/spec/ruby_llm/configuration_spec.rb b/spec/ruby_llm/configuration_spec.rb index 4a68fc51c..4774d9466 100644 --- a/spec/ruby_llm/configuration_spec.rb +++ b/spec/ruby_llm/configuration_spec.rb @@ -14,6 +14,7 @@ expect(config.retry_interval).to eq(0.1) expect(config.retry_backoff_factor).to eq(2) expect(config.retry_interval_randomness).to eq(0.5) + expect(config.openai_api_mode).to eq(:auto) end it 'exposes a discoverable options API' do @@ -21,9 +22,20 @@ :request_timeout, :default_model, :model_registry_file, + :openai_api_mode, :openai_api_key, :openrouter_api_base ) end + + it 'normalizes valid OpenAI API mode values' do + config.openai_api_mode = 'responses' + + expect(config.openai_api_mode).to eq(:responses) + end + + it 'rejects invalid OpenAI API mode values' do + expect { config.openai_api_mode = :invalid }.to raise_error(ArgumentError, /Invalid openai_api_mode/) + end end end diff --git a/spec/ruby_llm/providers/open_ai/responses_spec.rb b/spec/ruby_llm/providers/open_ai/responses_spec.rb new file mode 100644 index 000000000..29403e4e3 --- /dev/null +++ b/spec/ruby_llm/providers/open_ai/responses_spec.rb @@ -0,0 
+1,340 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::OpenAI::Responses do + let(:config) do + RubyLLM::Configuration.new.tap do |config| + config.openai_api_key = 'test' + config.max_retries = 0 + end + end + + let(:provider) { RubyLLM::Providers::OpenAI.new(config) } + let(:model) { instance_double(RubyLLM::Model::Info, id: 'gpt-5.5') } + let(:messages) { [RubyLLM::Message.new(role: :user, content: 'Hello')] } + let(:tools) { {} } + + def complete_with_capture(provider, messages, options) + captured = {} + tools = options.fetch(:tools) + model = options.fetch(:model) + params = options.fetch(:params, {}) + thinking = options[:thinking] + + allow(provider).to receive(:sync_response) do |_connection, payload, _headers| + captured[:url] = provider.completion_url + captured[:payload] = payload + RubyLLM::Message.new(role: :assistant, content: 'ok') + end + + provider.complete( + messages, + tools: tools, + temperature: nil, + model: model, + params: params, + thinking: thinking, + tool_prefs: {} + ) + + captured + end + + describe 'OpenAI protocol routing' do + it 'uses Chat Completions by default in auto mode' do + captured = complete_with_capture(provider, messages, tools: tools, model: model) + + expect(captured[:url]).to eq('chat/completions') + expect(captured[:payload]).to include(:messages) + expect(captured[:payload]).not_to include(:input) + end + + it 'uses Responses when forced per request and removes the routing param' do + captured = complete_with_capture( + provider, + messages, + tools: tools, + model: model, + params: { openai_api_mode: :responses } + ) + + expect(captured[:url]).to eq('responses') + expect(captured[:payload]).to include(input: [{ type: 'message', role: 'user', content: 'Hello' }]) + expect(captured[:payload]).to include(store: false) + expect(captured[:payload]).not_to include(:openai_api_mode) + end + + it 'uses Chat Completions when forced per request' do + config.openai_api_mode = 
:responses + + captured = complete_with_capture( + provider, + messages, + tools: tools, + model: model, + params: { openai_api_mode: :chat_completions } + ) + + expect(captured[:url]).to eq('chat/completions') + end + + it 'uses Responses for deep research models' do + deep_research_model = instance_double(RubyLLM::Model::Info, id: 'o4-mini-deep-research') + + captured = complete_with_capture(provider, messages, tools: tools, model: deep_research_model) + + expect(captured[:url]).to eq('responses') + end + + it 'uses Responses for native Responses tools' do + captured = complete_with_capture( + provider, + messages, + tools: tools, + model: model, + params: { tools: [{ type: 'web_search', search_context_size: 'low' }] } + ) + + expect(captured[:url]).to eq('responses') + expect(captured[:payload][:tools]).to eq([{ type: 'web_search', search_context_size: 'low' }]) + end + + it 'uses Responses for Responses-only params' do + captured = complete_with_capture( + provider, + messages, + tools: tools, + model: model, + params: { previous_response_id: 'resp_123' } + ) + + expect(captured[:url]).to eq('responses') + expect(captured[:payload][:previous_response_id]).to eq('resp_123') + end + + it 'uses Responses for GPT-5 tool calls with reasoning enabled' do + weather_tool = instance_double( + RubyLLM::Tool, + name: 'weather', + description: 'Looks up weather', + params_schema: nil, + parameters: {}, + provider_params: {} + ) + + captured = complete_with_capture( + provider, + messages, + tools: { weather: weather_tool }, + model: model, + thinking: RubyLLM::Thinking::Config.new(effort: :low) + ) + + expect(captured[:url]).to eq('responses') + end + + it 'keeps audio input on Chat Completions in auto mode' do + audio_message = RubyLLM::Message.new( + role: :user, + content: RubyLLM::Content.new('Transcribe this', 'spec/fixtures/ruby.wav') + ) + + captured = complete_with_capture(provider, [audio_message], tools: tools, model: model) + + expect(captured[:url]).to 
eq('chat/completions') + end + + it 'rejects forced Responses mode with audio input' do + audio_message = RubyLLM::Message.new( + role: :user, + content: RubyLLM::Content.new('Transcribe this', 'spec/fixtures/ruby.wav') + ) + + expect do + complete_with_capture( + provider, + [audio_message], + tools: tools, + model: model, + params: { openai_api_mode: :responses } + ) + end.to raise_error(RubyLLM::UnsupportedAttachmentError, /does not support audio/) + end + + it 'keeps OpenAI-compatible providers on their existing Chat Completions path' do + openrouter_config = RubyLLM::Configuration.new.tap do |config| + config.openrouter_api_key = 'test' + config.openai_api_mode = :responses + config.max_retries = 0 + end + openrouter = RubyLLM::Providers::OpenRouter.new(openrouter_config) + openrouter_model = instance_double(RubyLLM::Model::Info, id: 'openai/gpt-5.5') + + captured = complete_with_capture( + openrouter, + messages, + tools: tools, + model: openrouter_model, + params: { + openai_api_mode: :responses, + tools: [{ type: 'web_search', search_context_size: 'low' }] + } + ) + + expect(captured[:url]).to eq('chat/completions') + expect(captured[:payload]).to include(:messages) + expect(captured[:payload]).not_to include(:input) + expect(captured[:payload]).not_to include(:openai_api_mode) + end + end + + describe '#render_response_payload' do + before do + stub_const( + 'OpenAIResponsesWeatherTool', + Class.new(RubyLLM::Tool) do + description 'Looks up weather' + param :city, desc: 'City name' + + def execute(city:) + city + end + end + ) + end + + it 'maps messages, files, schema, reasoning, and native tools to Responses shape' do + content = RubyLLM::Content.new( + 'Inspect these files', + ['spec/fixtures/ruby.png', 'spec/fixtures/sample.pdf', 'spec/fixtures/ruby.txt'] + ) + schema = { + name: 'WeatherAnswer', + schema: { type: 'object', properties: { answer: { type: 'string' } }, required: ['answer'] }, + strict: true + } + + payload = provider.send( + 
:render_response_payload, + [RubyLLM::Message.new(role: :user, content: content)], + tools: { weather: OpenAIResponsesWeatherTool.new }, + native_tools: [{ type: 'web_search', search_context_size: 'low' }], + temperature: 0.2, + model: model, + stream: true, + schema: schema, + thinking: RubyLLM::Thinking::Config.new(effort: :low), + tool_prefs: { choice: :auto, calls: :many } + ) + + expect(payload[:model]).to eq('gpt-5.5') + expect(payload[:stream]).to be(true) + expect(payload[:store]).to be(false) + expect(payload[:temperature]).to eq(0.2) + expect(payload[:reasoning]).to eq(effort: 'low') + expect(payload[:text][:format]).to include(type: 'json_schema', name: 'WeatherAnswer', strict: true) + expect(payload[:tool_choice]).to eq(:auto) + expect(payload[:parallel_tool_calls]).to be(true) + expect(payload[:tools]).to include(hash_including(type: 'function', name: 'open_ai_responses_weather')) + expect(payload[:tools]).to include(type: 'web_search', search_context_size: 'low') + expect(payload[:input].first[:content].map { |part| part[:type] }).to eq( + %w[input_text input_image input_file input_text] + ) + end + + it 'maps previous assistant tool calls and tool outputs to Responses items' do + tool_call = RubyLLM::ToolCall.new(id: 'call_123', name: 'weather', arguments: { 'city' => 'Kyiv' }) + assistant = RubyLLM::Message.new(role: :assistant, content: nil, tool_calls: { 'call_123' => tool_call }) + tool_result = RubyLLM::Message.new( + role: :tool, + content: RubyLLM::Content::Raw.new({ forecast: 'sunny' }), + tool_call_id: 'call_123' + ) + + payload = provider.send( + :render_response_payload, + [assistant, tool_result], + tools: {}, + native_tools: [], + temperature: nil, + model: model + ) + + expect(payload[:input]).to eq( + [ + { + type: 'function_call', + call_id: 'call_123', + name: 'weather', + arguments: '{"city":"Kyiv"}' + }, + { + type: 'function_call_output', + call_id: 'call_123', + output: '{"forecast":"sunny"}' + } + ] + ) + end + end + + describe 
'#parse_response_response' do + it 'parses text, reasoning summaries, function calls, and usage' do + response = instance_double( + Faraday::Response, + body: { + 'model' => 'gpt-5.5', + 'output_text' => 'Sunny.', + 'output' => [ + { + 'type' => 'reasoning', + 'summary' => [{ 'type' => 'summary_text', 'text' => 'checked the weather' }] + }, + { + 'type' => 'web_search_call', + 'id' => 'ws_123' + }, + { + 'type' => 'function_call', + 'call_id' => 'call_123', + 'name' => 'weather', + 'arguments' => '{"city":"Kyiv"}' + }, + { + 'type' => 'message', + 'content' => [{ 'type' => 'output_text', 'text' => 'Sunny.' }] + } + ], + 'usage' => { + 'input_tokens' => 100, + 'input_tokens_details' => { 'cached_tokens' => 25 }, + 'output_tokens' => 12, + 'output_tokens_details' => { 'reasoning_tokens' => 5 } + } + } + ) + + message = provider.send(:parse_response_response, response) + + expect(message.content).to eq('Sunny.') + expect(message.thinking.text).to eq('checked the weather') + expect(message.tool_calls.values.first).to have_attributes( + id: 'call_123', + name: 'weather', + arguments: { 'city' => 'Kyiv' } + ) + expect(message.input_tokens).to eq(100) + expect(message.cached_tokens).to eq(25) + expect(message.output_tokens).to eq(12) + expect(message.thinking_tokens).to eq(5) + expect(message.raw).to eq(response) + end + + it 'raises provider errors' do + response = instance_double(Faraday::Response, body: { 'error' => { 'message' => 'bad request' } }) + + expect { provider.send(:parse_response_response, response) }.to raise_error(RubyLLM::Error, /bad request/) + end + end +end diff --git a/spec/ruby_llm/providers/open_ai/streaming_spec.rb b/spec/ruby_llm/providers/open_ai/streaming_spec.rb new file mode 100644 index 000000000..8f607efb7 --- /dev/null +++ b/spec/ruby_llm/providers/open_ai/streaming_spec.rb @@ -0,0 +1,170 @@ +# frozen_string_literal: true + +require 'spec_helper' + +RSpec.describe RubyLLM::Providers::OpenAI::Streaming do + include_context 'with configured 
RubyLLM' + + let(:test_obj) do + Object.new.tap do |obj| + obj.extend(RubyLLM::Providers::OpenAI::Tools) + obj.extend(described_class) + end + end + + describe '#build_chunk' do + it 'keeps Chat Completions streaming chunks working' do + chunk = test_obj.send( + :build_chunk, + { + 'model' => 'gpt-5-nano', + 'choices' => [ + { + 'delta' => { + 'content' => 'Hello' + } + } + ], + 'usage' => { + 'prompt_tokens' => 10, + 'completion_tokens' => 2, + 'prompt_tokens_details' => { 'cached_tokens' => 4 } + } + } + ) + + expect(chunk.content).to eq('Hello') + expect(chunk.model_id).to eq('gpt-5-nano') + expect(chunk.input_tokens).to eq(6) + expect(chunk.cached_tokens).to eq(4) + expect(chunk.output_tokens).to eq(2) + end + + it 'builds Responses text delta chunks' do + chunk = test_obj.send( + :build_chunk, + { + 'type' => 'response.output_text.delta', + 'delta' => 'Hi', + 'response' => { 'model' => 'gpt-5.5' } + } + ) + + expect(chunk.content).to eq('Hi') + expect(chunk.model_id).to eq('gpt-5.5') + end + + it 'builds Responses reasoning delta chunks' do + chunk = test_obj.send( + :build_chunk, + { + 'type' => 'response.reasoning_summary_text.delta', + 'delta' => 'thinking' + } + ) + + expect(chunk.thinking.text).to eq('thinking') + end + + it 'accumulates Responses function call argument deltas by item id' do + accumulator = RubyLLM::StreamAccumulator.new + + [ + { + 'type' => 'response.output_item.added', + 'item' => { + 'id' => 'fc_item_123', + 'type' => 'function_call', + 'call_id' => 'call_123', + 'name' => 'weather', + 'arguments' => '' + } + }, + { + 'type' => 'response.function_call_arguments.delta', + 'item_id' => 'fc_item_123', + 'delta' => '{"city":"Ky' + }, + { + 'type' => 'response.function_call_arguments.delta', + 'item_id' => 'fc_item_123', + 'delta' => 'iv"}' + }, + { + 'type' => 'response.function_call_arguments.done', + 'item_id' => 'fc_item_123', + 'call_id' => 'call_123', + 'name' => 'weather', + 'arguments' => '{"city":"Kyiv"}' + } + ].each do |event| + 
accumulator.add(test_obj.send(:build_chunk, event)) + end + + message = accumulator.to_message(nil) + tool_call = message.tool_calls.values.first + + expect(tool_call).to have_attributes( + id: 'call_123', + name: 'weather', + arguments: { 'city' => 'Kyiv' } + ) + end + + it 'builds Responses completed chunks with usage' do + chunk = test_obj.send( + :build_chunk, + { + 'type' => 'response.completed', + 'response' => { + 'model' => 'gpt-5.5', + 'usage' => { + 'input_tokens' => 20, + 'input_tokens_details' => { 'cached_tokens' => 5 }, + 'output_tokens' => 8, + 'output_tokens_details' => { 'reasoning_tokens' => 3 } + } + } + } + ) + + expect(chunk.model_id).to eq('gpt-5.5') + expect(chunk.input_tokens).to eq(20) + expect(chunk.cached_tokens).to eq(5) + expect(chunk.cache_creation_tokens).to eq(0) + expect(chunk.output_tokens).to eq(8) + expect(chunk.thinking_tokens).to eq(3) + end + + it 'raises Responses failed events' do + expect do + test_obj.send( + :build_chunk, + { + 'type' => 'response.failed', + 'response' => { + 'error' => { 'message' => 'failed' } + } + } + ) + end.to raise_error(RubyLLM::Error, /failed/) + end + end + + describe '#parse_streaming_error' do + it 'parses Responses error payloads' do + status, message = test_obj.send( + :parse_streaming_error, + { + 'type' => 'response.failed', + 'response' => { + 'error' => { 'type' => 'server_error', 'message' => 'failed' } + } + }.to_json + ) + + expect(status).to eq(500) + expect(message).to eq('failed') + end + end +end diff --git a/spec/support/rubyllm_configuration.rb b/spec/support/rubyllm_configuration.rb index 6b3cede88..a94f0e7ad 100644 --- a/spec/support/rubyllm_configuration.rb +++ b/spec/support/rubyllm_configuration.rb @@ -30,6 +30,7 @@ config.ollama_api_base = ENV.fetch('OLLAMA_API_BASE', 'http://localhost:11434/v1') config.ollama_api_key = ENV.fetch('OLLAMA_API_KEY', nil) config.openai_api_key = ENV.fetch('OPENAI_API_KEY', 'test') + config.openai_api_mode = :auto config.openrouter_api_key = 
ENV.fetch('OPENROUTER_API_KEY', 'test') config.perplexity_api_key = ENV.fetch('PERPLEXITY_API_KEY', 'test') config.request_timeout = 600