diff --git a/lib/ruby_llm/error.rb b/lib/ruby_llm/error.rb index 04ec8c466..05fb5b335 100644 --- a/lib/ruby_llm/error.rb +++ b/lib/ruby_llm/error.rb @@ -62,6 +62,13 @@ class << self /reduce the length of messages/i ].freeze + RATE_LIMIT_PATTERNS = [ + /rate limit/i, + /per minute/i, + /per hour/i, + /per day/i + ].freeze + def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplexity message = provider&.parse_error(response) @@ -102,6 +109,7 @@ def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplex def context_length_exceeded?(message) return false if message.to_s.empty? + return false if RATE_LIMIT_PATTERNS.any? { |pattern| message.match?(pattern) } CONTEXT_LENGTH_PATTERNS.any? { |pattern| message.match?(pattern) } end diff --git a/spec/ruby_llm/error_middleware_spec.rb b/spec/ruby_llm/error_middleware_spec.rb index 9437c9c79..88b57511b 100644 --- a/spec/ruby_llm/error_middleware_spec.rb +++ b/spec/ruby_llm/error_middleware_spec.rb @@ -48,6 +48,18 @@ end.to raise_error(RubyLLM::RateLimitError) end + it 'keeps token-per-minute rate limit 429 errors as RateLimitError, not ContextLengthExceededError' do + # Anthropic returns 429 with a message mentioning "input tokens per minute", which matches + # the /input[_\s-]?token/i context length pattern — it should be classified as a rate limit. + msg = "This request would exceed your organization's rate limit of 30,000 input tokens per minute" + response = Struct.new(:status, :body).new(429, %({"error":{"message":"#{msg}"}})) + provider = instance_double(RubyLLM::Provider, parse_error: msg) + + expect do + described_class.parse_error(provider: provider, response: response) + end.to raise_error(RubyLLM::RateLimitError) + end + it 'maps context-length-like 400 errors to ContextLengthExceededError' do msg = "This model's maximum context length is 8192 tokens." response = Struct.new(:status, :body).new(400, %({"error":{"message":"#{msg}"}}))