Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions lib/ruby_llm/error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ class << self
/reduce the length of messages/i
].freeze

# Wording that marks an error message as a rate-limit error.
# context_length_exceeded? returns false for any message matching one of
# these patterns, before it consults CONTEXT_LENGTH_PATTERNS, so that quota
# messages phrased in terms of tokens (e.g. "input tokens per minute") are
# not classified as context-length errors.
RATE_LIMIT_PATTERNS = [
/rate limit/i,
/per minute/i,
/per hour/i,
/per day/i
].freeze

def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplexity
message = provider&.parse_error(response)

Expand Down Expand Up @@ -102,6 +109,7 @@ def parse_error(provider:, response:) # rubocop:disable Metrics/PerceivedComplex

# Reports whether an error message describes a context-length overflow.
#
# Rate-limit wording is checked first: a message matching any entry of
# RATE_LIMIT_PATTERNS is never treated as a context-length error, even when
# it also matches one of CONTEXT_LENGTH_PATTERNS.
#
# @param message [#to_s, nil] error text; non-String values are matched by
#   their +to_s+ form
# @return [Boolean] true only when the text matches a context-length pattern
#   and no rate-limit pattern; false for nil or empty input
def context_length_exceeded?(message)
  # Coerce once so the emptiness guard and the pattern checks all see the
  # same String. Calling match? on the raw argument raises NoMethodError for
  # values such as a Hash or Integer, which pass the emptiness guard.
  text = message.to_s
  return false if text.empty?
  return false if RATE_LIMIT_PATTERNS.any? { |pattern| text.match?(pattern) }

  CONTEXT_LENGTH_PATTERNS.any? { |pattern| text.match?(pattern) }
end
Expand Down
12 changes: 12 additions & 0 deletions spec/ruby_llm/error_middleware_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,18 @@
end.to raise_error(RubyLLM::RateLimitError)
end

it 'keeps token-per-minute rate limit 429 errors as RateLimitError, not ContextLengthExceededError' do
  # Regression guard: this 429 text contains "input tokens per minute", which
  # also matches the /input[_\s-]?token/i context-length pattern. The rate-limit
  # wording must take precedence so the error stays a RateLimitError.
  limit_msg = "This request would exceed your organization's rate limit of 30,000 input tokens per minute"
  payload = %({"error":{"message":"#{limit_msg}"}})
  fake_response = Struct.new(:status, :body).new(429, payload)
  stub_provider = instance_double(RubyLLM::Provider, parse_error: limit_msg)

  expect { described_class.parse_error(provider: stub_provider, response: fake_response) }
    .to raise_error(RubyLLM::RateLimitError)
end

it 'maps context-length-like 400 errors to ContextLengthExceededError' do
msg = "This model's maximum context length is 8192 tokens."
response = Struct.new(:status, :body).new(400, %({"error":{"message":"#{msg}"}}))
Expand Down
Loading