Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
113 changes: 110 additions & 3 deletions src/openhuman/channels/providers/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,16 +228,123 @@ fn with_provider_detail(summary: &str, err: &str) -> String {
}
}

/// Extract a Retry-After / retry_after seconds hint from a free-form
/// error string. Mirrors the typed [`crate::openhuman::inference::
/// provider::reliable::parse_retry_after_ms`] helper but operates on
/// the already-flattened `String` that reaches the channel-classifier
/// layer.
///
/// Returns `Some(n)` when a non-negative integer or fractional value
/// follows one of the canonical headers; fractional values are
/// rounded up so the user is never told to retry sooner than the
/// upstream actually allows.
fn parse_retry_after_secs_from_str(err: &str) -> Option<u64> {
// Normalise quoted JSON-key wrappers ("retry_after": 30) by
// stripping double quotes before scanning for prefixes
// (CodeRabbit review on #2371). A serialised provider body like
// `{"retry_after": 30}` would otherwise miss every prefix and
// the user would lose the retry hint the provider supplied.
let normalized = err.to_ascii_lowercase().replace('"', "");
for prefix in &[
"retry-after:",
"retry_after:",
"retry-after ",
"retry_after ",
] {
if let Some(pos) = normalized.find(prefix) {
let after = &normalized[pos + prefix.len()..];
let num_str: String = after
.trim()
.chars()
.take_while(|c| c.is_ascii_digit() || *c == '.')
.collect();
if let Ok(secs) = num_str.parse::<f64>() {
if secs.is_finite() && secs >= 0.0 {
return Some(secs.ceil() as u64);
}
}
}
}
None
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

/// Format the retry-after hint as a short user-friendly suffix
/// (`" Try again in 30 seconds."`). Returns an empty string when no
/// hint is available so callers can `format!("{summary}{hint}")`
/// without branching on `Option`.
fn retry_after_hint(secs: Option<u64>) -> String {
match secs {
Some(0) => " You can retry immediately.".to_string(),
Some(1) => " Try again in 1 second.".to_string(),
Some(n) if n < 90 => format!(" Try again in {n} seconds."),
Some(n) => {
// Round UP — never tell the user to retry sooner than
// the upstream actually allows. 90–119s used to render
// as "about 1 minutes" both because of integer flooring
// and missing singular/plural handling (CodeRabbit
// review on #2371).
let mins = (n / 60) + u64::from(n % 60 != 0);
let unit = if mins == 1 { "minute" } else { "minutes" };
format!(" Try again in about {mins} {unit}.")
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
None => String::new(),
}
}

/// Detect the SecurityPolicy global hourly action-budget signal
/// emitted by the built-in tools (`web_fetch`, `curl`, `http_request`,
/// `polymarket`, `composio`, etc.) — see `src/openhuman/security/
/// policy.rs::SecurityPolicy::is_rate_limited`.
///
/// We match the canonical English strings those tools emit. This is
/// load-bearing for issue #2364: before this check ran, any string
/// containing "rate limit" was misclassified as a provider 429 and
/// the user saw the generic "You're being rate-limited" copy, which
/// hides that the cap is OpenHuman's own per-hour safety budget,
/// not the upstream LLM provider.
fn is_action_budget_exhausted(err_lower: &str) -> bool {
err_lower.contains("rate limit exceeded: action budget exhausted")
|| err_lower.contains("rate limit exceeded: too many actions in the last hour")
|| err_lower.contains("action blocked: rate limit exceeded")
}

fn classify_inference_error(err: &str) -> (&'static str, String) {
let lower = err.to_lowercase();
if lower.contains("rate limit") || lower.contains("429") {
// Order matters: the SecurityPolicy hourly cap and the
// agent-loop max-iterations error both surface as strings that
// contain "rate limit" / "iteration", so they MUST be checked
// before the generic provider-429 branch — otherwise users see
// a confusing "your AI provider is rate-limiting you" message
// for limits OpenHuman itself enforced (issue #2364).
if is_action_budget_exhausted(&lower) {
(
"action_budget_exceeded",
with_provider_detail(
"You've hit OpenHuman's per-hour action budget — this is a local safety cap, \
not your AI provider. The window decays gradually; you can keep chatting in \
this thread and tool-heavy steps will resume as the budget refills.",
err,
),
)
} else if crate::openhuman::agent::error::is_max_iterations_error(err) {
(
"rate_limited",
"max_iterations",
with_provider_detail(
"You're being rate-limited. Please wait a moment and try again.",
"The agent ran the maximum number of tool steps for one turn without \
finishing. This usually means a tool kept failing (often a rate limit on a \
web fetch). You can retry the same question in this thread once the \
underlying limit clears.",
err,
),
)
} else if lower.contains("rate limit") || lower.contains("429") {
let retry = parse_retry_after_secs_from_str(err);
let summary = format!(
"Your AI provider is rate-limiting requests. This is a transient upstream \
limit, not a thread-level block — you can retry in this thread.{}",
retry_after_hint(retry)
);
("rate_limited", with_provider_detail(summary.as_str(), err))
} else if lower.contains("timeout") || lower.contains("timed out") {
(
"timeout",
Expand Down
145 changes: 145 additions & 0 deletions src/openhuman/channels/providers/web_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,151 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() {
}
}

// ── #2364: rate-limit classification + retry-after surfacing ────

#[test]
fn classify_inference_error_distinguishes_action_budget_from_provider_429() {
// SecurityPolicy hourly cap (web_fetch / curl / http_request emit
// these strings). Before #2364 these were misclassified as a
// provider 429 and the user saw the "your AI provider is rate-
// limiting you" copy — which is wrong, the limit is OpenHuman's
// own per-hour safety budget.
for raw in [
"Rate limit exceeded: action budget exhausted",
"Rate limit exceeded: too many actions in the last hour",
"Action blocked: rate limit exceeded",
] {
let (category, message) = classify_inference_error(raw);
assert_eq!(
category, "action_budget_exceeded",
"action-budget signal must NOT classify as provider rate_limited: {raw}"
);
assert!(
message.contains("local safety cap"),
"must clarify the limit is OpenHuman-local, not upstream: {message}"
);
assert!(
message.contains("can keep chatting in this thread"),
"must tell the user the thread isn't blocked: {message}"
);
}
}

#[test]
fn classify_inference_error_max_iterations_gets_dedicated_branch() {
// The agent loop's MaxIterationsExceeded variant renders as
// "Agent exceeded maximum tool iterations (N)". Before #2364
// this fell through to the generic `inference` bucket and the
// user saw a vague "something went wrong" copy. Now it gets a
// specific message that says retrying in the same thread is OK.
let raw = "run_chat_task failed client_id=abc thread_id=t1 \
error=Agent exceeded maximum tool iterations (10)";
let (category, message) = classify_inference_error(raw);
assert_eq!(category, "max_iterations");
assert!(
message.contains("maximum number of tool steps"),
"must explain the cap: {message}"
);
assert!(
message.contains("retry the same question in this thread"),
"must reassure same-thread recovery: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() {
let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30";
let (category, message) = classify_inference_error(raw);
assert_eq!(category, "rate_limited");
assert!(
message.contains("Try again in 30 seconds"),
"must surface the parsed retry-after window: {message}"
);
assert!(
message.contains("retry in this thread"),
"must clarify the thread isn't blocked: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_no_retry_after_omits_hint() {
let raw = "openrouter API error (429 Too Many Requests)";
let (category, message) = classify_inference_error(raw);
assert_eq!(category, "rate_limited");
// Generic copy must still describe the situation accurately.
assert!(message.contains("transient upstream limit"));
// No hallucinated countdown when none was parsed.
assert!(
!message.contains("Try again in"),
"must NOT invent a retry-after when none was parsed: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() {
// Fractional seconds round up — never tell the user to retry
// sooner than the upstream actually allows.
let (_, message) = classify_inference_error("429 Too Many Requests: retry_after: 2.4");
assert!(
message.contains("Try again in 3 seconds"),
"fractional 2.4 must round up to 3: {message}"
);

// Long windows switch to a "minutes" rendering at the 90s
// threshold so the user gets a less precise but more readable
// hint.
let (_, message) = classify_inference_error("429 Too Many Requests: Retry-After: 180");
assert!(
message.contains("about 3 minutes"),
"180s must render as minutes: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_minute_window_uses_singular_and_rounds_up() {
// CodeRabbit on #2371: the 90–119s band used to render
// "about 1 minutes" (floor + missing plural handling). Round
// up + singular/plural now produces "about 2 minutes" for 90s
// (since 90s ceils to 2 minutes) and "about 2 minutes" for
// 119s (ditto). 60s lands in the seconds band; 61s is the
// smallest minute-band input but still <90 so seconds; 90s is
// the first true minute-band input.
let (_, m_90) = classify_inference_error("429 Too Many Requests: Retry-After: 90");
assert!(
m_90.contains("about 2 minutes"),
"90s must round up to 2 minutes (not floor to 1): {m_90}"
);
let (_, m_119) = classify_inference_error("429 Too Many Requests: Retry-After: 119");
assert!(
m_119.contains("about 2 minutes"),
"119s must round up to 2 minutes: {m_119}"
);
// Exactly 60-multiple inputs above the 90s threshold render as
// exact minutes with no round-up bump.
let (_, m_120) = classify_inference_error("429 Too Many Requests: Retry-After: 120");
assert!(
m_120.contains("about 2 minutes"),
"exact 120s must stay as 2 minutes: {m_120}"
);
}

#[test]
fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() {
// CodeRabbit on #2371: a serialised provider body like
// {"retry_after": 30} would previously miss every prefix
// because the quote stopped `lower.find("retry_after:")` from
// matching. The parser now strips quotes so the JSON-key shape
// resolves the same as the unquoted header shape.
let (category, message) = classify_inference_error(
r#"openrouter API error (429 Too Many Requests): {"retry_after": 30, "code": "rate_limited"}"#,
);
assert_eq!(category, "rate_limited");
assert!(
message.contains("Try again in 30 seconds"),
"quoted JSON retry_after must be parsed: {message}"
);
}

#[test]
fn generic_error_copy_is_sanitized_and_has_discord_report_action() {
let message = generic_inference_error_user_message();
Expand Down
Loading