From 9f7a74eb4725024e20470208d5422279035d7376 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 21 May 2026 01:30:59 +0530 Subject: [PATCH 1/2] fix(channels): distinguish rate-limit sources in chat error classifier (#2364) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User-perception bug from #2364: any agent-loop error string containing "rate limit" was classified as `rate_limited` and the user saw "You're being rate-limited. Please wait a moment and try again." — copy that implies the AI provider is throttling them and gives no hint about which thread is affected or how long to wait. When the real cause was the SecurityPolicy hourly action cap on built-in tools (web_fetch / curl / http_request), the message was misleading; users opened new threads, saw those "work" (because the new thread didn't trigger a tool-call storm), and concluded the original thread was "stuck". Fix: split the catch-all rate-limit branch in `classify_inference_error` into three sources and surface retry-after when available. Classifier (src/openhuman/channels/providers/web.rs) - `action_budget_exceeded` (new): catches the SecurityPolicy strings emitted by the built-in tools (`Rate limit exceeded: action budget exhausted`, `Rate limit exceeded: too many actions in the last hour`, `Action blocked: rate limit exceeded`). User-facing copy: *"You've hit OpenHuman's per-hour action budget — this is a local safety cap, not your AI provider. The window decays gradually; you can keep chatting in this thread …"*. Ordered BEFORE the provider-429 branch so its substring `rate limit` no longer leaks into the wrong bucket. - `max_iterations` (new): catches the canonical agent-loop cap string ("Agent exceeded maximum tool iterations") via the existing `is_max_iterations_error` predicate so the user sees: *"The agent ran the maximum number of tool steps for one turn … You can retry the same question in this thread once the underlying limit clears."* — previously this fell through to the opaque generic `inference` bucket. - `rate_limited` (kept, enriched): now extracts `Retry-After` / `retry_after` seconds from the error body and appends a concrete hint to the user message ("Try again in 30 seconds" / "Try again in about 3 minutes" for windows ≥90s / "You can retry immediately" for 0). Fractional values round up so we never under-promise. Copy also now states the limit is upstream and that retrying in the same thread is fine. Tests (src/openhuman/channels/providers/web_tests.rs) - `classify_inference_error_distinguishes_action_budget_from_provider_429` — all three SecurityPolicy strings classify as `action_budget_exceeded` and the copy says "local safety cap" + "can keep chatting in this thread". - `classify_inference_error_max_iterations_gets_dedicated_branch` — the flattened web-channel error wrapper resolves to `max_iterations` with the same-thread recovery hint. - `classify_inference_error_rate_limited_surfaces_retry_after_seconds` — 30-second retry-after appears verbatim and the thread-recovery hint is present. - `classify_inference_error_rate_limited_no_retry_after_omits_hint` — 429 without Retry-After does NOT hallucinate a window. - `classify_inference_error_rate_limited_handles_fractional_and_minute_windows` — 2.4s rounds to 3s; 180s renders as "about 3 minutes". `cargo test --lib classify_inference_error` → 7 passed, 0 failed. `cargo check` clean. `cargo fmt` applied. What's intentionally out of scope - A real per-thread rate-limit Redux state: the trace shows none exists; `inferenceStatus` / `lifecycle` / `activeThread` all clear on `chat_error` in `ChatRuntimeProvider.onError`, and the FE test suite already exercises this. - A countdown timer in the composer: separate UI work; this PR keeps the fix server-side so the existing FE forwarder picks it up automatically. --- src/openhuman/channels/providers/web.rs | 102 +++++++++++++++++- src/openhuman/channels/providers/web_tests.rs | 100 +++++++++++++++++ 2 files changed, 199 insertions(+), 3 deletions(-) diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index b0be66866..e730025f2 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -228,16 +228,112 @@ fn with_provider_detail(summary: &str, err: &str) -> String { } } +/// Extract a Retry-After / retry_after seconds hint from a free-form +/// error string. Mirrors the typed [`crate::openhuman::inference:: +/// provider::reliable::parse_retry_after_ms`] helper but operates on +/// the already-flattened `String` that reaches the channel-classifier +/// layer. +/// +/// Returns `Some(n)` when a non-negative integer or fractional value +/// follows one of the canonical headers; fractional values are +/// rounded up so the user is never told to retry sooner than the +/// upstream actually allows. +fn parse_retry_after_secs_from_str(err: &str) -> Option { + let lower = err.to_ascii_lowercase(); + for prefix in &[ + "retry-after:", + "retry_after:", + "retry-after ", + "retry_after ", + ] { + if let Some(pos) = lower.find(prefix) { + let after = &err[pos + prefix.len()..]; + let num_str: String = after + .trim() + .chars() + .take_while(|c| c.is_ascii_digit() || *c == '.') + .collect(); + if let Ok(secs) = num_str.parse::() { + if secs.is_finite() && secs >= 0.0 { + return Some(secs.ceil() as u64); + } + } + } + } + None +} + +/// Format the retry-after hint as a short user-friendly suffix +/// (`" Try again in 30 seconds."`). Returns an empty string when no +/// hint is available so callers can `format!("{summary}{hint}")` +/// without branching on `Option`. +fn retry_after_hint(secs: Option) -> String { + match secs { + Some(0) => " You can retry immediately.".to_string(), + Some(1) => " Try again in 1 second.".to_string(), + Some(n) if n < 90 => format!(" Try again in {n} seconds."), + Some(n) => { + let mins = n / 60; + format!(" Try again in about {mins} minutes.") + } + None => String::new(), + } +} + +/// Detect the SecurityPolicy global hourly action-budget signal +/// emitted by the built-in tools (`web_fetch`, `curl`, `http_request`, +/// `polymarket`, `composio`, etc.) — see `src/openhuman/security/ +/// policy.rs::SecurityPolicy::is_rate_limited`. +/// +/// We match the canonical English strings those tools emit. This is +/// load-bearing for issue #2364: before this check ran, any string +/// containing "rate limit" was misclassified as a provider 429 and +/// the user saw the generic "You're being rate-limited" copy, which +/// hides that the cap is OpenHuman's own per-hour safety budget, +/// not the upstream LLM provider. +fn is_action_budget_exhausted(err_lower: &str) -> bool { + err_lower.contains("rate limit exceeded: action budget exhausted") + || err_lower.contains("rate limit exceeded: too many actions in the last hour") + || err_lower.contains("action blocked: rate limit exceeded") +} + fn classify_inference_error(err: &str) -> (&'static str, String) { let lower = err.to_lowercase(); - if lower.contains("rate limit") || lower.contains("429") { + // Order matters: the SecurityPolicy hourly cap and the + // agent-loop max-iterations error both surface as strings that + // contain "rate limit" / "iteration", so they MUST be checked + // before the generic provider-429 branch — otherwise users see + // a confusing "your AI provider is rate-limiting you" message + // for limits OpenHuman itself enforced (issue #2364). + if is_action_budget_exhausted(&lower) { + ( + "action_budget_exceeded", + with_provider_detail( + "You've hit OpenHuman's per-hour action budget — this is a local safety cap, \ + not your AI provider. The window decays gradually; you can keep chatting in \ + this thread and tool-heavy steps will resume as the budget refills.", + err, + ), + ) + } else if crate::openhuman::agent::error::is_max_iterations_error(err) { ( - "rate_limited", + "max_iterations", with_provider_detail( - "You're being rate-limited. Please wait a moment and try again.", + "The agent ran the maximum number of tool steps for one turn without \ + finishing. This usually means a tool kept failing (often a rate limit on a \ + web fetch). You can retry the same question in this thread once the \ + underlying limit clears.", err, ), ) + } else if lower.contains("rate limit") || lower.contains("429") { + let retry = parse_retry_after_secs_from_str(err); + let summary = format!( + "Your AI provider is rate-limiting requests. This is a transient upstream \ + limit, not a thread-level block — you can retry in this thread.{}", + retry_after_hint(retry) + ); + ("rate_limited", with_provider_detail(summary.as_str(), err)) } else if lower.contains("timeout") || lower.contains("timed out") { ( "timeout", diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index d2551eb0a..85b49b835 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -207,6 +207,106 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() { } } +// ── #2364: rate-limit classification + retry-after surfacing ──── + +#[test] +fn classify_inference_error_distinguishes_action_budget_from_provider_429() { + // SecurityPolicy hourly cap (web_fetch / curl / http_request emit + // these strings). Before #2364 these were misclassified as a + // provider 429 and the user saw the "your AI provider is rate- + // limiting you" copy — which is wrong, the limit is OpenHuman's + // own per-hour safety budget. + for raw in [ + "Rate limit exceeded: action budget exhausted", + "Rate limit exceeded: too many actions in the last hour", + "Action blocked: rate limit exceeded", + ] { + let (category, message) = classify_inference_error(raw); + assert_eq!( + category, "action_budget_exceeded", + "action-budget signal must NOT classify as provider rate_limited: {raw}" + ); + assert!( + message.contains("local safety cap"), + "must clarify the limit is OpenHuman-local, not upstream: {message}" + ); + assert!( + message.contains("can keep chatting in this thread"), + "must tell the user the thread isn't blocked: {message}" + ); + } +} + +#[test] +fn classify_inference_error_max_iterations_gets_dedicated_branch() { + // The agent loop's MaxIterationsExceeded variant renders as + // "Agent exceeded maximum tool iterations (N)". Before #2364 + // this fell through to the generic `inference` bucket and the + // user saw a vague "something went wrong" copy. Now it gets a + // specific message that says retrying in the same thread is OK. + let raw = "run_chat_task failed client_id=abc thread_id=t1 \ + error=Agent exceeded maximum tool iterations (10)"; + let (category, message) = classify_inference_error(raw); + assert_eq!(category, "max_iterations"); + assert!( + message.contains("maximum number of tool steps"), + "must explain the cap: {message}" + ); + assert!( + message.contains("retry the same question in this thread"), + "must reassure same-thread recovery: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() { + let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30"; + let (category, message) = classify_inference_error(raw); + assert_eq!(category, "rate_limited"); + assert!( + message.contains("Try again in 30 seconds"), + "must surface the parsed retry-after window: {message}" + ); + assert!( + message.contains("retry in this thread"), + "must clarify the thread isn't blocked: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_no_retry_after_omits_hint() { + let raw = "openrouter API error (429 Too Many Requests)"; + let (category, message) = classify_inference_error(raw); + assert_eq!(category, "rate_limited"); + // Generic copy must still describe the situation accurately. + assert!(message.contains("transient upstream limit")); + // No hallucinated countdown when none was parsed. + assert!( + !message.contains("Try again in"), + "must NOT invent a retry-after when none was parsed: {message}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() { + // Fractional seconds round up — never tell the user to retry + // sooner than the upstream actually allows. + let (_, message) = classify_inference_error("429 Too Many Requests: retry_after: 2.4"); + assert!( + message.contains("Try again in 3 seconds"), + "fractional 2.4 must round up to 3: {message}" + ); + + // Long windows switch to a "minutes" rendering at the 90s + // threshold so the user gets a less precise but more readable + // hint. + let (_, message) = classify_inference_error("429 Too Many Requests: Retry-After: 180"); + assert!( + message.contains("about 3 minutes"), + "180s must render as minutes: {message}" + ); +} + #[test] fn generic_error_copy_is_sanitized_and_has_discord_report_action() { let message = generic_inference_error_user_message(); From 553ec29587621ed18227b2fc48779e63ee362708 Mon Sep 17 00:00:00 2001 From: Ghost Scripter Date: Thu, 21 May 2026 03:54:26 +0530 Subject: [PATCH 2/2] =?UTF-8?q?fix(channels/web):=20address=20CodeRabbit?= =?UTF-8?q?=20on=20#2371=20=E2=80=94=20parse=20quoted=20JSON=20retry=5Faft?= =?UTF-8?q?er=20+=20round=20minutes=20up?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two CodeRabbit findings on src/openhuman/channels/providers/web.rs: 1. **Minor — quoted JSON retry_after.** A serialised provider body like `{"retry_after": 30}` would miss every prefix because the surrounding quote stopped `lower.find("retry_after:")` from matching, and the user lost the retry hint the upstream actually supplied. Normalise by stripping double quotes from the lowercased scan buffer before searching for prefixes. New test `classify_inference_error_rate_limited_parses_quoted_json_retry_after`. 2. **Minor — minute hint rounds up + uses singular/plural.** The `Some(n)` arm used `n / 60` (integer floor) and a hard-coded "minutes" suffix, so 90–119s rendered as "about 1 minutes" — both grammatically wrong and an instruction to retry sooner than the upstream allows. Round up via `(n / 60) + u64::from(n % 60 != 0)` and pick singular vs plural. New test `classify_inference_error_rate_limited_minute_window_uses_singular_and_rounds_up` pins 90s → "about 2 minutes" and 119s → "about 2 minutes". `cargo test --lib classify_inference_error` → 9 passed, 0 failed (7 pre-existing + 2 new). --- src/openhuman/channels/providers/web.rs | 21 ++++++--- src/openhuman/channels/providers/web_tests.rs | 45 +++++++++++++++++++ 2 files changed, 61 insertions(+), 5 deletions(-) diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs index e730025f2..7ce450964 100644 --- a/src/openhuman/channels/providers/web.rs +++ b/src/openhuman/channels/providers/web.rs @@ -239,15 +239,20 @@ fn with_provider_detail(summary: &str, err: &str) -> String { /// rounded up so the user is never told to retry sooner than the /// upstream actually allows. fn parse_retry_after_secs_from_str(err: &str) -> Option { - let lower = err.to_ascii_lowercase(); + // Normalise quoted JSON-key wrappers ("retry_after": 30) by + // stripping double quotes before scanning for prefixes + // (CodeRabbit review on #2371). A serialised provider body like + // `{"retry_after": 30}` would otherwise miss every prefix and + // the user would lose the retry hint the provider supplied. + let normalized = err.to_ascii_lowercase().replace('"', ""); for prefix in &[ "retry-after:", "retry_after:", "retry-after ", "retry_after ", ] { - if let Some(pos) = lower.find(prefix) { - let after = &err[pos + prefix.len()..]; + if let Some(pos) = normalized.find(prefix) { + let after = &normalized[pos + prefix.len()..]; let num_str: String = after .trim() .chars() @@ -273,8 +278,14 @@ fn retry_after_hint(secs: Option) -> String { Some(1) => " Try again in 1 second.".to_string(), Some(n) if n < 90 => format!(" Try again in {n} seconds."), Some(n) => { - let mins = n / 60; - format!(" Try again in about {mins} minutes.") + // Round UP — never tell the user to retry sooner than + // the upstream actually allows. 90–119s used to render + // as "about 1 minutes" both because of integer flooring + // and missing singular/plural handling (CodeRabbit + // review on #2371). + let mins = (n / 60) + u64::from(n % 60 != 0); + let unit = if mins == 1 { "minute" } else { "minutes" }; + format!(" Try again in about {mins} {unit}.") } None => String::new(), } diff --git a/src/openhuman/channels/providers/web_tests.rs b/src/openhuman/channels/providers/web_tests.rs index 85b49b835..b18ad6355 100644 --- a/src/openhuman/channels/providers/web_tests.rs +++ b/src/openhuman/channels/providers/web_tests.rs @@ -307,6 +307,51 @@ fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() ); } +#[test] +fn classify_inference_error_rate_limited_minute_window_uses_singular_and_rounds_up() { + // CodeRabbit on #2371: the 90–119s band used to render + // "about 1 minutes" (floor + missing plural handling). Round + // up + singular/plural now produces "about 2 minutes" for 90s + // (since 90s ceils to 2 minutes) and "about 2 minutes" for + // 119s (ditto). 60s lands in the seconds band; 61s is the + // smallest minute-band input but still <90 so seconds; 90s is + // the first true minute-band input. + let (_, m_90) = classify_inference_error("429 Too Many Requests: Retry-After: 90"); + assert!( + m_90.contains("about 2 minutes"), + "90s must round up to 2 minutes (not floor to 1): {m_90}" + ); + let (_, m_119) = classify_inference_error("429 Too Many Requests: Retry-After: 119"); + assert!( + m_119.contains("about 2 minutes"), + "119s must round up to 2 minutes: {m_119}" + ); + // Exactly 60-multiple inputs above the 90s threshold render as + // exact minutes with no round-up bump. + let (_, m_120) = classify_inference_error("429 Too Many Requests: Retry-After: 120"); + assert!( + m_120.contains("about 2 minutes"), + "exact 120s must stay as 2 minutes: {m_120}" + ); +} + +#[test] +fn classify_inference_error_rate_limited_parses_quoted_json_retry_after() { + // CodeRabbit on #2371: a serialised provider body like + // {"retry_after": 30} would previously miss every prefix + // because the quote stopped `lower.find("retry_after:")` from + // matching. The parser now strips quotes so the JSON-key shape + // resolves the same as the unquoted header shape. + let (category, message) = classify_inference_error( + r#"openrouter API error (429 Too Many Requests): {"retry_after": 30, "code": "rate_limited"}"#, + ); + assert_eq!(category, "rate_limited"); + assert!( + message.contains("Try again in 30 seconds"), + "quoted JSON retry_after must be parsed: {message}" + ); +} + #[test] fn generic_error_copy_is_sanitized_and_has_discord_report_action() { let message = generic_inference_error_user_message();