tinyhumansai · graycyrus · May 21, 2026 · May 20, 2026 · May 20, 2026
@@ -228,16 +228,112 @@ fn with_provider_detail(summary: &str, err: &str) -> String {
     }
 }
 
+/// Pull a `Retry-After` / `retry_after` delay, in whole seconds, out
+/// of an error that has already been flattened to text. Mirrors the
+/// typed [`crate::openhuman::inference::provider::reliable::
+/// parse_retry_after_ms`] helper, but works at the channel-classifier
+/// layer where only the `String` form of the error is left.
+///
+/// The key is matched ASCII-case-insensitively and must be followed
+/// by `:` or a space; only the first occurrence of each spelling is
+/// examined. The value is the leading run of ASCII digits and `.`
+/// once surrounding whitespace is trimmed.
+///
+/// Returns `Some(n)` with fractional values rounded up, so the user is
+/// never told to retry sooner than the upstream allows, and `None`
+/// when no spelling is followed by a parseable number.
+fn parse_retry_after_secs_from_str(err: &str) -> Option<u64> {
+    const KEYS: [&str; 4] = [
+        "retry-after:",
+        "retry_after:",
+        "retry-after ",
+        "retry_after ",
+    ];
+    // ASCII lowercasing never changes byte lengths, so offsets found
+    // in `haystack` are valid slice positions in `err` as well.
+    let haystack = err.to_ascii_lowercase();
+    KEYS.iter().find_map(|key| {
+        let start = haystack.find(key)? + key.len();
+        let tail = err[start..].trim();
+        let end = tail
+            .find(|c: char| !(c.is_ascii_digit() || c == '.'))
+            .unwrap_or(tail.len());
+        let secs = tail[..end].parse::<f64>().ok()?;
+        (secs.is_finite() && secs >= 0.0).then(|| secs.ceil() as u64)
+    })
+}
+
+/// Render the retry-after hint as a short user-facing suffix, e.g.
+/// `" Try again in 30 seconds."`; `None` yields `""` so callers can
+/// `format!("{summary}{hint}")` unconditionally. From 90s upward the
+/// wait is shown in minutes, rounded up so it never undercuts `secs`.
+fn retry_after_hint(secs: Option<u64>) -> String {
+    match secs {
+        Some(0) => " You can retry immediately.".to_string(),
+        Some(1) => " Try again in 1 second.".to_string(),
+        Some(n) if n < 90 => format!(" Try again in {n} seconds."),
+        Some(n) => {
+            let mins = n / 60 + u64::from(n % 60 != 0);
+            format!(" Try again in about {mins} minutes.")
+        }
+        None => String::new(),
+    }
+}
+
+/// Recognise the SecurityPolicy global hourly action-budget signal that
+/// the built-in tools (`web_fetch`, `curl`, `http_request`, `polymarket`,
+/// `composio`, etc.) emit — see `src/openhuman/security/policy.rs::
+/// SecurityPolicy::is_rate_limited`.
+///
+/// `err_lower` must already be lowercased; it is searched for the
+/// canonical English phrases. Load-bearing for issue #2364: without it
+/// these strings read as a provider 429 and hide that the cap is local.
+fn is_action_budget_exhausted(err_lower: &str) -> bool {
+    const SIGNALS: [&str; 3] = [
+        "rate limit exceeded: action budget exhausted",
+        "rate limit exceeded: too many actions in the last hour",
+        "action blocked: rate limit exceeded",
+    ];
+    SIGNALS.iter().any(|signal| err_lower.contains(signal))
+}
+
 fn classify_inference_error(err: &str) -> (&'static str, String) {
     let lower = err.to_lowercase();
-    if lower.contains("rate limit") || lower.contains("429") {
+    // Order matters: the SecurityPolicy hourly cap and the
+    // agent-loop max-iterations error both surface as strings that
+    // contain "rate limit" / "iteration", so they MUST be checked
+    // before the generic provider-429 branch — otherwise users see
+    // a confusing "your AI provider is rate-limiting you" message
+    // for limits OpenHuman itself enforced (issue #2364).
+    if is_action_budget_exhausted(&lower) {
+        (
+            "action_budget_exceeded",
+            with_provider_detail(
+                "You've hit OpenHuman's per-hour action budget — this is a local safety cap, \
+                 not your AI provider. The window decays gradually; you can keep chatting in \
+                 this thread and tool-heavy steps will resume as the budget refills.",
+                err,
+            ),
+        )
+    } else if crate::openhuman::agent::error::is_max_iterations_error(err) {
         (
-            "rate_limited",
+            "max_iterations",
             with_provider_detail(
-                "You're being rate-limited. Please wait a moment and try again.",
+                "The agent ran the maximum number of tool steps for one turn without \
+                 finishing. This usually means a tool kept failing (often a rate limit on a \
+                 web fetch). You can retry the same question in this thread once the \
+                 underlying limit clears.",
                 err,
             ),
         )
+    } else if lower.contains("rate limit") || lower.contains("429") {
+        let retry = parse_retry_after_secs_from_str(err);
+        let summary = format!(
+            "Your AI provider is rate-limiting requests. This is a transient upstream \
+             limit, not a thread-level block — you can retry in this thread.{}",
+            retry_after_hint(retry)
+        );
+        ("rate_limited", with_provider_detail(summary.as_str(), err))
     } else if lower.contains("timeout") || lower.contains("timed out") {
         (
             "timeout",

@@ -207,6 +207,106 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() {
     }
 }
 
+// ── #2364: rate-limit classification + retry-after surfacing ────
+
+#[test]
+fn classify_inference_error_distinguishes_action_budget_from_provider_429() {
+    // The SecurityPolicy hourly cap, as emitted by web_fetch / curl /
+    // http_request. Prior to #2364 each of these was bucketed as a
+    // provider 429, which wrongly blamed the upstream provider for
+    // OpenHuman's own per-hour safety budget.
+    let budget_signals = [
+        "Rate limit exceeded: action budget exhausted",
+        "Rate limit exceeded: too many actions in the last hour",
+        "Action blocked: rate limit exceeded",
+    ];
+    for raw in budget_signals {
+        let (kind, message) = classify_inference_error(raw);
+        assert_eq!(
+            kind, "action_budget_exceeded",
+            "action-budget signal must NOT classify as provider rate_limited: {raw}"
+        );
+        assert!(
+            message.contains("local safety cap"),
+            "must clarify the limit is OpenHuman-local, not upstream: {message}"
+        );
+        assert!(
+            message.contains("can keep chatting in this thread"),
+            "must tell the user the thread isn't blocked: {message}"
+        );
+    }
+}
+
+#[test]
+fn classify_inference_error_max_iterations_gets_dedicated_branch() {
+    // `MaxIterationsExceeded` from the agent loop renders as "Agent
+    // exceeded maximum tool iterations (N)". Until #2364 it landed in the
+    // generic `inference` bucket with vague "something went wrong" copy;
+    // the dedicated message also says a same-thread retry is fine.
+    let raw = "run_chat_task failed client_id=abc thread_id=t1 \
+               error=Agent exceeded maximum tool iterations (10)";
+    let outcome = classify_inference_error(raw);
+    let message = outcome.1.as_str();
+    assert_eq!(outcome.0, "max_iterations");
+    assert!(
+        message.contains("maximum number of tool steps"),
+        "must explain the cap: {message}"
+    );
+    assert!(
+        message.contains("retry the same question in this thread"),
+        "must reassure same-thread recovery: {message}"
+    );
+}
+
+#[test]
+fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() {
+    let (kind, message) =
+        classify_inference_error("openrouter API error (429 Too Many Requests): Retry-After: 30");
+    assert_eq!(kind, "rate_limited");
+    assert!(
+        message.contains("Try again in 30 seconds"),
+        "must surface the parsed retry-after window: {message}"
+    );
+    assert!(
+        message.contains("retry in this thread"),
+        "must clarify the thread isn't blocked: {message}"
+    );
+}
+
+#[test]
+fn classify_inference_error_rate_limited_no_retry_after_omits_hint() {
+    let outcome = classify_inference_error("openrouter API error (429 Too Many Requests)");
+    let message = outcome.1.as_str();
+    assert_eq!(outcome.0, "rate_limited");
+    // Without a hint the copy still has to describe the situation.
+    assert!(message.contains("transient upstream limit"));
+    // And no countdown may be made up when none was parsed.
+    assert!(
+        !message.contains("Try again in"),
+        "must NOT invent a retry-after when none was parsed: {message}"
+    );
+}
+
+#[test]
+fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() {
+    // A fractional window rounds up, so the hint never tells the
+    // user to retry before the upstream allows it.
+    let message = classify_inference_error("429 Too Many Requests: retry_after: 2.4").1;
+    assert!(
+        message.contains("Try again in 3 seconds"),
+        "fractional 2.4 must round up to 3: {message}"
+    );
+
+    // From the 90s threshold upward the hint is rendered in minutes,
+    // trading a little precision for a more readable message on a
+    // long wait.
+    let message = classify_inference_error("429 Too Many Requests: Retry-After: 180").1;
+    assert!(
+        message.contains("about 3 minutes"),
+        "180s must render as minutes: {message}"
+    );
+}
+
 #[test]
 fn generic_error_copy_is_sanitized_and_has_discord_report_action() {
     let message = generic_inference_error_user_message();