Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 99 additions & 3 deletions src/openhuman/channels/providers/web.rs
Original file line number Diff line number Diff line change
Expand Up @@ -228,16 +228,112 @@ fn with_provider_detail(summary: &str, err: &str) -> String {
}
}

/// Extract a Retry-After / retry_after seconds hint from a free-form
/// error string. Mirrors the typed [`crate::openhuman::inference::
/// provider::reliable::parse_retry_after_ms`] helper but operates on
/// the already-flattened `String` that reaches the channel-classifier
/// layer.
///
/// Returns `Some(n)` when a non-negative integer or fractional value
/// follows one of the canonical headers; fractional values are
/// rounded up so the user is never told to retry sooner than the
/// upstream actually allows.
fn parse_retry_after_secs_from_str(err: &str) -> Option<u64> {
let lower = err.to_ascii_lowercase();
for prefix in &[
"retry-after:",
"retry_after:",
"retry-after ",
"retry_after ",
] {
if let Some(pos) = lower.find(prefix) {
let after = &err[pos + prefix.len()..];
let num_str: String = after
.trim()
.chars()
.take_while(|c| c.is_ascii_digit() || *c == '.')
.collect();
if let Ok(secs) = num_str.parse::<f64>() {
if secs.is_finite() && secs >= 0.0 {
return Some(secs.ceil() as u64);
}
}
}
}
None
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

/// Format the retry-after hint as a short user-friendly suffix
/// (`" Try again in 30 seconds."`). Returns an empty string when no
/// hint is available so callers can `format!("{summary}{hint}")`
/// without branching on `Option`.
fn retry_after_hint(secs: Option<u64>) -> String {
match secs {
Some(0) => " You can retry immediately.".to_string(),
Some(1) => " Try again in 1 second.".to_string(),
Some(n) if n < 90 => format!(" Try again in {n} seconds."),
Some(n) => {
let mins = n / 60;
format!(" Try again in about {mins} minutes.")
}
Comment thread
coderabbitai[bot] marked this conversation as resolved.
None => String::new(),
}
}

/// Detect the SecurityPolicy global hourly action-budget signal
/// emitted by the built-in tools (`web_fetch`, `curl`, `http_request`,
/// `polymarket`, `composio`, etc.) — see `src/openhuman/security/
/// policy.rs::SecurityPolicy::is_rate_limited`.
///
/// We match the canonical English strings those tools emit. This is
/// load-bearing for issue #2364: before this check ran, any string
/// containing "rate limit" was misclassified as a provider 429 and
/// the user saw the generic "You're being rate-limited" copy, which
/// hides that the cap is OpenHuman's own per-hour safety budget,
/// not the upstream LLM provider.
fn is_action_budget_exhausted(err_lower: &str) -> bool {
err_lower.contains("rate limit exceeded: action budget exhausted")
|| err_lower.contains("rate limit exceeded: too many actions in the last hour")
|| err_lower.contains("action blocked: rate limit exceeded")
}

fn classify_inference_error(err: &str) -> (&'static str, String) {
let lower = err.to_lowercase();
if lower.contains("rate limit") || lower.contains("429") {
// Order matters: the SecurityPolicy hourly cap and the
// agent-loop max-iterations error both surface as strings that
// contain "rate limit" / "iteration", so they MUST be checked
// before the generic provider-429 branch — otherwise users see
// a confusing "your AI provider is rate-limiting you" message
// for limits OpenHuman itself enforced (issue #2364).
if is_action_budget_exhausted(&lower) {
(
"action_budget_exceeded",
with_provider_detail(
"You've hit OpenHuman's per-hour action budget — this is a local safety cap, \
not your AI provider. The window decays gradually; you can keep chatting in \
this thread and tool-heavy steps will resume as the budget refills.",
err,
),
)
} else if crate::openhuman::agent::error::is_max_iterations_error(err) {
(
"rate_limited",
"max_iterations",
with_provider_detail(
"You're being rate-limited. Please wait a moment and try again.",
"The agent ran the maximum number of tool steps for one turn without \
finishing. This usually means a tool kept failing (often a rate limit on a \
web fetch). You can retry the same question in this thread once the \
underlying limit clears.",
err,
),
)
} else if lower.contains("rate limit") || lower.contains("429") {
let retry = parse_retry_after_secs_from_str(err);
let summary = format!(
"Your AI provider is rate-limiting requests. This is a transient upstream \
limit, not a thread-level block — you can retry in this thread.{}",
retry_after_hint(retry)
);
("rate_limited", with_provider_detail(summary.as_str(), err))
} else if lower.contains("timeout") || lower.contains("timed out") {
(
"timeout",
Expand Down
100 changes: 100 additions & 0 deletions src/openhuman/channels/providers/web_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,106 @@ fn classify_inference_error_surfaces_provider_config_rejection_actionably() {
}
}

// ── #2364: rate-limit classification + retry-after surfacing ────

#[test]
fn classify_inference_error_distinguishes_action_budget_from_provider_429() {
// SecurityPolicy hourly cap (web_fetch / curl / http_request emit
// these strings). Before #2364 these were misclassified as a
// provider 429 and the user saw the "your AI provider is rate-
// limiting you" copy — which is wrong, the limit is OpenHuman's
// own per-hour safety budget.
for raw in [
"Rate limit exceeded: action budget exhausted",
"Rate limit exceeded: too many actions in the last hour",
"Action blocked: rate limit exceeded",
] {
let (category, message) = classify_inference_error(raw);
assert_eq!(
category, "action_budget_exceeded",
"action-budget signal must NOT classify as provider rate_limited: {raw}"
);
assert!(
message.contains("local safety cap"),
"must clarify the limit is OpenHuman-local, not upstream: {message}"
);
assert!(
message.contains("can keep chatting in this thread"),
"must tell the user the thread isn't blocked: {message}"
);
}
}

#[test]
fn classify_inference_error_max_iterations_gets_dedicated_branch() {
// The agent loop's MaxIterationsExceeded variant renders as
// "Agent exceeded maximum tool iterations (N)". Before #2364
// this fell through to the generic `inference` bucket and the
// user saw a vague "something went wrong" copy. Now it gets a
// specific message that says retrying in the same thread is OK.
let raw = "run_chat_task failed client_id=abc thread_id=t1 \
error=Agent exceeded maximum tool iterations (10)";
let (category, message) = classify_inference_error(raw);
assert_eq!(category, "max_iterations");
assert!(
message.contains("maximum number of tool steps"),
"must explain the cap: {message}"
);
assert!(
message.contains("retry the same question in this thread"),
"must reassure same-thread recovery: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_surfaces_retry_after_seconds() {
let raw = "openrouter API error (429 Too Many Requests): Retry-After: 30";
let (category, message) = classify_inference_error(raw);
assert_eq!(category, "rate_limited");
assert!(
message.contains("Try again in 30 seconds"),
"must surface the parsed retry-after window: {message}"
);
assert!(
message.contains("retry in this thread"),
"must clarify the thread isn't blocked: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_no_retry_after_omits_hint() {
let raw = "openrouter API error (429 Too Many Requests)";
let (category, message) = classify_inference_error(raw);
assert_eq!(category, "rate_limited");
// Generic copy must still describe the situation accurately.
assert!(message.contains("transient upstream limit"));
// No hallucinated countdown when none was parsed.
assert!(
!message.contains("Try again in"),
"must NOT invent a retry-after when none was parsed: {message}"
);
}

#[test]
fn classify_inference_error_rate_limited_handles_fractional_and_minute_windows() {
// Fractional seconds round up — never tell the user to retry
// sooner than the upstream actually allows.
let (_, message) = classify_inference_error("429 Too Many Requests: retry_after: 2.4");
assert!(
message.contains("Try again in 3 seconds"),
"fractional 2.4 must round up to 3: {message}"
);

// Long windows switch to a "minutes" rendering at the 90s
// threshold so the user gets a less precise but more readable
// hint.
let (_, message) = classify_inference_error("429 Too Many Requests: Retry-After: 180");
assert!(
message.contains("about 3 minutes"),
"180s must render as minutes: {message}"
);
}

#[test]
fn generic_error_copy_is_sanitized_and_has_discord_report_action() {
let message = generic_inference_error_user_message();
Expand Down
Loading