Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 69 additions & 5 deletions src/openhuman/cron/scheduler.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::core::event_bus::{publish_global, DomainEvent};
use crate::openhuman::agent::error::AgentError;
use crate::openhuman::config::Config;
use crate::openhuman::cron::{
due_jobs, next_run_for_schedule, record_last_run, record_run, remove_job, reschedule_after_run,
Expand All @@ -17,6 +18,63 @@ const MIN_POLL_SECONDS: u64 = 5;
const SHELL_JOB_TIMEOUT_SECS: u64 = 120;
const AGENT_JOB_USER_FAILURE_MESSAGE: &str = "Something went wrong. Please try again.\nThis error has been reported. You can also report it on Discord.\n<openhuman-link path=\"community/discord\">Report on Discord</openhuman-link>";

/// Map a typed [`AgentError`] to a canned, user-facing message for cron-job
/// failure notifications.
///
/// **Contract (load-bearing — see `scheduler_tests::classifier_does_not_leak_error_content`):**
/// this function returns only static `&'static str` constants. It MUST NEVER
/// interpolate any field of `err` into its output (no `format!`, no
/// `err.to_string()`, no `Debug`/`Display`). `last_agent_error` carries stack
/// traces, provider URLs with query tokens, partial response bodies and
/// occasionally user input — routing any of that into a user-visible
/// notification would be a data-exposure regression.
///
/// Variants for which we have no concrete user action (e.g.
/// [`AgentError::ToolExecutionError`], [`AgentError::Other`]) fall back to
/// [`AGENT_JOB_USER_FAILURE_MESSAGE`], preserving today's behaviour.
fn agent_error_to_user_message(err: &AgentError) -> &'static str {
match err {
AgentError::ProviderError { retryable: true, .. } => {
"The model provider is temporarily unavailable. The next run will retry automatically."
}
AgentError::ProviderError { retryable: false, .. } => {
"The model provider rejected the request. Check your provider credentials in Settings \u{2192} AI \u{2192} LLM."
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] "Start a new session" is solid advice in an interactive context, but cron jobs start a fresh session on every run automatically — a user reading this notification can't really act on it. Consider rewording to something like:

"The conversation grew too long for the model's context window. The next run will start fresh. If this recurs, pick a model with a larger context window in Settings → AI → LLM."

This also gives the user a concrete Settings path, matching the other messages.

}
AgentError::ContextLimitExceeded { .. } => {
"The conversation grew too long for the model. Start a new session or pick a model with a larger context window."
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[minor] "daily cost budget" assumes the budget window is always daily. The variant fields are spent_microdollars / budget_microdollars with no period attached. If the budget window is ever configurable (or already isn't daily), this message will mislead. A safer phrasing: "You've reached the cost budget for this agent" (drop "daily").

}
AgentError::CostBudgetExceeded { .. } => {
"You've reached the daily cost budget for this agent. Raise it in Settings \u{2192} Billing or wait for the next budget window."
}
AgentError::MaxIterationsExceeded { .. } => {
"The agent stopped after too many tool iterations. Raise the iteration cap in Settings \u{2192} AI \u{2192} LLM or simplify the task."
}
AgentError::CompactionFailed { .. } => {
"Automatic history compaction failed. The next run will start with a fresh context."
}
AgentError::PermissionDenied { .. } => {
"The agent needs a tool that isn't allowed on this channel. Adjust the permissions in Settings."
}
// ToolExecutionError and Other have no actionable canned message —
// their error bodies are too freeform to summarise safely without
// interpolating contents. Fall back to the generic copy.
AgentError::ToolExecutionError { .. } | AgentError::Other(_) => {
AGENT_JOB_USER_FAILURE_MESSAGE
}
}
}

/// Classify an [`anyhow::Error`] returned by the agent runtime into a canned
/// user-facing message. If the underlying error is a typed [`AgentError`],
/// route through [`agent_error_to_user_message`]; otherwise fall back to the
/// generic message.
fn classify_agent_anyhow_for_user(err: &anyhow::Error) -> &'static str {
match err.downcast_ref::<AgentError>() {
Some(agent_err) => agent_error_to_user_message(agent_err),
None => AGENT_JOB_USER_FAILURE_MESSAGE,
}
}

fn agent_session_target_tag(target: &SessionTarget) -> &'static str {
match target {
SessionTarget::Main => "main",
Expand Down Expand Up @@ -328,11 +386,17 @@ async fn run_agent_job(config: &Config, job: &CronJob) -> (bool, String, Option<
},
None,
),
Err(e) => (
false,
AGENT_JOB_USER_FAILURE_MESSAGE.to_string(),
Some(e.to_string()),
),
Err(e) => {
// Classify into a canned user-facing message *before* logging
// anything that touches `e`. The classifier output is a
// `&'static str` — it never contains any data derived from `e`.
// The raw error is preserved as `last_agent_error` for the
// observability pipeline (`report_error`), where stack traces
// and provider URLs are appropriate; it must NOT reach the
// user-visible notification body.
let user_message = classify_agent_anyhow_for_user(&e);
(false, user_message.to_string(), Some(e.to_string()))
}
}
}

Expand Down
288 changes: 288 additions & 0 deletions src/openhuman/cron/scheduler_tests.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::*;
use crate::openhuman::agent::error::AgentError;
use crate::openhuman::config::Config;
use crate::openhuman::cron::{self, ActiveHours, DeliveryConfig};
use crate::openhuman::security::SecurityPolicy;
Expand Down Expand Up @@ -573,3 +574,290 @@ async fn deliver_if_configured_proactive_mode_succeeds() {
};
assert!(deliver_if_configured(&config, &job, "hello").await.is_ok());
}

// ──────────────────────────────────────────────────────────────────────
// Agent-error classifier (Bug B of #2279)
//
// `agent_error_to_user_message` must:
// 1. Return the expected canned string for each handled variant.
// 2. Fall back to `AGENT_JOB_USER_FAILURE_MESSAGE` for residual variants.
// 3. NEVER interpolate any field of the input error into its output.
//
// (3) is the airtight data-exposure guard. `last_agent_error` carries
// provider URLs with query tokens, stack traces, partial response bodies and
// occasionally user input. The leak-canary fuzz below proves none of that
// can reach the user-visible notification.
// ──────────────────────────────────────────────────────────────────────

#[test]
fn agent_error_to_user_message_classifies_provider_retryable() {
let err = AgentError::ProviderError {
message: "boom".into(),
retryable: true,
};
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("temporarily unavailable"));
assert!(msg.contains("retry"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_classifies_provider_non_retryable() {
let err = AgentError::ProviderError {
message: "invalid api key".into(),
retryable: false,
};
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("provider"));
assert!(msg.contains("credentials"));
assert!(msg.contains("Settings"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_classifies_context_limit() {
let err = AgentError::ContextLimitExceeded {
utilization_pct: 98,
};
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("conversation grew too long"));
assert!(msg.contains("context window"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_classifies_cost_budget() {
let err = AgentError::CostBudgetExceeded {
spent_microdollars: 5_000_000,
budget_microdollars: 1_000_000,
};
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("cost budget"));
assert!(msg.contains("Settings"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_classifies_max_iterations() {
let err = AgentError::MaxIterationsExceeded { max: 10 };
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("tool iterations"));
assert!(msg.contains("Settings"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_classifies_compaction_failed() {
let err = AgentError::CompactionFailed {
message: "summary failed".into(),
consecutive_failures: 3,
};
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("compaction"));
assert!(msg.contains("fresh context"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_classifies_permission_denied() {
let err = AgentError::PermissionDenied {
tool_name: "shell".into(),
required_level: "Execute".into(),
channel_max_level: "ReadOnly".into(),
};
let msg = agent_error_to_user_message(&err);
assert!(msg.contains("tool"));
assert!(msg.contains("channel"));
assert!(msg.contains("Settings"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_falls_back_on_tool_execution_error() {
// ToolExecutionError has no actionable canned message — the failure
// shape is too freeform. Falls back to the residual constant.
let err = AgentError::ToolExecutionError {
tool_name: "shell".into(),
message: "denied".into(),
};
let msg = agent_error_to_user_message(&err);
assert_eq!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_falls_back_on_other() {
let err = AgentError::Other(anyhow::anyhow!("untyped failure"));
let msg = agent_error_to_user_message(&err);
assert_eq!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn agent_error_to_user_message_canned_strings_are_short() {
// Canned strings must stay ≤120 chars so they survive the 512-char
// truncation in `push_cron_alert` without losing meaning, and so they
// render cleanly in the notifications drawer. The fallback constant
// is intentionally longer (multi-line w/ Discord link) and is excluded.
let variants: Vec<AgentError> = vec![
AgentError::ProviderError {
message: "x".into(),
retryable: true,
},
AgentError::ProviderError {
message: "x".into(),
retryable: false,
},
AgentError::ContextLimitExceeded { utilization_pct: 0 },
AgentError::CostBudgetExceeded {
spent_microdollars: 0,
budget_microdollars: 0,
},
AgentError::MaxIterationsExceeded { max: 0 },
AgentError::CompactionFailed {
message: "x".into(),
consecutive_failures: 0,
},
AgentError::PermissionDenied {
tool_name: "x".into(),
required_level: "x".into(),
channel_max_level: "x".into(),
},
];
for v in &variants {
let msg = agent_error_to_user_message(v);
if msg == AGENT_JOB_USER_FAILURE_MESSAGE {
// Variant routed to the residual — length not enforced.
continue;
}
assert!(
msg.chars().count() <= 120,
"Canned message too long ({} chars) for variant {:?}: {msg:?}",
msg.chars().count(),
std::mem::discriminant(v),
);
}
}

#[test]
fn classify_agent_anyhow_routes_typed_errors() {
let typed = anyhow::Error::from(AgentError::MaxIterationsExceeded { max: 4 });
let msg = classify_agent_anyhow_for_user(&typed);
assert!(msg.contains("tool iterations"));
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn classify_agent_anyhow_falls_back_on_untyped_error() {
// Plain anyhow error with no downcast target → residual fallback.
let untyped = anyhow::anyhow!("transport blew up");
let msg = classify_agent_anyhow_for_user(&untyped);
assert_eq!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
}

#[test]
fn classifier_does_not_leak_error_content() {
// Airtight guard: populate every internal `String` / inner-error field
// of every variant with a distinct `LEAK_CANARY_<n>_<hex>` marker, then
// assert that NONE of those markers appears in the classifier's output.
// This is the mechanical proof that the classifier output never depends
// on the input error's contents.
let canaries = [
"LEAK_CANARY_0_deadbeef",
"LEAK_CANARY_1_cafebabe",
"LEAK_CANARY_2_0badf00d",
"LEAK_CANARY_3_feedface",
"LEAK_CANARY_4_8badf00d",
"LEAK_CANARY_5_1ce1ce1c",
"LEAK_CANARY_6_decafbad",
"LEAK_CANARY_7_b16b00b5",
"LEAK_CANARY_8_c001d00d",
"LEAK_CANARY_9_5ca1ab1e",
];

// Variants paired with the canaries injected into each of their fields.
// Every internal `String` / `&str` / nested-error field is populated
// with a distinct marker.
let variants: Vec<AgentError> = vec![
AgentError::ProviderError {
message: canaries[0].into(),
retryable: true,
},
AgentError::ProviderError {
message: canaries[1].into(),
retryable: false,
},
// ContextLimitExceeded has no string fields, but include it so the
// fuzz still exercises every variant uniformly.
AgentError::ContextLimitExceeded {
utilization_pct: 99,
},
AgentError::ToolExecutionError {
tool_name: canaries[2].into(),
message: canaries[3].into(),
},
AgentError::CostBudgetExceeded {
spent_microdollars: 1,
budget_microdollars: 1,
},
AgentError::MaxIterationsExceeded { max: 7 },
AgentError::CompactionFailed {
message: canaries[4].into(),
consecutive_failures: 2,
},
AgentError::PermissionDenied {
tool_name: canaries[5].into(),
required_level: canaries[6].into(),
channel_max_level: canaries[7].into(),
},
// Other(..) wraps an anyhow error built from a canary string — its
// source chain carries marker text that the classifier must NOT
// forward to the user.
AgentError::Other(anyhow::anyhow!("{}", canaries[8]).context(canaries[9].to_string())),
];

for variant in &variants {
let msg_direct = agent_error_to_user_message(variant);

// Also exercise the anyhow wrapper path so we cover both entry
// points the scheduler uses.
// (We rebuild the anyhow Error here rather than reusing `variant`
// because AgentError doesn't implement Clone.)
// The classifier output is `&'static str` so checking `msg_direct`
// covers both paths, but the explicit check guards future changes.

for canary in &canaries {
assert!(
!msg_direct.contains(canary),
"Classifier leaked `{canary}` into user-facing message: {msg_direct:?}",
);
}
}

// Sanity: also verify the fallback constant doesn't accidentally
// contain any canary substring.
for canary in &canaries {
assert!(
!AGENT_JOB_USER_FAILURE_MESSAGE.contains(canary),
"Fallback constant contains canary `{canary}` — test fixture is broken",
);
}
}

#[test]
fn classify_agent_anyhow_does_not_leak_when_downcast_succeeds() {
// Same airtight guard but through the `classify_agent_anyhow_for_user`
// entry point — proves the downcast path is just as safe.
let canary = "LEAK_CANARY_anyhow_8badf00d";
let typed = anyhow::Error::from(AgentError::ProviderError {
message: canary.into(),
retryable: false,
});
let msg = classify_agent_anyhow_for_user(&typed);
assert!(
!msg.contains(canary),
"classify_agent_anyhow_for_user leaked `{canary}`: {msg:?}",
);
// And it should be the canned non-retryable provider message, not the
// residual fallback — confirms the downcast actually fired.
assert_ne!(msg, AGENT_JOB_USER_FAILURE_MESSAGE);
assert!(msg.contains("credentials"));
}
Loading