diff --git a/gitbooks/developing/architecture/agent-harness.md b/gitbooks/developing/architecture/agent-harness.md index 8c3c9711d5..d1b9b6d680 100644 --- a/gitbooks/developing/architecture/agent-harness.md +++ b/gitbooks/developing/architecture/agent-harness.md @@ -92,6 +92,8 @@ loop { Every iteration emits a real-time `AgentProgress` event so the UI can render token-by-token streaming, "calling tool X" status, and per-iteration cost updates. +**One engine, three entry points.** This loop lives in one place — `engine::run_turn_engine` (`harness/engine/`) — and every caller drives it: `Agent::turn` (web/desktop chat), `run_tool_call_loop` (the `agent.run_turn` bus handler for other channels + triage), and `run_subagent` (spawned sub-agents). What varies per caller is supplied through small seams the engine calls into: a `ToolSource` (which tools are advertised + how a call executes), a `ProgressReporter` (top-level `Turn*` events with streaming vs. nested `Subagent*` events), a `TurnObserver` (context management, transcript persistence, history shape), a `CheckpointStrategy` (error vs. summarize when the iteration cap is hit), and a `ResponseParser` (the `ToolDispatcher` dialect). The per-call executor (`run_one_tool`), the repeated-failure circuit breaker, and the `ProviderDelta → AgentProgress` stream forwarder are shared across all three, so they can't drift. + ### Tool dispatch and tool-call dialects Different LLMs speak different tool-calling dialects. The harness abstracts that with a `ToolDispatcher` trait, which has three concrete implementations: diff --git a/src/openhuman/agent/harness/engine/checkpoint.rs b/src/openhuman/agent/harness/engine/checkpoint.rs new file mode 100644 index 0000000000..eb84b6c35d --- /dev/null +++ b/src/openhuman/agent/harness/engine/checkpoint.rs @@ -0,0 +1,51 @@ +//! Max-iteration checkpoint seam. +//! +//! When a turn exhausts its iteration budget the three callers diverge: +//! +//! 
* the channel/CLI loop returns the typed `AgentError::MaxIterationsExceeded` +//! so `Agent::run_single` can downcast and suppress Sentry noise +//! ([`ErrorCheckpoint`]); +//! * the subagent and `Agent::turn` instead summarize the run-so-far into a +//! resumable checkpoint string and return it as the turn's result (the +//! `SummarizeCheckpoint`, landed with the subagent/Agent migrations). +//! +//! [`CheckpointStrategy::on_max_iter`] receives the accumulated tool digest so a +//! summarizing strategy can produce a root-cause-aware checkpoint. + +use anyhow::Result; +use async_trait::async_trait; + +use crate::openhuman::inference::provider::UsageInfo; + +/// A checkpoint result. `usage`, when present, is the provider usage from a +/// summarization call the strategy made — the engine folds it into the turn's +/// cost and reports it to the observer so token accounting stays complete. +pub(crate) struct CheckpointOutcome { + pub text: String, + pub usage: Option, +} + +#[async_trait] +pub(crate) trait CheckpointStrategy: Send + Sync { + /// Produce the turn's result after the iteration cap is hit, or return an + /// error to surface the cap to the caller. `digest` is the accumulated + /// `tool → outcome` summary of the run so far. + async fn on_max_iter(&self, digest: &str, max_iterations: usize) -> Result; +} + +/// Surface the cap as the typed [`AgentError::MaxIterationsExceeded`], boxed +/// through `anyhow::Error`, so downstream wrappers — notably +/// `Agent::run_single` — can downcast and suppress Sentry emission for this +/// deterministic agent-state outcome (OPENHUMAN-TAURI-99 / -98). 
+pub(crate) struct ErrorCheckpoint; + +#[async_trait] +impl CheckpointStrategy for ErrorCheckpoint { + async fn on_max_iter(&self, _digest: &str, max_iterations: usize) -> Result { + Err(anyhow::Error::new( + crate::openhuman::agent::error::AgentError::MaxIterationsExceeded { + max: max_iterations, + }, + )) + } +} diff --git a/src/openhuman/agent/harness/engine/core.rs b/src/openhuman/agent/harness/engine/core.rs new file mode 100644 index 0000000000..4cffb5e7b2 --- /dev/null +++ b/src/openhuman/agent/harness/engine/core.rs @@ -0,0 +1,554 @@ +//! The unified turn loop. +//! +//! [`run_turn_engine`] is the single agentic loop the harness runs: announce the +//! turn, then per iteration run the stop-hook + context guards, send the +//! provider request (streaming deltas when the [`ProgressReporter`] supplies a +//! sink), parse the response, either return the final text or execute every +//! requested tool through the [`ToolSource`] and loop again — bailing early via +//! the shared repeated-failure circuit breaker, or handing the iteration cap to +//! the [`CheckpointStrategy`]. +//! +//! Everything that varies per caller lives behind a seam: [`ToolSource`] (tool +//! advertisement + per-call execution), [`ProgressReporter`] (Turn* vs +//! Subagent* events + streaming), [`TurnObserver`] (context management, +//! transcript persistence, worker-thread mirroring), [`CheckpointStrategy`] +//! (error vs summarize on cap) and [`ResponseParser`] (tool-call dialect). The +//! universal concerns — stop hooks, the context guard, token-budget trimming, +//! native/text history serialization and the circuit breaker — stay inline. 
+ +use anyhow::Result; +use std::fmt::Write as _; +use std::io::Write as _; + +use crate::openhuman::agent::cost::TurnCost; +use crate::openhuman::agent::multimodal; +use crate::openhuman::agent::stop_hooks::{current_stop_hooks, StopDecision, TurnState}; +use crate::openhuman::context::guard::{ContextCheckResult, ContextGuard}; +use crate::openhuman::inference::model_context::context_window_for_model; +use crate::openhuman::inference::provider::{ + ChatMessage, ChatRequest, Provider, ProviderCapabilityError, +}; + +use super::super::parse::build_native_assistant_history; +use super::super::token_budget::trim_chat_messages_to_budget; +use super::super::tool_loop::{RepeatFailureGuard, STREAM_CHUNK_MIN_CHARS}; +use super::checkpoint::CheckpointStrategy; +use super::parser::ResponseParser; +use super::progress::ProgressReporter; +use super::state::TurnObserver; +use super::tool_source::ToolSource; + +/// What a completed turn yields. `text` is the final assistant text (or the +/// circuit-breaker / checkpoint summary); `iterations` and `cost` let stateful +/// callers attribute the run. +pub(crate) struct TurnEngineOutcome { + pub text: String, + pub iterations: u32, + pub cost: TurnCost, + /// True when the turn stopped because it hit the iteration cap (the + /// `CheckpointStrategy` produced `text`), false for a normal final response + /// or an early circuit-breaker halt. `Agent::turn` keys its checkpoint-only + /// history/transcript handling off this. + pub hit_cap: bool, +} + +/// Truncate a digest entry's body so a huge tool result can't blow up the +/// checkpoint summary. Mirrors the subagent's previous `truncate_with_ellipsis`. +fn truncate_with_ellipsis(s: &str, max: usize) -> String { + if s.chars().count() <= max { + return s.to_string(); + } + let head: String = s.chars().take(max).collect(); + format!("{head}…") +} + +/// Run the agent loop over `history` using `tools`. `max_iterations` must be +/// pre-normalized (callers map `0` to a sane default). 
See the module docs for +/// the per-iteration flow. +#[allow(clippy::too_many_arguments)] +pub(crate) async fn run_turn_engine( + provider: &dyn Provider, + history: &mut Vec, + tools: &mut dyn ToolSource, + progress: &dyn ProgressReporter, + observer: &mut dyn TurnObserver, + checkpoint: &dyn CheckpointStrategy, + parser: &dyn ResponseParser, + provider_name: &str, + model: &str, + temperature: f64, + silent: bool, + multimodal_config: &crate::openhuman::config::MultimodalConfig, + max_iterations: usize, + on_delta: Option>, +) -> Result { + let mut context_guard = context_window_for_model(model) + .map(ContextGuard::with_context_window) + .unwrap_or_else(ContextGuard::new); + let mut turn_cost = TurnCost::new(); + + // Compiled digest of this run's tool calls + results, for a graceful + // checkpoint if the iteration cap is hit. Accumulated as the loop runs so + // it survives history trimming. + let mut run_tool_digest = String::new(); + + // Announce turn start. Lifecycle (turn/iteration) events are `.await`-ed so + // they survive downstream backpressure — dropping one would desync the + // web-channel progress bridge. + progress.turn_started().await; + + let stop_hooks = current_stop_hooks(); + // Repeated-failure circuit breaker — halts with a root cause rather than + // grinding to `max_iterations`. 
+ let mut failure_guard = RepeatFailureGuard::new(); + let mut halt_reason: Option = None; + for iteration in 0..max_iterations { + progress + .iteration_started((iteration + 1) as u32, max_iterations as u32) + .await; + + // ── Stop hooks: policy check before the next LLM call ── + if !stop_hooks.is_empty() { + let state = TurnState { + iteration: (iteration + 1) as u32, + max_iterations: max_iterations as u32, + cost: &turn_cost, + model, + }; + for hook in &stop_hooks { + match hook.check(&state).await { + StopDecision::Continue => {} + StopDecision::Stop { reason } => { + tracing::warn!( + iteration = (iteration + 1), + hook = hook.name(), + reason = %reason, + "[agent_loop] stop hook triggered — aborting turn" + ); + anyhow::bail!("Agent turn stopped by hook '{}': {reason}", hook.name()); + } + } + } + } + + // ── Context guard: check utilization before each LLM call ── + match context_guard.check() { + ContextCheckResult::Ok => {} + ContextCheckResult::CompactionNeeded => { + tracing::warn!( + iteration, + "[agent_loop] context guard: compaction needed (>{:.0}% full)", + crate::openhuman::context::guard::COMPACTION_TRIGGER_THRESHOLD * 100.0 + ); + } + ContextCheckResult::ContextExhausted { + utilization_pct, + reason, + } => { + let msg = format!("Context window exhausted ({utilization_pct}% full): {reason}"); + crate::core::observability::report_error( + msg.as_str(), + "agent", + "context_exhausted", + &[ + ("provider", provider_name), + ("model", model), + ("utilization_pct", &utilization_pct.to_string()), + ], + ); + anyhow::bail!(msg); + } + } + + if let Some(context_window) = context_window_for_model(model) { + let budget_outcome = trim_chat_messages_to_budget(history, context_window); + if budget_outcome.trimmed { + log::warn!( + "[agent_loop] pre-dispatch history trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}", + model, + context_window, + budget_outcome.original_tokens, + budget_outcome.final_tokens, + 
budget_outcome.messages_removed + ); + } else { + tracing::debug!( + iteration, + model, + context_window, + estimated_tokens = budget_outcome.final_tokens, + "[agent_loop] pre-dispatch token budget ok" + ); + } + } + + // Caller-specific pre-dispatch work (e.g. Agent's ContextManager). + observer.before_dispatch(history, iteration).await?; + + tracing::debug!(iteration, "[agent_loop] sending LLM request"); + let image_marker_count = multimodal::count_image_markers(history); + if image_marker_count > 0 && !provider.supports_vision() { + let cap_err = ProviderCapabilityError { + provider: provider_name.to_string(), + capability: "vision".to_string(), + message: format!( + "received {image_marker_count} image marker(s), but this provider does not support vision input" + ), + }; + crate::core::observability::report_error( + &cap_err, + "agent", + "provider_capability", + &[ + ("provider", provider_name), + ("capability", "vision"), + ("model", model), + ], + ); + return Err(cap_err.into()); + } + + let prepared_messages = + multimodal::prepare_messages_for_provider(history, multimodal_config).await?; + + // Recomputed each iteration: a `ToolSource` may register tools lazily + // mid-turn, so native-tool enablement can flip from off to on. + let request_tools = if provider.supports_native_tools() && !tools.request_specs().is_empty() + { + Some(tools.request_specs()) + } else { + None + }; + + // ProviderDelta → progress forwarder for this iteration (no-op for + // flavors that don't stream). Sender dropped after the chat call so the + // forwarder exits cleanly. 
+ let (delta_tx_opt, delta_forwarder) = progress.make_stream_sink((iteration + 1) as u32); + + let chat_result = provider + .chat( + ChatRequest { + messages: &prepared_messages.messages, + tools: request_tools, + stream: delta_tx_opt.as_ref(), + }, + model, + temperature, + ) + .await; + + drop(delta_tx_opt); + if let Some(handle) = delta_forwarder { + let _ = handle.await; + } + + let ( + response_text, + display_text, + reasoning_content, + tool_calls, + assistant_history_content, + native_tool_calls, + ) = match chat_result { + Ok(resp) => { + // Update context guard + cost with token usage from this response. + if let Some(ref usage) = resp.usage { + context_guard.update_usage(usage); + turn_cost.add_call(model, usage); + observer.record_usage(model, usage); + tracing::debug!( + iteration, + input_tokens = usage.input_tokens, + output_tokens = usage.output_tokens, + context_window = usage.context_window, + cumulative_usd = turn_cost.total_usd(), + "[agent_loop] LLM response received" + ); + progress + .cost_updated(model, (iteration + 1) as u32, &turn_cost) + .await; + } else { + tracing::debug!( + iteration, + "[agent_loop] LLM response received (no usage info)" + ); + } + + let response_text = resp.text_or_empty().to_string(); + let (display_text, calls) = parser.parse(&resp); + + tracing::debug!( + iteration, + native_tool_calls = resp.tool_calls.len(), + parsed_tool_calls = calls.len(), + "[agent_loop] tool calls parsed" + ); + + let assistant_history_content = if resp.tool_calls.is_empty() { + response_text.clone() + } else { + build_native_assistant_history( + &response_text, + resp.reasoning_content.as_deref(), + &resp.tool_calls, + ) + }; + + let reasoning_content = resp.reasoning_content; + let native_calls = resp.tool_calls; + ( + response_text, + display_text, + reasoning_content, + calls, + assistant_history_content, + native_calls, + ) + } + Err(e) => { + // Transient upstream failures are already classified + retried by + // reliable.rs and 
reported once when all providers are exhausted; + // re-reporting per iteration floods Sentry (OPENHUMAN-TAURI-3Y/3Z). + let transient = + crate::openhuman::inference::provider::reliable::is_rate_limited(&e) + || crate::openhuman::inference::provider::reliable::is_upstream_unhealthy( + &e, + ); + if transient { + tracing::warn!( + domain = "agent", + operation = "provider_chat", + provider = provider_name, + model = model, + iteration = iteration + 1, + error = %format!("{e:#}"), + "[agent] transient provider_chat failure — retried upstream" + ); + } else { + crate::core::observability::report_error_or_expected( + &e, + "agent", + "provider_chat", + &[ + ("provider", provider_name), + ("model", model), + ("iteration", &(iteration + 1).to_string()), + ], + ); + } + return Err(e); + } + }; + + if tool_calls.is_empty() { + tracing::debug!( + iteration, + "[agent_loop] no tool calls — returning final response" + ); + // The final answer is the narrative text, falling back to the raw + // response text when the parser stripped everything (mirrors the + // legacy `Agent::turn` `final_text` logic). + let final_out = if display_text.is_empty() { + response_text.clone() + } else { + display_text.clone() + }; + // A completion with no text *and* no tool calls is a degenerate + // response. Callers that disallow it (Agent::turn) surface a typed + // error instead of a silent blank reply; the channel/subagent loops + // return it verbatim. + if final_out.trim().is_empty() && !observer.allow_empty_final() { + log::warn!( + "[agent_loop] provider returned an empty final response (i={}, no text, no tool calls) — surfacing as error", + iteration + 1 + ); + return Err( + crate::openhuman::agent::error::AgentError::EmptyProviderResponse { + iteration: iteration + 1, + } + .into(), + ); + } + // No tool calls — final response. Relay the text in small chunks + // when a streaming draft sink exists. 
+ if let Some(ref tx) = on_delta { + let mut chunk = String::new(); + for word in final_out.split_inclusive(char::is_whitespace) { + chunk.push_str(word); + if chunk.len() >= STREAM_CHUNK_MIN_CHARS + && tx.send(std::mem::take(&mut chunk)).await.is_err() + { + break; // receiver dropped + } + } + if !chunk.is_empty() { + let _ = tx.send(chunk).await; + } + } + history.push(ChatMessage::assistant(response_text.clone())); + observer.on_assistant( + &final_out, + &response_text, + reasoning_content.as_deref(), + &[], + &[], + iteration, + true, + ); + observer.after_iteration(history, iteration); + log::info!( + "[agent_loop] turn complete: iters={} provider_calls={} tokens_in={} tokens_out={} cached_in={} usd={:.4}", + (iteration + 1), + turn_cost.call_count, + turn_cost.input_tokens, + turn_cost.output_tokens, + turn_cost.cached_input_tokens, + turn_cost.total_usd(), + ); + progress.turn_completed((iteration + 1) as u32).await; + return Ok(TurnEngineOutcome { + text: final_out, + iterations: (iteration + 1) as u32, + cost: turn_cost, + hit_cap: false, + }); + } + + // Print any text the LLM produced alongside tool calls (unless silent) + if !silent && !display_text.is_empty() { + print!("{display_text}"); + let _ = std::io::stdout().flush(); + } + + // Execute each tool call and build results. `individual_results` tracks + // per-call output so native-mode history can emit one `role: tool` + // message per call with the correct id. + let mut tool_results = String::new(); + let mut individual_results: Vec = Vec::new(); + for (call_idx, call) in tool_calls.iter().enumerate() { + // Stable id threaded through the start/complete pair. The fallback + // includes `call_idx` to stay unique when the same tool name + // appears multiple times in one iteration. + let progress_call_id = call + .id + .clone() + .unwrap_or_else(|| format!("loop-{iteration}-{call_idx}-{}", call.name)); + + // Full per-call lifecycle is owned by the ToolSource. 
+ let outcome = tools + .execute_call(call, iteration, progress, &progress_call_id) + .await; + + individual_results.push(outcome.text.clone()); + let _ = writeln!( + tool_results, + "\n{}\n", + call.name, outcome.text + ); + + // Record this call in the run digest (output truncated) for a + // possible max-iteration checkpoint. + let _ = writeln!( + run_tool_digest, + "- {} [{}]: {}", + call.name, + if outcome.success { "ok" } else { "failed" }, + truncate_with_ellipsis(&outcome.text, 800) + ); + + observer.on_tool_result( + &progress_call_id, + &call.name, + &outcome.text, + outcome.success, + iteration, + ); + + // Repeated-failure circuit breaker (shared guard). + if let Some(reason) = failure_guard.record( + &call.name, + &call.arguments.to_string(), + outcome.success, + &outcome.text, + ) { + tracing::warn!( + iteration, + tool = call.name.as_str(), + "[agent_loop] circuit breaker tripped — halting with root cause" + ); + halt_reason = Some(reason); + } + } + + // Add assistant message with tool calls + tool results to history. + // Native mode: JSON-structured messages so convert_messages() can + // reconstruct OpenAI-format tool_calls + tool result messages. Prompt + // mode: XML-based text format. 
+ history.push(ChatMessage::assistant(assistant_history_content)); + observer.on_assistant( + &display_text, + &response_text, + reasoning_content.as_deref(), + &native_tool_calls, + &tool_calls, + iteration, + false, + ); + if native_tool_calls.is_empty() { + let content = format!("[Tool results]\n{tool_results}"); + observer.on_results_batch(&content, iteration); + history.push(ChatMessage::user(content)); + } else { + for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) { + let tool_msg = serde_json::json!({ + "tool_call_id": native_call.id, + "content": result, + }); + history.push(ChatMessage::tool(tool_msg.to_string())); + } + } + + observer.after_iteration(history, iteration); + + // Circuit breaker tripped this iteration: return the root-cause summary + // instead of looping to `max_iterations`. Tool results are already in + // `history`, so the caller still has full context. + if let Some(reason) = halt_reason.take() { + // Mirror the normal-completion path: emit turn-completed before the + // early return so progress consumers don't stay in-flight. + progress.turn_completed((iteration + 1) as u32).await; + return Ok(TurnEngineOutcome { + text: reason, + iterations: (iteration + 1) as u32, + cost: turn_cost, + hit_cap: false, + }); + } + } + + // Iteration cap reached — hand off to the checkpoint strategy (error vs + // summarize). The accumulated digest lets a summarizing strategy produce a + // resumable, root-cause-aware checkpoint. + let digest = if run_tool_digest.is_empty() { + "(no tool calls completed)" + } else { + run_tool_digest.as_str() + }; + let co = checkpoint.on_max_iter(digest, max_iterations).await?; + // Fold any summarization-call usage into the turn cost + observer so token + // accounting stays complete. 
+ if let Some(ref u) = co.usage { + turn_cost.add_call(model, u); + observer.record_usage(model, u); + } + // Emit the terminal lifecycle event on this successful (checkpoint) exit + // too, so consumers aren't left waiting — matching the final-response and + // circuit-breaker paths. + progress.turn_completed(max_iterations as u32).await; + Ok(TurnEngineOutcome { + text: co.text, + iterations: max_iterations as u32, + cost: turn_cost, + hit_cap: true, + }) +} diff --git a/src/openhuman/agent/harness/engine/mod.rs b/src/openhuman/agent/harness/engine/mod.rs new file mode 100644 index 0000000000..df22ec0cf7 --- /dev/null +++ b/src/openhuman/agent/harness/engine/mod.rs @@ -0,0 +1,31 @@ +//! Unified agent turn engine. +//! +//! Historically the harness carried THREE near-identical agentic loops — one +//! per entry point (`Agent::turn` for web/desktop chat, `run_tool_call_loop` +//! for non-web channels + triage, and the subagent `run_inner_loop`). They each +//! re-implemented the same shape (call the LLM → parse tool calls → execute +//! tools → append results → repeat until final text or the iteration cap) and +//! had drifted in subtle ways. +//! +//! This module is the single home for the pieces those loops share, so they +//! can't drift again. The extraction is incremental (see the unify-agent-turn +//! plan): the first piece to land is [`tools::run_one_tool`] — the per-call +//! tool executor (policy gate → scope guard → approval gate → execute with +//! timeout → scrub/tokenjuice/cap/summarize → audit), which was previously +//! duplicated verbatim across all three loops. 
+ +pub(crate) mod checkpoint; +pub(crate) mod core; +pub(crate) mod parser; +pub(crate) mod progress; +pub(crate) mod state; +pub(crate) mod tool_source; +pub(crate) mod tools; + +pub(crate) use checkpoint::{CheckpointOutcome, CheckpointStrategy, ErrorCheckpoint}; +pub(crate) use core::run_turn_engine; +pub(crate) use parser::{DefaultParser, DispatcherParser}; +pub(crate) use progress::{ProgressReporter, SubagentProgress, TurnProgress}; +pub(crate) use state::{NullObserver, TurnObserver}; +pub(crate) use tool_source::{RegistryToolSource, ToolSource}; +pub(crate) use tools::{run_one_tool, ToolRunResult}; diff --git a/src/openhuman/agent/harness/engine/parser.rs b/src/openhuman/agent/harness/engine/parser.rs new file mode 100644 index 0000000000..05601b548d --- /dev/null +++ b/src/openhuman/agent/harness/engine/parser.rs @@ -0,0 +1,70 @@ +//! Response-parsing seam. +//! +//! The channel loop and subagent extract tool calls from a provider response +//! with the built-in native-first + XML-fallback logic ([`DefaultParser`]). +//! `Agent::turn` instead uses its configured [`ToolDispatcher`] (native / XML / +//! PFormat) — PFormat in particular parses positional `name[args]` calls the +//! built-in path can't. [`DispatcherParser`] adapts a dispatcher to this seam so +//! the engine stays parser-agnostic while preserving every dispatcher's grammar. +//! +//! `parse` returns `(display_text, calls)`: the narrative text to surface (tool +//! markup stripped) and the parsed calls in the engine's internal +//! [`ParsedToolCall`] shape. The engine keeps the *raw* response text +//! separately for assistant-history serialization. + +use crate::openhuman::agent::dispatcher::ToolDispatcher; +use crate::openhuman::agent::harness::parse::{ + parse_structured_tool_calls, parse_tool_calls, ParsedToolCall, +}; +use crate::openhuman::inference::provider::ChatResponse; + +pub(crate) trait ResponseParser: Send + Sync { + /// Returns `(display_text, calls)` for this provider response. 
+ fn parse(&self, resp: &ChatResponse) -> (String, Vec); +} + +/// Built-in parser: prefer native structured tool calls, fall back to the +/// XML-tag parser over the response text. Used by the channel loop + subagent. +pub(crate) struct DefaultParser; + +impl ResponseParser for DefaultParser { + fn parse(&self, resp: &ChatResponse) -> (String, Vec) { + let response_text = resp.text_or_empty().to_string(); + let mut calls = parse_structured_tool_calls(&resp.tool_calls); + let mut parsed_text = String::new(); + if calls.is_empty() { + let (fallback_text, fallback_calls) = parse_tool_calls(&response_text); + if !fallback_text.is_empty() { + parsed_text = fallback_text; + } + calls = fallback_calls; + } + let display_text = if parsed_text.is_empty() { + response_text + } else { + parsed_text + }; + (display_text, calls) + } +} + +/// Adapts an [`Agent`]'s configured [`ToolDispatcher`] to the parser seam, +/// converting the dispatcher's `ParsedToolCall` shape into the engine's. +pub(crate) struct DispatcherParser<'a> { + pub dispatcher: &'a dyn ToolDispatcher, +} + +impl ResponseParser for DispatcherParser<'_> { + fn parse(&self, resp: &ChatResponse) -> (String, Vec) { + let (text, calls) = self.dispatcher.parse_response(resp); + let calls = calls + .into_iter() + .map(|c| ParsedToolCall { + name: c.name, + arguments: c.arguments, + id: c.tool_call_id, + }) + .collect(); + (text, calls) + } +} diff --git a/src/openhuman/agent/harness/engine/progress.rs b/src/openhuman/agent/harness/engine/progress.rs new file mode 100644 index 0000000000..64648a6e2c --- /dev/null +++ b/src/openhuman/agent/harness/engine/progress.rs @@ -0,0 +1,356 @@ +//! Progress reporting seam + the shared streaming-delta forwarder. +//! +//! The engine never names a concrete [`AgentProgress`] variant. It talks to a +//! [`ProgressReporter`], whose impls pick the event *flavor*: +//! +//! * [`TurnProgress`] — top-level chat (channel loop, `Agent::turn`): emits the +//! 
`Turn*` / `ToolCall*` / `TurnCostUpdated` events and streams provider +//! deltas as `TextDelta` / `ThinkingDelta` / `ToolCallArgsDelta`. +//! * [`SubagentProgress`] — a spawned sub-agent: emits the `Subagent*` / +//! `SubagentToolCall*` events (nested under the subagent row in the UI) and +//! streams the child's text + thinking deltas as `SubagentTextDelta` / +//! `SubagentThinkingDelta`. The `SubagentSpawned` / `SubagentCompleted` / +//! `SubagentFailed` lifecycle events stay in the spawn tool, outside the loop. +//! * [`NullProgress`] — triage / tests: every method is a no-op. + +use async_trait::async_trait; + +use crate::openhuman::agent::cost::TurnCost; +use crate::openhuman::agent::progress::AgentProgress; +use crate::openhuman::inference::provider::ProviderDelta; + +/// What the engine emits as a turn progresses. All methods default to no-ops so +/// an impl only overrides the events its flavor cares about. +#[async_trait] +pub(crate) trait ProgressReporter: Send + Sync { + async fn turn_started(&self) {} + async fn iteration_started(&self, _iteration: u32, _max_iterations: u32) {} + async fn cost_updated(&self, _model: &str, _iteration: u32, _cost: &TurnCost) {} + async fn turn_completed(&self, _iterations: u32) {} + async fn tool_started( + &self, + _call_id: &str, + _tool_name: &str, + _arguments: &serde_json::Value, + _iteration: u32, + ) { + } + #[allow(clippy::too_many_arguments)] + async fn tool_completed( + &self, + _call_id: &str, + _tool_name: &str, + _success: bool, + _output_chars: usize, + _elapsed_ms: u64, + _iteration: u32, + ) { + } + + /// Build the per-iteration `ProviderDelta` streaming sink + forwarder task, + /// or `(None, None)` when this flavor doesn't stream. Default: no streaming. + fn make_stream_sink( + &self, + _iteration: u32, + ) -> ( + Option>, + Option>, + ) { + (None, None) + } +} + +/// Top-level chat flavor: `Turn*` lifecycle + `ToolCall*` + streaming. 
+pub(crate) struct TurnProgress { + pub sink: Option>, +} + +impl TurnProgress { + pub(crate) fn new(sink: Option>) -> Self { + Self { sink } + } +} + +#[async_trait] +impl ProgressReporter for TurnProgress { + async fn turn_started(&self) { + if let Some(ref sink) = self.sink { + if let Err(e) = sink.send(AgentProgress::TurnStarted).await { + log::warn!("[agent_loop] progress sink closed at TurnStarted: {e}"); + } + } + } + + async fn iteration_started(&self, iteration: u32, max_iterations: u32) { + if let Some(ref sink) = self.sink { + if let Err(e) = sink + .send(AgentProgress::IterationStarted { + iteration, + max_iterations, + }) + .await + { + log::warn!("[agent_loop] progress sink closed at IterationStarted: {e}"); + } + } + } + + async fn cost_updated(&self, model: &str, iteration: u32, cost: &TurnCost) { + if let Some(ref sink) = self.sink { + let event = AgentProgress::TurnCostUpdated { + model: model.to_string(), + iteration, + input_tokens: cost.input_tokens, + output_tokens: cost.output_tokens, + cached_input_tokens: cost.cached_input_tokens, + total_usd: cost.total_usd(), + }; + if let Err(e) = sink.send(event).await { + log::warn!("[agent_loop] progress sink closed at TurnCostUpdated: {e}"); + } + } + } + + async fn turn_completed(&self, iterations: u32) { + if let Some(ref sink) = self.sink { + if let Err(e) = sink.send(AgentProgress::TurnCompleted { iterations }).await { + log::warn!("[agent_loop] progress sink closed at TurnCompleted: {e}"); + } + } + } + + async fn tool_started( + &self, + call_id: &str, + tool_name: &str, + arguments: &serde_json::Value, + iteration: u32, + ) { + if let Some(ref sink) = self.sink { + if let Err(e) = sink + .send(AgentProgress::ToolCallStarted { + call_id: call_id.to_string(), + tool_name: tool_name.to_string(), + arguments: arguments.clone(), + iteration, + }) + .await + { + log::warn!("[agent_loop] progress sink closed while emitting ToolCallStarted: {e}"); + } + } + } + + async fn tool_completed( + &self, + 
call_id: &str, + tool_name: &str, + success: bool, + output_chars: usize, + elapsed_ms: u64, + iteration: u32, + ) { + if let Some(ref sink) = self.sink { + if let Err(e) = sink + .send(AgentProgress::ToolCallCompleted { + call_id: call_id.to_string(), + tool_name: tool_name.to_string(), + success, + output_chars, + elapsed_ms, + iteration, + }) + .await + { + log::warn!( + "[agent_loop] progress sink closed while emitting ToolCallCompleted: {e}" + ); + } + } + } + + fn make_stream_sink( + &self, + iteration: u32, + ) -> ( + Option>, + Option>, + ) { + spawn_delta_forwarder(self.sink.clone(), iteration) + } +} + +/// Sub-agent flavor: `Subagent*` lifecycle + `SubagentToolCall*` + text/thinking streaming. +pub(crate) struct SubagentProgress { + pub sink: Option>, + pub agent_id: String, + pub task_id: String, +} + +#[async_trait] +impl ProgressReporter for SubagentProgress { + async fn iteration_started(&self, iteration: u32, max_iterations: u32) { + if let Some(ref sink) = self.sink { + let _ = sink + .send(AgentProgress::SubagentIterationStarted { + agent_id: self.agent_id.clone(), + task_id: self.task_id.clone(), + iteration, + max_iterations, + }) + .await; + } + } + + async fn tool_started( + &self, + call_id: &str, + tool_name: &str, + _arguments: &serde_json::Value, + iteration: u32, + ) { + if let Some(ref sink) = self.sink { + let _ = sink + .send(AgentProgress::SubagentToolCallStarted { + agent_id: self.agent_id.clone(), + task_id: self.task_id.clone(), + call_id: call_id.to_string(), + tool_name: tool_name.to_string(), + iteration, + }) + .await; + } + } + + async fn tool_completed( + &self, + call_id: &str, + tool_name: &str, + success: bool, + output_chars: usize, + elapsed_ms: u64, + iteration: u32, + ) { + if let Some(ref sink) = self.sink { + let _ = sink + .send(AgentProgress::SubagentToolCallCompleted { + agent_id: self.agent_id.clone(), + task_id: self.task_id.clone(), + call_id: call_id.to_string(), + tool_name: tool_name.to_string(), + success, + 
output_chars, + elapsed_ms, + iteration, + }) + .await; + } + } + + /// Stream the child's visible text + reasoning deltas to the parent, + /// attributed to this sub-agent's `task_id` so the UI renders them inside + /// the live subagent row (PR #3007). Tool-call arg fragments are dropped + /// here — they're already surfaced via the `SubagentToolCall*` lifecycle + /// events, so forwarding them too would double-render. + fn make_stream_sink( + &self, + iteration: u32, + ) -> ( + Option>, + Option>, + ) { + let Some(sink) = self.sink.clone() else { + return (None, None); + }; + let agent_id = self.agent_id.clone(); + let task_id = self.task_id.clone(); + let (tx, mut rx) = tokio::sync::mpsc::channel::(128); + let forwarder = tokio::spawn(async move { + while let Some(event) = rx.recv().await { + let mapped = match event { + ProviderDelta::TextDelta { delta } => AgentProgress::SubagentTextDelta { + agent_id: agent_id.clone(), + task_id: task_id.clone(), + delta, + iteration, + }, + ProviderDelta::ThinkingDelta { delta } => { + AgentProgress::SubagentThinkingDelta { + agent_id: agent_id.clone(), + task_id: task_id.clone(), + delta, + iteration, + } + } + ProviderDelta::ToolCallStart { .. } + | ProviderDelta::ToolCallArgsDelta { .. } => continue, + }; + // Await backpressure so streamed deltas arrive in order. + if sink.send(mapped).await.is_err() { + break; + } + } + }); + (Some(tx), Some(forwarder)) + } +} + +/// No-op reporter for triage / tests. +pub(crate) struct NullProgress; + +impl ProgressReporter for NullProgress {} + +/// Spawn a task that forwards `ProviderDelta`s from the provider's streaming +/// channel into `on_progress` as `AgentProgress` delta events, tagged with +/// `iteration` (1-based). Returns the sender to hand to +/// [`crate::openhuman::inference::provider::ChatRequest::stream`] and the task +/// handle to await after the chat call. 
+/// +/// Returns `(None, None)` when there is no progress sink — the caller then +/// passes `stream: None` and the provider uses its non-streaming HTTP path. +/// +/// Backpressure discipline: the forwarder `.await`s each `send`, so streamed +/// deltas arrive in order and are never silently dropped when the downstream +/// bridge is slow. It exits cleanly once the sender is dropped (after the chat +/// call) or the downstream closes. +pub(crate) fn spawn_delta_forwarder( + on_progress: Option>, + iteration: u32, +) -> ( + Option>, + Option>, +) { + let Some(progress_sink) = on_progress else { + return (None, None); + }; + let (tx, mut rx) = tokio::sync::mpsc::channel::(128); + let forwarder = tokio::spawn(async move { + while let Some(event) = rx.recv().await { + let mapped = match event { + ProviderDelta::TextDelta { delta } => AgentProgress::TextDelta { delta, iteration }, + ProviderDelta::ThinkingDelta { delta } => { + AgentProgress::ThinkingDelta { delta, iteration } + } + ProviderDelta::ToolCallStart { call_id, tool_name } => { + AgentProgress::ToolCallArgsDelta { + call_id, + tool_name, + delta: String::new(), + iteration, + } + } + ProviderDelta::ToolCallArgsDelta { call_id, delta } => { + AgentProgress::ToolCallArgsDelta { + call_id, + tool_name: String::new(), + delta, + iteration, + } + } + }; + if progress_sink.send(mapped).await.is_err() { + break; + } + } + }); + (Some(tx), Some(forwarder)) +} diff --git a/src/openhuman/agent/harness/engine/state.rs b/src/openhuman/agent/harness/engine/state.rs new file mode 100644 index 0000000000..359c6fc5b7 --- /dev/null +++ b/src/openhuman/agent/harness/engine/state.rs @@ -0,0 +1,96 @@ +//! Turn-state observer seam. +//! +//! The engine drives the loop over a `Vec` working buffer, but the +//! three callers want to *do* different things around each step: +//! +//! * the channel loop wants nothing extra ([`NullObserver`]); +//! * the subagent wants per-iteration transcript persistence, usage +//! 
accumulation, and worker-thread mirroring (assistant intents, per-call +//! results, batched text-mode results, final response); +//! * `Agent::turn` wants its `ContextManager` reduction before each dispatch, +//! transcript persistence, and per-turn usage/cost snapshots. +//! +//! [`TurnObserver`] is the seam: every method defaults to a no-op, so an impl +//! only overrides the hooks its caller needs. The engine still owns the +//! universal concerns (stop hooks, context guard, token-budget trim, the +//! circuit breaker) inline — the observer is for caller-specific side effects. + +use anyhow::Result; +use async_trait::async_trait; + +use crate::openhuman::agent::harness::parse::ParsedToolCall; +use crate::openhuman::inference::provider::{ChatMessage, ToolCall, UsageInfo}; + +#[async_trait] +pub(crate) trait TurnObserver: Send { + /// Called before each provider dispatch, after the engine's own context + /// guard + token-budget trim. `Agent::turn` runs its `ContextManager` + /// reduction chain here. Default: no-op. + async fn before_dispatch( + &mut self, + _history: &mut Vec, + _iteration: usize, + ) -> Result<()> { + Ok(()) + } + + /// Called once per provider response that carried a usage block, so the + /// caller can accumulate its own token tally / transcript usage snapshot. + fn record_usage(&mut self, _model: &str, _usage: &UsageInfo) {} + + /// Called after the assistant message for this iteration is committed to + /// the engine's working buffer. `response_text` is the raw provider text + /// (pre native serialization); `reasoning_content` is the thinking-model + /// content to round-trip; `native_tool_calls` are the provider's structured + /// calls (empty in text/prompt mode); `parsed_calls` are the engine-parsed + /// calls (empty when `is_final`). `Agent::turn` uses these to rebuild its + /// typed `ConversationMessage` history; the subagent mirrors to its worker + /// thread. 
+ #[allow(clippy::too_many_arguments)] + fn on_assistant( + &mut self, + _display_text: &str, + _response_text: &str, + _reasoning_content: Option<&str>, + _native_tool_calls: &[ToolCall], + _parsed_calls: &[ParsedToolCall], + _iteration: usize, + _is_final: bool, + ) { + } + + /// Called after one tool's result is known, in native-tool mode (one + /// `role:tool` message per call). Subagent mirrors per-call results to its + /// worker thread; `Agent::turn` buffers them to rebuild typed history. + fn on_tool_result( + &mut self, + _call_id: &str, + _tool_name: &str, + _result_text: &str, + _success: bool, + _iteration: usize, + ) { + } + + /// Called after a batched `[Tool results]` user message is committed + /// (text/prompt mode, where there are no per-call `role:tool` messages). + fn on_results_batch(&mut self, _content: &str, _iteration: usize) {} + + /// Called after the iteration's history is finalized (the transcript + /// persistence point) — both after the final response and after each tool + /// round's results are appended. + fn after_iteration(&mut self, _history: &[ChatMessage], _iteration: usize) {} + + /// Whether an empty final response (no text, no tool calls) is acceptable. + /// The channel/subagent loops return it as `Ok("")`; `Agent::turn` treats + /// it as a degenerate/poisoned completion and surfaces an error instead of + /// a silent blank reply (bug-report-2026-05-26 A1). Default: allowed. + fn allow_empty_final(&self) -> bool { + true + } +} + +/// No-op observer for the channel/CLI/triage loop, which keeps no extra state. +pub(crate) struct NullObserver; + +impl TurnObserver for NullObserver {} diff --git a/src/openhuman/agent/harness/engine/tool_source.rs b/src/openhuman/agent/harness/engine/tool_source.rs new file mode 100644 index 0000000000..0e35a5879b --- /dev/null +++ b/src/openhuman/agent/harness/engine/tool_source.rs @@ -0,0 +1,131 @@ +//! Tool sourcing seam for the turn engine. +//! +//! 
The three former loops resolved "what tools can the model call this turn and +//! how do I execute one" differently: +//! +//! * the channel loop advertised `registry + extra` filtered by a visibility +//! whitelist, and executed via the shared [`run_one_tool`]; +//! * the subagent loop advertised a definition-filtered slice of the parent's +//! tools (with lazy toolkit registration), and had its own per-call body; +//! * `Agent::turn` advertised `Agent.visible_tool_specs` and executed via the +//! richer `Agent::execute_tool_call` (session policy + per-call permission +//! levels + `execute_with_options`). +//! +//! [`ToolSource`] is the single seam the engine talks to: it advertises the +//! request specs and owns per-call execution (including the start/complete +//! progress events). [`RegistryToolSource`] is the channel/CLI/triage impl; the +//! subagent and `Agent` impls land in later phases. + +use std::collections::HashSet; + +use async_trait::async_trait; + +use super::super::payload_summarizer::PayloadSummarizer; +use super::progress::ProgressReporter; +use super::{run_one_tool, ToolRunResult}; +use crate::openhuman::agent::harness::parse::ParsedToolCall; +use crate::openhuman::tools::policy::ToolPolicy; +use crate::openhuman::tools::{Tool, ToolSpec}; + +/// What the engine needs from "the set of tools available this turn". +#[async_trait] +pub(crate) trait ToolSource: Send { + /// The deduped, visibility-filtered specs to advertise to the provider + /// this turn. Re-read each iteration so impls that register tools lazily + /// (subagent toolkit resolution) can grow the advertised set over a turn. + fn request_specs(&self) -> &[ToolSpec]; + + /// Execute one parsed tool call end-to-end, emitting its `ToolCallStarted` + /// / `ToolCallCompleted` (or flavor-equivalent) progress events. Returns a + /// [`ToolRunResult`] the engine folds into history + the circuit breaker. 
+ async fn execute_call( + &mut self, + call: &ParsedToolCall, + iteration: usize, + progress: &dyn ProgressReporter, + progress_call_id: &str, + ) -> ToolRunResult; +} + +/// The channel/CLI/triage tool source: a persistent `registry`, optional +/// per-turn synthesised `extra` tools, an optional visibility whitelist, and a +/// pluggable [`ToolPolicy`]. Mirrors the original `run_tool_call_loop` tool +/// plumbing exactly. +pub(crate) struct RegistryToolSource<'a> { + registry: &'a [Box], + extra: &'a [Box], + visible: Option<&'a HashSet>, + tool_policy: &'a dyn ToolPolicy, + payload_summarizer: Option<&'a dyn PayloadSummarizer>, + specs: Vec, +} + +impl<'a> RegistryToolSource<'a> { + pub(crate) fn new( + registry: &'a [Box], + extra: &'a [Box], + visible: Option<&'a HashSet>, + tool_policy: &'a dyn ToolPolicy, + payload_summarizer: Option<&'a dyn PayloadSummarizer>, + ) -> Self { + // Filter to visible tools, then dedup by name before sending to the + // provider. Registry tools may collide with per-turn synthesised + // extra_tools (e.g. an `ArchetypeDelegationTool` whose + // `delegate_name = "research"` shadowing a same-named skill). Some + // providers 400 on duplicate tool names — see TAURI-RUST-4. 
+ let filtered: Vec = registry + .iter() + .chain(extra.iter()) + .filter(|tool| visible.map(|s| s.contains(tool.name())).unwrap_or(true)) + .map(|tool| tool.spec()) + .collect(); + let specs = crate::openhuman::agent::harness::session::dedup_visible_tool_specs(filtered); + Self { + registry, + extra, + visible, + tool_policy, + payload_summarizer, + specs, + } + } + + fn is_visible(&self, name: &str) -> bool { + self.visible.map(|s| s.contains(name)).unwrap_or(true) + } +} + +#[async_trait] +impl ToolSource for RegistryToolSource<'_> { + fn request_specs(&self) -> &[ToolSpec] { + &self.specs + } + + async fn execute_call( + &mut self, + call: &ParsedToolCall, + iteration: usize, + progress: &dyn ProgressReporter, + progress_call_id: &str, + ) -> ToolRunResult { + // Look up the tool by name in the combined registry + extras, subject + // to the visibility whitelist. A hallucinated / filtered-out name + // resolves to `None`, which `run_one_tool` reports as an unknown tool. + let tool_opt: Option<&dyn Tool> = self + .registry + .iter() + .chain(self.extra.iter()) + .find(|t| t.name() == call.name && self.is_visible(t.name())) + .map(|b| b.as_ref()); + run_one_tool( + tool_opt, + call, + iteration, + progress, + self.tool_policy, + self.payload_summarizer, + progress_call_id, + ) + .await + } +} diff --git a/src/openhuman/agent/harness/engine/tools.rs b/src/openhuman/agent/harness/engine/tools.rs new file mode 100644 index 0000000000..07297ec3b6 --- /dev/null +++ b/src/openhuman/agent/harness/engine/tools.rs @@ -0,0 +1,381 @@ +//! Shared per-call tool executor. +//! +//! [`run_one_tool`] runs the full lifecycle of a single parsed tool call: +//! +//! 1. emit `ToolCallStarted` (for *every* call, including ones rejected below, +//! so a client row created from streamed args always gets a terminal event); +//! 2. evaluate the pluggable [`ToolPolicy`] (deny short-circuits everything, +//! including approval side-effects); +//! 3. 
guard `CliRpcOnly` scope (such tools can't run in the autonomous loop); +//! 4. route external-effect tools through the process-global `ApprovalGate`; +//! 5. execute with the configured timeout, then scrub credentials, apply +//! tokenjuice, the per-tool size cap, and the optional payload summarizer; +//! 6. stamp the approval audit "after" row (#2135); +//! 7. emit `ToolCallCompleted`. +//! +//! It returns a [`ToolRunResult`] (`text` + `success`). The caller owns history +//! shaping (native `role:tool` messages vs XML `` blocks) and the +//! repeated-failure circuit breaker, both of which it drives uniformly from the +//! returned `success`/`text` regardless of which branch produced them. +//! +//! This body was lifted verbatim (behavior-preserving) from the canonical +//! `run_tool_call_loop` in `tool_loop.rs`; the three loops now call it instead +//! of each carrying their own copy. + +use super::super::payload_summarizer::PayloadSummarizer; +use super::progress::ProgressReporter; +use crate::openhuman::agent::harness::parse::ParsedToolCall; +use crate::openhuman::tools::policy::{PolicyDecision, ToolPolicy}; +use crate::openhuman::tools::traits::ToolScope; +use crate::openhuman::tools::Tool; + +use super::super::credentials::scrub_credentials; + +/// Outcome of a single tool call. `text` is what should be fed back to the +/// model (a result body, an error, or a denial reason); `success` is `false` +/// for any non-OK outcome (policy/approval denial, scope rejection, timeout, +/// tool error, unknown tool) so the caller's circuit breaker and history +/// formatting can treat every failure mode uniformly. +pub(crate) struct ToolRunResult { + pub text: String, + pub success: bool, +} + +/// Execute one parsed tool call end-to-end. See the module docs for the full +/// lifecycle. 
`tool_opt` is the (already visibility-filtered) tool the caller +/// resolved by name — `None` means the model requested an unknown/filtered-out +/// tool, which is reported as a structured error the LLM can correct next turn. +/// +/// `progress_call_id` is the stable id threaded through the start/complete +/// event pair (and any preceding args-delta events) so consumers can reconcile +/// tool rows by id. +pub(crate) async fn run_one_tool( + tool_opt: Option<&dyn Tool>, + call: &ParsedToolCall, + iteration: usize, + progress: &dyn ProgressReporter, + tool_policy: &dyn ToolPolicy, + payload_summarizer: Option<&dyn PayloadSummarizer>, + progress_call_id: &str, +) -> ToolRunResult { + let iteration_u32 = (iteration + 1) as u32; + + // Emit a "tool started" event for every parsed call, even ones that will be + // rejected below (approval denied, CliRpcOnly, unknown) — the client-side + // row was created from the streamed args and needs a terminal event. + progress + .tool_started(progress_call_id, &call.name, &call.arguments, iteration_u32) + .await; + + // Helper: emit a failed "tool completed" event for an early-exit path + // (denied / CliRpcOnly / unknown) so the client row flips to `error` + // instead of staying running. + let emit_failed_completion = |message: &str| { + let output_chars = message.chars().count(); + async move { + progress + .tool_completed( + progress_call_id, + &call.name, + false, + output_chars, + 0, + iteration_u32, + ) + .await; + } + }; + + // ── Tool policy check (#2131) ───────────────── + // Evaluate the pluggable ToolPolicy before any approval or execution. If + // the policy denies the call, skip everything (including approval + // side-effects) and return the denial reason as a tool error to the model. 
+ if let PolicyDecision::Deny(reason) = tool_policy.evaluate(&call.name, &call.arguments) { + tracing::debug!( + iteration, + tool = call.name.as_str(), + reason = %reason, + "[agent_loop] tool policy denied tool call" + ); + let denied = format!("Tool '{}' denied by policy: {reason}", call.name); + emit_failed_completion(&denied).await; + return ToolRunResult { + text: denied, + success: false, + }; + } + + let Some(tool) = tool_opt else { + tracing::warn!( + iteration, + tool = call.name.as_str(), + "[agent_loop] unknown tool requested" + ); + let msg = format!("Unknown tool: {}", call.name); + emit_failed_completion(&msg).await; + return ToolRunResult { + text: msg, + success: false, + }; + }; + + tracing::debug!( + iteration, + tool = call.name.as_str(), + found = true, + "[agent_loop] executing tool" + ); + + // Scope check: CliRpcOnly tools cannot run in the autonomous agent loop. + if tool.scope() == ToolScope::CliRpcOnly { + tracing::warn!( + iteration, + tool = call.name.as_str(), + "[agent_loop] tool scope is CliRpcOnly — denied in agent loop" + ); + let denied = format!( + "Tool '{}' is only available via explicit CLI/RPC invocation, not in the autonomous agent loop.", + call.name + ); + emit_failed_completion(&denied).await; + return ToolRunResult { + text: denied, + success: false, + }; + } + + // ── External-effect approval gate (#1339, #2135) ── + // Tools whose `external_effect()` returns true route through the + // process-global `ApprovalGate` so the UI can prompt the user before + // `execute()` runs. The gate is `None` when supervised mode is disabled or + // in test envs — behavior matches the pre-#1339 path. + // + // `approval_request_id` carries the persisted row id forward so we can + // stamp the terminal execution outcome onto the same `pending_approvals` + // row after the tool finishes (issue #2135). 
`None` means the tool was + // either not gated, was session-allowlist-shortcutted, or was denied — + // none of which produce an audit row that needs an "after" entry. + let mut approval_request_id: Option = None; + let mut approval_gate_for_audit: Option< + std::sync::Arc, + > = None; + if tool.external_effect_with_args(&call.arguments) { + if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() { + let summary = crate::openhuman::approval::summarize_action(&call.name, &call.arguments); + let redacted = crate::openhuman::approval::redact_args(&call.arguments); + let (outcome, request_id) = + gate.intercept_audited(&call.name, &summary, redacted).await; + match outcome { + crate::openhuman::approval::GateOutcome::Allow => { + approval_request_id = request_id; + if approval_request_id.is_some() { + approval_gate_for_audit = Some(gate); + } + } + crate::openhuman::approval::GateOutcome::Deny { reason } => { + tracing::warn!( + iteration, + tool = call.name.as_str(), + reason = %reason, + "[agent_loop] approval gate denied tool call" + ); + emit_failed_completion(&reason).await; + return ToolRunResult { + text: reason, + success: false, + }; + } + } + } + } + + let tool_deadline = crate::openhuman::tool_timeout::tool_execution_timeout_duration(); + let timeout_secs = crate::openhuman::tool_timeout::tool_execution_timeout_secs(); + let tool_started = std::time::Instant::now(); + let outcome = tokio::time::timeout(tool_deadline, tool.execute(call.arguments.clone())).await; + let elapsed_ms = tool_started.elapsed().as_millis() as u64; + let (result_text, success) = match outcome { + Ok(Ok(r)) => { + let output = r.output(); + let success = !r.is_error; + if success { + tracing::debug!( + iteration, + tool = call.name.as_str(), + output_len = output.len(), + "[agent_loop] tool succeeded" + ); + let mut scrubbed = scrub_credentials(&output); + let (compacted, tj_stats) = crate::openhuman::tokenjuice::compact_tool_output( + &call.name, + 
Some(&call.arguments), + &scrubbed, + Some(0), + ); + if tj_stats.applied { + log::debug!( + "[agent_loop] tokenjuice applied tool={} rule={} {}->{} bytes", + call.name, + tj_stats.rule_id, + tj_stats.original_bytes, + tj_stats.compacted_bytes + ); + scrubbed = compacted; + } + + // Per-tool max_result_size_chars cap. When a tool sets it and + // the (post-tokenjuice) body still exceeds the cap, truncate + // here and skip the global payload summarizer for this call — + // the cap is fast and deterministic, the summarizer is the + // fallback for tools that don't know their own size budget. + let mut hit_per_tool_cap = false; + if let Some(cap) = tool.max_result_size_chars() { + let char_count = scrubbed.chars().count(); + if char_count > cap { + let truncated: String = scrubbed.chars().take(cap).collect(); + let dropped = char_count - cap; + log::info!( + "[agent_loop] per-tool cap applied tool={} cap_chars={} original_chars={} dropped_chars={}", + call.name, + cap, + char_count, + dropped, + ); + scrubbed = format!( + "{truncated}\n\n[truncated by tool cap: {dropped} more chars not shown]" + ); + hit_per_tool_cap = true; + } + } + + if !hit_per_tool_cap { + if let Some(summarizer) = payload_summarizer { + log::debug!( + "[agent_loop] payload_summarizer intercepting tool={} bytes={}", + call.name, + scrubbed.len() + ); + match summarizer + .maybe_summarize(&call.name, None, &scrubbed) + .await + { + Ok(Some(payload)) => { + log::info!( + "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes", + call.name, + payload.original_bytes, + payload.summary_bytes + ); + scrubbed = payload.summary; + } + Ok(None) => { + log::debug!( + "[agent_loop] payload_summarizer pass-through tool={} bytes={}", + call.name, + scrubbed.len() + ); + } + Err(e) => { + log::warn!( + "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)", + call.name, + e + ); + } + } + } + } + (scrubbed, true) + } else { + // Scrub before logging — a failing tool 
payload can carry + // credentials / PII, so never log the raw output. + let scrubbed = scrub_credentials(&output); + tracing::warn!( + iteration, + tool = call.name.as_str(), + "[agent_loop] tool returned error: {scrubbed}" + ); + let (compacted, _) = crate::openhuman::tokenjuice::compact_tool_output( + &call.name, + Some(&call.arguments), + &scrubbed, + Some(1), + ); + (format!("Error: {compacted}"), false) + } + } + Ok(Err(e)) => { + crate::core::observability::report_error( + &e, + "tool", + "execute", + &[ + ("tool", call.name.as_str()), + ("outcome", "failed"), + ("iteration", &(iteration + 1).to_string()), + ], + ); + (format!("Error executing {}: {e}", call.name), false) + } + Err(_) => { + let msg = format!( + "tool '{}' timed out after {} seconds", + call.name, timeout_secs + ); + crate::core::observability::report_error( + msg.as_str(), + "tool", + "execute", + &[ + ("tool", call.name.as_str()), + ("outcome", "timeout"), + ("timeout_secs", &timeout_secs.to_string()), + ("iteration", &(iteration + 1).to_string()), + ], + ); + ( + format!( + "Error: tool '{}' timed out after {} seconds", + call.name, timeout_secs + ), + false, + ) + } + }; + progress + .tool_completed( + progress_call_id, + &call.name, + success, + result_text.chars().count(), + elapsed_ms, + iteration_u32, + ) + .await; + // ── Approval audit after-action row (#2135) ──── + // Stamp the terminal status onto the same `pending_approvals` row the gate + // created before execution, so the audit trail carries both the before + // (approval) and after (executed_at + outcome). Best-effort: a write + // failure here is logged but not propagated to the agent. 
+ if let (Some(gate), Some(req_id)) = ( + approval_gate_for_audit.as_ref(), + approval_request_id.as_ref(), + ) { + let exec_outcome = if success { + crate::openhuman::approval::ExecutionOutcome::Success + } else { + crate::openhuman::approval::ExecutionOutcome::Failure + }; + let err_text = if success { + None + } else { + Some(result_text.as_str()) + }; + gate.record_execution(req_id, exec_outcome, err_text); + } + + ToolRunResult { + text: result_text, + success, + } +} diff --git a/src/openhuman/agent/harness/mod.rs b/src/openhuman/agent/harness/mod.rs index 9d3b5bcc2e..1fee0268f7 100644 --- a/src/openhuman/agent/harness/mod.rs +++ b/src/openhuman/agent/harness/mod.rs @@ -24,6 +24,7 @@ pub(crate) mod builtin_definitions; mod credentials; pub mod definition; pub(crate) mod definition_loader; +pub(crate) mod engine; pub mod fork_context; mod instructions; pub mod interrupt; diff --git a/src/openhuman/agent/harness/session/agent_tool_exec.rs b/src/openhuman/agent/harness/session/agent_tool_exec.rs new file mode 100644 index 0000000000..b4e10cfd11 --- /dev/null +++ b/src/openhuman/agent/harness/session/agent_tool_exec.rs @@ -0,0 +1,283 @@ +//! The Agent's per-call tool executor, extracted as a free function so both +//! [`super::types::Agent::execute_tool_call`] and the turn engine's +//! `AgentToolSource` run the exact same path (visibility gate → session policy +//! → per-call permission → pluggable `ToolPolicy` → `execute_with_options` + +//! payload summarizer → per-result byte budget), without one borrowing the +//! `Agent` while the turn observer borrows it mutably. +//! +//! Progress is emitted through a [`ProgressReporter`] (the channel/web flavor), +//! matching the `Agent::turn` events 1:1. 
+ +use std::collections::HashSet; + +use crate::core::event_bus::{publish_global, DomainEvent}; +use crate::openhuman::agent::dispatcher::{ParsedToolCall, ToolExecutionResult}; +use crate::openhuman::agent::harness::engine::ProgressReporter; +use crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer; +use crate::openhuman::agent::hooks::{self, ToolCallRecord}; +use crate::openhuman::agent::tool_policy::{ + ToolCallContext, ToolPolicy, ToolPolicyDecision, ToolPolicyRequest, +}; +use crate::openhuman::agent_tool_policy::ToolPolicySession; +use crate::openhuman::tools::{Tool, ToolCallOptions}; +use crate::openhuman::util::truncate_with_ellipsis; + +/// Read-only context the Agent tool executor needs, captured up front so it +/// never borrows the `Agent` (whose history/context the turn observer mutates). +pub(super) struct AgentToolExecCtx<'a> { + pub tools: &'a [Box], + pub visible_tool_names: &'a HashSet, + pub tool_policy_session: &'a ToolPolicySession, + pub tool_policy: &'a dyn ToolPolicy, + pub payload_summarizer: Option<&'a dyn PayloadSummarizer>, + pub event_session_id: &'a str, + pub event_channel: &'a str, + pub agent_definition_id: &'a str, + pub prefer_markdown: bool, + pub budget_bytes: usize, +} + +/// Execute one parsed tool call end-to-end with the Agent's semantics, emitting +/// `ToolCallStarted` / `ToolCallCompleted` through `progress`. Returns the +/// result (for history formatting) + the call record (for post-turn hooks). 
+pub(super) async fn run_agent_tool_call( + ctx: &AgentToolExecCtx<'_>, + progress: &dyn ProgressReporter, + call: &ParsedToolCall, + iteration: usize, +) -> (ToolExecutionResult, ToolCallRecord) { + let started = std::time::Instant::now(); + publish_global(DomainEvent::ToolExecutionStarted { + tool_name: call.name.clone(), + session_id: ctx.event_session_id.to_string(), + }); + // Synthesise a fallback id for prompt-guided (non-native) tool calls so + // downstream consumers always have a stable key to reconcile rows by. + let call_id = call.tool_call_id.clone().unwrap_or_else(|| { + format!( + "turn-{iteration}-{}-{}", + call.name, + uuid::Uuid::new_v4().simple() + ) + }); + progress + .tool_started( + &call_id, + &call.name, + &call.arguments, + (iteration + 1) as u32, + ) + .await; + log::info!("[agent] executing tool: {}", call.name); + + let (raw_result, success) = if !ctx.visible_tool_names.is_empty() + && !ctx.visible_tool_names.contains(&call.name) + { + log::warn!( + "[agent] blocked tool call '{}' — not in visible tool set", + call.name + ); + ( + format!("Tool '{}' is not available to this agent", call.name), + false, + ) + } else if let Some(tool) = ctx.tools.iter().find(|t| t.name() == call.name) { + let session_decision = ctx.tool_policy_session.decision_for(&call.name); + if session_decision.is_denied() { + let required = session_decision + .required_permission + .map(|permission| permission.to_string()) + .unwrap_or_else(|| "unknown".to_string()); + ( + format!( + "Tool '{}' blocked by tool policy: requires {}, channel '{}' allows {}", + call.name, required, ctx.event_channel, session_decision.allowed_permission + ), + false, + ) + } else { + let call_required = tool.permission_level_with_args(&call.arguments); + if call_required > session_decision.allowed_permission { + tracing::debug!( + tool = call.name.as_str(), + call_required = %call_required, + allowed = %session_decision.allowed_permission, + "[agent_loop] tool action blocked by per-call 
permission check" + ); + ( + format!( + "Tool '{}' action requires {} permission, channel '{}' allows {}", + call.name, + call_required, + ctx.event_channel, + session_decision.allowed_permission + ), + false, + ) + } else { + let context = ToolCallContext::session( + ctx.event_session_id, + ctx.event_channel, + ctx.agent_definition_id.to_string(), + call_id.clone(), + (iteration + 1) as u32, + ); + let mut policy_request = + ToolPolicyRequest::new(call.name.clone(), call.arguments.clone(), context); + if let Some(generated_context) = tool.generated_runtime_context(&call.arguments) { + policy_request = policy_request.with_generated_tool_context(generated_context); + } + let policy_decision = ctx.tool_policy.check(&policy_request).await; + if let Some(reason) = policy_decision.blocking_reason() { + let blocked_action = match &policy_decision { + ToolPolicyDecision::RequireApproval { .. } => "requires approval", + ToolPolicyDecision::Deny { .. } => "denied", + ToolPolicyDecision::Allow => "allowed", + }; + crate::openhuman::tool_registry::denials::record( + call.name.as_str(), + ctx.tool_policy.name(), + blocked_action, + reason, + ); + tracing::debug!( + tool = call.name.as_str(), + policy = ctx.tool_policy.name(), + action = blocked_action, + reason = %reason, + "[agent_loop] tool blocked by policy" + ); + ( + format!( + "Tool '{}' {blocked_action} by policy '{}': {reason}", + call.name, + ctx.tool_policy.name() + ), + false, + ) + } else { + let options = ToolCallOptions { + prefer_markdown: ctx.prefer_markdown, + }; + let outcome = tool + .execute_with_options(call.arguments.clone(), options) + .await; + match outcome { + Ok(r) => { + if !r.is_error { + let mut output = r.output_for_llm(ctx.prefer_markdown); + if ctx.prefer_markdown && r.markdown_formatted.is_some() { + log::debug!( + "[agent_loop] tool={} returned markdown payload bytes={}", + call.name, + output.len() + ); + } + if let Some(ps) = ctx.payload_summarizer { + log::debug!( + "[agent_loop] 
payload_summarizer intercepting tool={} bytes={}", + call.name, + output.len() + ); + match ps.maybe_summarize(&call.name, None, &output).await { + Ok(Some(payload)) => { + log::info!( + "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes", + call.name, + payload.original_bytes, + payload.summary_bytes + ); + output = payload.summary; + } + Ok(None) => { + log::debug!( + "[agent_loop] payload_summarizer pass-through tool={} bytes={}", + call.name, + output.len() + ); + } + Err(e) => { + log::warn!( + "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)", + call.name, + e + ); + } + } + } + (output, true) + } else { + ( + format!("Error: {}", r.output_for_llm(ctx.prefer_markdown)), + false, + ) + } + } + Err(e) => (format!("Error executing {}: {e}", call.name), false), + } + } + } + } + } else { + (format!("Unknown tool: {}", call.name), false) + }; + + // Per-result byte budget — the only cache-safe reduction stage (the + // truncated body has never been sent to the backend). 
+ let (result, budget_outcome) = + crate::openhuman::context::apply_tool_result_budget(raw_result, ctx.budget_bytes); + if budget_outcome.truncated { + log::info!( + "[agent_loop] tool_result_budget applied name={} original_bytes={} final_bytes={} dropped_bytes={}", + call.name, + budget_outcome.original_bytes, + budget_outcome.final_bytes, + budget_outcome.original_bytes - budget_outcome.final_bytes + ); + } + + let elapsed_ms = started.elapsed().as_millis() as u64; + publish_global(DomainEvent::ToolExecutionCompleted { + tool_name: call.name.clone(), + session_id: ctx.event_session_id.to_string(), + success, + elapsed_ms, + }); + progress + .tool_completed( + &call_id, + &call.name, + success, + result.chars().count(), + elapsed_ms, + (iteration + 1) as u32, + ) + .await; + log::info!( + "[agent] tool completed: {} success={} elapsed_ms={}", + call.name, + success, + elapsed_ms + ); + log::debug!( + "[agent] tool output for {}: {}", + call.name, + truncate_with_ellipsis(&result, 500) + ); + + let output_summary = hooks::sanitize_tool_output(&result, &call.name, success); + let record = ToolCallRecord { + name: call.name.clone(), + arguments: call.arguments.clone(), + success, + output_summary, + duration_ms: elapsed_ms, + }; + let exec_result = ToolExecutionResult { + name: call.name.clone(), + output: result, + success, + tool_call_id: call.tool_call_id.clone(), + }; + (exec_result, record) +} diff --git a/src/openhuman/agent/harness/session/builder.rs b/src/openhuman/agent/harness/session/builder.rs index 4897e325f6..9c13ead4ca 100644 --- a/src/openhuman/agent/harness/session/builder.rs +++ b/src/openhuman/agent/harness/session/builder.rs @@ -546,9 +546,10 @@ impl AgentBuilder { memory: self .memory .ok_or_else(|| anyhow::anyhow!("memory is required"))?, - tool_dispatcher: self - .tool_dispatcher - .ok_or_else(|| anyhow::anyhow!("tool_dispatcher is required"))?, + tool_dispatcher: std::sync::Arc::from( + self.tool_dispatcher + .ok_or_else(|| 
anyhow::anyhow!("tool_dispatcher is required"))?, + ), memory_loader: self .memory_loader .unwrap_or_else(|| Box::new(DefaultMemoryLoader::default())), diff --git a/src/openhuman/agent/harness/session/mod.rs b/src/openhuman/agent/harness/session/mod.rs index 16d9ec3c55..4a0b3697e9 100644 --- a/src/openhuman/agent/harness/session/mod.rs +++ b/src/openhuman/agent/harness/session/mod.rs @@ -20,11 +20,14 @@ //! `crate::openhuman::agent`, which re-exports them from this module. //! The child files are an implementation detail. +mod agent_tool_exec; mod builder; pub mod migration; mod runtime; pub(crate) mod transcript; mod turn; +mod turn_checkpoint; +mod turn_engine_adapter; mod types; pub use migration::{migrate_session_layout_if_needed, MigrationOutcome}; diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs index e65b070e56..121816df57 100644 --- a/src/openhuman/agent/harness/session/turn.rs +++ b/src/openhuman/agent/harness/session/turn.rs @@ -18,17 +18,13 @@ //! background archivist fork. 
use super::transcript; +use super::turn_engine_adapter::{AgentCheckpoint, AgentObserver, AgentToolSource}; use super::types::Agent; -use crate::core::event_bus::{publish_global, DomainEvent}; use crate::openhuman::agent::dispatcher::{ParsedToolCall, ToolExecutionResult}; -use crate::openhuman::agent::error::AgentError; use crate::openhuman::agent::harness; use crate::openhuman::agent::hooks::{self, ToolCallRecord, TurnContext}; use crate::openhuman::agent::memory_loader::collect_recall_citations; use crate::openhuman::agent::progress::AgentProgress; -use crate::openhuman::agent::tool_policy::{ - ToolCallContext, ToolPolicyDecision, ToolPolicyRequest, -}; use crate::openhuman::agent_experience::{ prepend_experience_block, render_experience_hits, AgentExperienceStore, ExperienceQuery, }; @@ -36,19 +32,14 @@ use crate::openhuman::agent_tool_policy::render_tool_policy_boundary; use crate::openhuman::context::prompt::{ LearnedContextData, NamespaceSummary, PromptContext, PromptTool, }; -use crate::openhuman::context::{ReductionOutcome, ARCHIVIST_EXTRACTION_PROMPT}; -use crate::openhuman::inference::model_context::context_window_for_model; +use crate::openhuman::context::ARCHIVIST_EXTRACTION_PROMPT; use crate::openhuman::inference::provider::{ ChatMessage, ChatRequest, ConversationMessage, ProviderDelta, UsageInfo, }; use crate::openhuman::memory::MemoryCategory; -use crate::openhuman::tools::traits::ToolCallOptions; use crate::openhuman::tools::Tool; use crate::openhuman::util::truncate_with_ellipsis; -use crate::openhuman::agent::harness::token_budget::{ - trim_chat_messages_to_budget, trim_conversation_history_to_budget, -}; use anyhow::Result; use std::hash::{Hash, Hasher}; use std::sync::Arc; @@ -62,12 +53,7 @@ use std::sync::Arc; /// detect those at the `ChatMessage` boundary (where `bound_cached_transcript_messages` /// operates) we have to peek inside the JSON. See TAURI-RUST-7 for the /// failure mode this guards against. 
-#[path = "turn_checkpoint.rs"] -mod turn_checkpoint; -use turn_checkpoint::{ - assistant_message_has_tool_calls, build_deterministic_checkpoint, - MAX_ITER_CHECKPOINT_INSTRUCTION, -}; +use super::turn_checkpoint::{assistant_message_has_tool_calls, MAX_ITER_CHECKPOINT_INSTRUCTION}; impl Agent { /// Executes a single interaction "turn" with the agent. @@ -459,704 +445,132 @@ impl Agent { // background archivist fork at end-of-turn. self.context.tick_turn(); - // Collect tool call records across all iterations for post-turn hooks - let mut all_tool_records: Vec = Vec::new(); - - // Trim-robust digest of THIS turn's tool calls + results, compiled as - // the loop runs. Used as the *only* context for the max-iteration - // checkpoint summary, so it compiles "what I did this turn" without - // the prior conversation or system prompt bleeding in — and it's - // immune to history trimming (which drops/reorders from the front). - // The persisted transcript is unaffected (bug-report-2026-05-26 A1). - // Bounded: each entry truncates the result to 800 chars, so at the - // default 10-iteration cap the digest is ~8 KB — revisit if - // `max_tool_iterations` is raised substantially. - let mut turn_tool_digest = String::new(); - - // Capture the last `Vec` sent to the provider so we - // can persist it as a session transcript after the turn completes. - let mut last_provider_messages: Option> = None; - - // Accumulate usage stats across iterations for the transcript. - let mut cumulative_input_tokens: u64 = 0; - let mut cumulative_output_tokens: u64 = 0; - let mut cumulative_cached_input_tokens: u64 = 0; - let mut cumulative_charged_usd: f64 = 0.0; - - // Per-turn usage from the final provider response, attached to the - // last assistant message in the persisted transcript. 
- let mut last_turn_usage: Option = None; - let turn_body = async { - for iteration in 0..self.config.max_tool_iterations { - self.emit_progress(AgentProgress::IterationStarted { - iteration: (iteration + 1) as u32, - max_iterations: self.config.max_tool_iterations as u32, - }) - .await; - log::info!( - "[agent_loop] iteration start i={} history_len={}", - iteration + 1, - self.history.len() - ); - - if let Some(context_window) = context_window_for_model(&effective_model) { - let budget_outcome = - trim_conversation_history_to_budget(&mut self.history, context_window); - if budget_outcome.trimmed { - log::warn!( - "[agent_loop] pre-dispatch history trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}", - effective_model, - context_window, - budget_outcome.original_tokens, - budget_outcome.final_tokens, - budget_outcome.messages_removed - ); - } - } - - // Global context management: run the reduction chain - // before every provider hit. Cheap when the guard is - // healthy; executes the summarizer LLM call - // internally when the pipeline asks for autocompaction - // (summarization, microcompact, and the circuit - // breaker all live inside [`ContextManager`]). 
- let outcome = self.context.reduce_before_call(&mut self.history).await?; - match &outcome { - ReductionOutcome::NoOp => {} - ReductionOutcome::Microcompacted { - envelopes_cleared, - entries_cleared, - bytes_freed, - } => { - log::info!( - "[agent_loop] context microcompact i={} envelopes={} entries={} bytes_freed={}", - iteration + 1, - envelopes_cleared, - entries_cleared, - bytes_freed - ); - } - ReductionOutcome::Summarized(stats) => { - log::info!( - "[agent_loop] context autocompact summarized i={} messages_removed={} approx_tokens_freed={} summary_chars={}", - iteration + 1, - stats.messages_removed, - stats.approx_tokens_freed, - stats.summary_chars - ); - } - ReductionOutcome::SummarizationFailed { - utilisation_pct, - reason, - } => { - log::warn!( - "[agent_loop] context summarizer failed i={} utilisation_pct={} reason={}", - iteration + 1, - utilisation_pct, - reason - ); - } - ReductionOutcome::NotAttempted { utilisation_pct } => { - log::warn!( - "[agent_loop] context autocompact disabled in config i={} utilisation_pct={}", - iteration + 1, - utilisation_pct - ); - } - ReductionOutcome::Exhausted { - utilisation_pct, - reason, - } => { - log::error!( - "[agent_loop] context exhausted i={} utilisation_pct={} reason={}", - iteration + 1, - utilisation_pct, - reason - ); - return Err(anyhow::anyhow!( - "Context window exhausted ({utilisation_pct}% full): {reason}" - )); - } - } - - // Use cached transcript messages on the first iteration of - // a resumed session to provide a byte-identical prefix for - // KV cache reuse. After `.take()` the cache is consumed; - // subsequent iterations rebuild from history normally. - let mut messages = if let Some(mut cached) = self.cached_transcript_messages.take() - { - // Append only the delta (new user message) from the - // end of the current history. 
- let new_tail = self.tool_dispatcher.to_provider_messages( - &self.history[self.history.len().saturating_sub(1)..], - ); - cached.extend(new_tail); - log::info!( - "[transcript] resumed from cached transcript prefix_len={} new_tail={}", - cached.len() - 1, - 1 - ); - cached - } else { - self.tool_dispatcher.to_provider_messages(&self.history) - }; - if let Some(context_window) = context_window_for_model(&effective_model) { - let budget_outcome = - trim_chat_messages_to_budget(&mut messages, context_window); - if budget_outcome.trimmed { - log::warn!( - "[agent_loop] pre-dispatch provider messages trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}", - effective_model, - context_window, - budget_outcome.original_tokens, - budget_outcome.final_tokens, - budget_outcome.messages_removed - ); - } - } - - last_provider_messages = Some(messages.clone()); - - log::info!( - "[agent] iteration {}/{} — sending request to provider model={}", - iteration + 1, - self.config.max_tool_iterations, - effective_model - ); - log::info!( - "[agent_loop] provider request i={} messages={} send_tool_specs={}", - iteration + 1, - messages.len(), - self.tool_dispatcher.should_send_tool_specs() - ); - let provider_started = std::time::Instant::now(); - // Only set up the streaming sink when someone is - // listening for progress events. Without a listener the - // channel buffer would fill up and back-pressure the - // provider; skipping it also keeps the non-streaming - // HTTP path alive for providers that don't implement - // SSE. 
- let iteration_for_stream = (iteration + 1) as u32; - let (delta_tx_opt, delta_forwarder) = if self.on_progress.is_some() { - let (tx, mut rx) = tokio::sync::mpsc::channel::(128); - let progress_tx = self.on_progress.clone(); - let forwarder = tokio::spawn(async move { - while let Some(event) = rx.recv().await { - let Some(ref sink) = progress_tx else { - continue; - }; - let mapped = match event { - ProviderDelta::TextDelta { delta } => AgentProgress::TextDelta { - delta, - iteration: iteration_for_stream, - }, - ProviderDelta::ThinkingDelta { delta } => { - AgentProgress::ThinkingDelta { - delta, - iteration: iteration_for_stream, - } - } - ProviderDelta::ToolCallStart { call_id, tool_name } => { - AgentProgress::ToolCallArgsDelta { - call_id, - tool_name, - delta: String::new(), - iteration: iteration_for_stream, - } - } - ProviderDelta::ToolCallArgsDelta { call_id, delta } => { - AgentProgress::ToolCallArgsDelta { - call_id, - tool_name: String::new(), - delta, - iteration: iteration_for_stream, - } - } - }; - // Await backpressure so streamed deltas arrive - // in order and aren't silently dropped when the - // downstream progress bridge is slow. 
- if sink.send(mapped).await.is_err() { - break; - } - } - }); - (Some(tx), Some(forwarder)) - } else { - (None, None) - }; - let response = match self - .provider - .chat( - ChatRequest { - messages: &messages, - tools: if self.tool_dispatcher.should_send_tool_specs() { - Some(self.visible_tool_specs.as_slice()) - } else { - None - }, - stream: delta_tx_opt.as_ref(), - }, - &effective_model, - self.temperature, - ) - .await - { - Ok(resp) => { - log::info!( - "[agent_loop] provider response i={} elapsed_ms={} text_chars={} native_tool_calls={}", - iteration + 1, - provider_started.elapsed().as_millis(), - resp.text.as_ref().map_or(0, |t| t.chars().count()), - resp.tool_calls.len() - ); - log::debug!("[agent_loop] provider response: {resp:?}"); - // Feed the context manager (guard + - // session-memory token accounting). No-op when - // the provider doesn't return usage. - if let Some(ref usage) = resp.usage { - self.context.record_usage(usage); - // Feed the dashboard tracker. This always records - // (model + usage) when the process-global tracker - // is available — independent of `cost.enabled`, - // which gates budget enforcement only. The call - // is a no-op only when `init_global` has not yet - // run (before bootstrap) or failed; errors are - // logged and swallowed so cost telemetry never - // breaks a turn. - crate::openhuman::cost::record_provider_usage(&effective_model, usage); - cumulative_input_tokens += usage.input_tokens; - cumulative_output_tokens += usage.output_tokens; - cumulative_cached_input_tokens += usage.cached_input_tokens; - cumulative_charged_usd += usage.charged_amount_usd; - // Snapshot this turn's usage so the transcript - // writer can attribute it to the last assistant - // message. 
- last_turn_usage = Some(transcript::TurnUsage { - model: effective_model.clone(), - usage: transcript::MessageUsage { - input: usage.input_tokens, - output: usage.output_tokens, - cached_input: usage.cached_input_tokens, - cost_usd: usage.charged_amount_usd, - }, - ts: chrono::Utc::now().to_rfc3339(), - }); - } else { - // Missing usage on this iteration: clear any - // snapshot carried from a prior iteration so - // the transcript doesn't attribute stale - // numbers to the final assistant message. - last_turn_usage = None; - } - resp - } - Err(err) => { - drop(delta_tx_opt); - if let Some(handle) = delta_forwarder { - let _ = handle.await; - } - return Err(err); - } - }; - drop(delta_tx_opt); - if let Some(handle) = delta_forwarder { - let _ = handle.await; - } - - let (text, calls) = self.tool_dispatcher.parse_response(&response); - let calls = Self::with_fallback_tool_call_ids(calls, iteration); - log::info!( - "[agent] provider responded — parsed tool_calls={} text_chars={}", - calls.len(), - text.chars().count() - ); - log::info!( - "[agent_loop] parsed response i={} parsed_text_chars={} parsed_tool_calls={}", - iteration + 1, - text.chars().count(), - calls.len() - ); - if calls.is_empty() { - // Capture reasoning_content before response.text is moved. - // Thinking models (DeepSeek-R1, Qwen3, GLM-4) return - // chain-of-thought in this field; the API contract requires - // it to be echoed back verbatim in subsequent turns or it - // returns HTTP 400. We stash it in extra_metadata so - // convert_messages_for_native can include it when building - // the next request's message list. - let turn_reasoning_content = response.reasoning_content.clone(); - let final_text = if text.is_empty() { - response.text.unwrap_or_default() - } else { - text - }; - // Defense-in-depth (bug-report-2026-05-26 A1): a - // completion with no text *and* no tool calls is never a - // valid final answer — it's a degenerate/poisoned - // response. 
Surfacing it as an error is visible; the old - // behaviour returned `Ok("")`, which rendered as a blank - // reply and silently wedged the thread. - if final_text.trim().is_empty() { - log::warn!( - "[agent_loop] provider returned an empty final response (i={}, no text, no tool calls) — surfacing as error instead of a silent blank reply", - iteration + 1 - ); - // Typed variant so `run_single` can route this - // through `AgentError::skips_sentry()` and demote - // to a `log::info!` instead of escalating to - // Sentry (TAURI-RUST-4JX). The `Display` impl - // still renders the canonical user-facing string - // for UI surfaces, so the user behaviour is - // unchanged. - return Err(AgentError::EmptyProviderResponse { - iteration: iteration + 1, - } - .into()); - } - log::info!( - "[agent] no tool calls — returning final response after {} iteration(s)", - iteration + 1 - ); - log::info!( - "[agent_loop] final response i={} final_chars={} has_reasoning_content={}", - iteration + 1, - final_text.chars().count(), - turn_reasoning_content.is_some() - ); - - self.emit_progress(AgentProgress::TurnCompleted { - iterations: (iteration + 1) as u32, - }) - .await; - - let mut assistant_msg = ChatMessage::assistant(final_text.clone()); - if let Some(rc) = turn_reasoning_content { - // Store reasoning_content in extra_metadata so it - // survives in history and is passed back to the - // provider on the next turn. 
- assistant_msg.extra_metadata = - Some(serde_json::json!({ "reasoning_content": rc })); - log::debug!( - "[agent_loop] stored reasoning_content in extra_metadata for next turn (chars={})", - assistant_msg - .extra_metadata - .as_ref() - .and_then(|m| m.get("reasoning_content")) - .and_then(|v| v.as_str()) - .map_or(0, |s| s.chars().count()) - ); - } - self.history.push(ConversationMessage::Chat(assistant_msg)); - self.trim_history(); - - // Mirror the final assistant reply into the transcript - // snapshot so the JSONL persisted below captures the - // response (not just the prompt that was sent). - if let Some(ref mut msgs) = last_provider_messages { - msgs.push(ChatMessage::assistant(final_text.clone())); - } - - // Persist the transcript **now** — right after the - // provider response lands — so a crash during hooks - // / memory-extraction / the outer epilogue can't - // lose the assistant's reply. - if let Some(ref messages) = last_provider_messages { - self.persist_session_transcript( - messages, - cumulative_input_tokens, - cumulative_output_tokens, - cumulative_cached_input_tokens, - cumulative_charged_usd, - last_turn_usage.as_ref(), - ); - } - - if self.auto_save { - let summary = truncate_with_ellipsis(&final_text, 100); - let _ = self - .memory - .store("", "assistant_resp", &summary, MemoryCategory::Daily, None) - .await; - } - - // Session-memory tool-call accounting. The actual - // background extraction spawn happens *outside* - // `turn_body` so the spawned task can take an owned - // parent context without fighting the borrow - // checker against `self`. We capture the decision - // here and surface it via the manager's session - // state — the epilogue (below) reads - // `should_extract_session_memory()`. 
- self.context.record_tool_calls(all_tool_records.len()); - - // Fire post-turn hooks (non-blocking) - if !self.post_turn_hooks.is_empty() { - let ctx = TurnContext { - user_message: user_message.to_string(), - assistant_response: final_text.clone(), - tool_calls: all_tool_records, - turn_duration_ms: turn_started.elapsed().as_millis() as u64, - session_id: Some(self.event_session_id.clone()) - .filter(|session_id| !session_id.trim().is_empty()), - agent_id: Some(self.agent_definition_id.clone()) - .filter(|agent_id| !agent_id.trim().is_empty()), - entrypoint: Some(self.event_channel.clone()) - .filter(|entrypoint| !entrypoint.trim().is_empty()), - iteration_count: iteration + 1, - }; - hooks::fire_hooks(&self.post_turn_hooks, ctx); - } - - return Ok(final_text); - } - - if !text.is_empty() { - log::info!( - "[agent_loop] assistant pre-tool text i={} chars={}", - iteration + 1, - text.chars().count() - ); - // Push the assistant text into history; rendering is - // the caller's responsibility (the CLI loop walks - // `agent.history()` after each turn, sub-agents and - // library consumers get whatever they need through - // the returned value / history accessors). 
- self.history - .push(ConversationMessage::Chat(ChatMessage::assistant( - text.clone(), - ))); - } - let tool_names: Vec<&str> = calls.iter().map(|call| call.name.as_str()).collect(); - log::info!( - "[agent] dispatching {} tool(s): {:?}", - calls.len(), - tool_names - ); - log::info!( - "[agent_loop] executing tools i={} names={:?}", - iteration + 1, - tool_names - ); - let persisted_tool_calls = - Self::persisted_tool_calls_for_history(&response, &calls, iteration); - log::info!( - "[agent_loop] persisting assistant tool calls i={} persisted_tool_calls={} parsed_tool_calls={}", - iteration + 1, - persisted_tool_calls.len(), - calls.len() - ); - self.history.push(ConversationMessage::AssistantToolCalls { - text: if text.is_empty() { - None - } else { - Some(text.clone()) - }, - tool_calls: persisted_tool_calls, - reasoning_content: response - .reasoning_content - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(ToString::to_string), - }); - - // Persist the transcript **right after** the provider - // response lands — before executing tools — so if the - // session crashes mid-tool-call we still have the - // assistant's response + tool-call intents on disk. - // Rebuild `last_provider_messages` from the current - // history so the snapshot includes whatever the - // assistant just emitted (plain text + tool calls). 
- last_provider_messages = - Some(self.tool_dispatcher.to_provider_messages(&self.history)); - if let Some(ref messages) = last_provider_messages { - self.persist_session_transcript( - messages, - cumulative_input_tokens, - cumulative_output_tokens, - cumulative_cached_input_tokens, - cumulative_charged_usd, - last_turn_usage.as_ref(), - ); - } - - let (results, records) = self.execute_tools(&calls, iteration).await; - all_tool_records.extend(records); - log::info!( - "[agent_loop] tool results complete i={} result_count={}", - iteration + 1, - results.len() - ); - for r in &results { - log::info!( - "[agent] tool response name={} success={} output_chars={}", - r.name, - r.success, - r.output.chars().count(), - ); - log::debug!( - "[agent] tool response body name={}: {}", - r.name, - truncate_with_ellipsis(&r.output, 300) - ); - // Record this call in the turn digest (output truncated to - // bound size) for a possible max-iteration checkpoint. - turn_tool_digest.push_str(&format!( - "- {} [{}]: {}\n", - r.name, - if r.success { "ok" } else { "failed" }, - truncate_with_ellipsis(&r.output, 800) - )); - } - log::info!( - "[agent] all tools complete for iteration {} — looping back to provider", - iteration + 1 - ); - let formatted = self.tool_dispatcher.format_results(&results); - self.history.push(formatted); + // Capture everything the engine seams need as locals/clones *before* + // the observer takes `&mut self`, so the borrow checker is happy: + // the tool source + parser + checkpoint hold clones disjoint from + // the `Agent`, and the observer alone borrows it mutably. 
+ let dispatcher = self.tool_dispatcher.clone(); + let provider = self.provider.clone(); + let provider_name = self.event_channel().to_string(); + let temperature = self.temperature; + let max_iterations = self.config.max_tool_iterations; + let multimodal = crate::openhuman::config::MultimodalConfig::default(); + let mut tool_source = AgentToolSource { + tools: self.tools.clone(), + visible_tool_names: self.visible_tool_names.clone(), + tool_policy_session: self.tool_policy_session.clone(), + tool_policy: self.tool_policy.clone(), + payload_summarizer: self.payload_summarizer.clone(), + event_session_id: self.event_session_id().to_string(), + event_channel: self.event_channel().to_string(), + agent_definition_id: self.agent_definition_id.clone(), + prefer_markdown: self.context.prefer_markdown_tool_output(), + budget_bytes: self.context.tool_result_budget_bytes(), + should_send_specs: self.tool_dispatcher.should_send_tool_specs(), + advertised_specs: self.visible_tool_specs.as_ref().clone(), + records: Vec::new(), + }; + let progress = super::super::engine::TurnProgress::new(self.on_progress.clone()); + let parser = super::super::engine::DispatcherParser { + dispatcher: dispatcher.as_ref(), + }; + let checkpoint = AgentCheckpoint { + provider: self.provider.clone(), + dispatcher: self.tool_dispatcher.clone(), + model: effective_model.clone(), + temperature, + on_progress: self.on_progress.clone(), + user_message: user_message.to_string(), + max_iterations, + }; + let cached_prefix = self.cached_transcript_messages.take(); + let mut observer = AgentObserver { + agent: self, + effective_model: effective_model.clone(), + cumulative_input: 0, + cumulative_output: 0, + cumulative_cached: 0, + cumulative_charged: 0.0, + last_turn_usage: None, + cached_prefix, + pending_results: Vec::new(), + did_push_final: false, + }; + let mut buf: Vec = Vec::new(); + + let outcome = super::super::engine::run_turn_engine( + provider.as_ref(), + &mut buf, + &mut tool_source, + 
&progress, + &mut observer, + &checkpoint, + &parser, + &provider_name, + &effective_model, + temperature, + true, // silent — the channel/UI renders via progress + the return value + &multimodal, + max_iterations, + None, // the web bridge streams via on_progress deltas, not on_delta + ) + .await?; + + // Pull the observer's accounting out, then drop it to release the + // `&mut self` borrow so the epilogue can use `self`. + let did_push_final = observer.did_push_final; + let cumulative_input = observer.cumulative_input; + let cumulative_output = observer.cumulative_output; + let cumulative_cached = observer.cumulative_cached; + let cumulative_charged = observer.cumulative_charged; + let last_turn_usage = observer.last_turn_usage.take(); + drop(observer); + let records = std::mem::take(&mut tool_source.records); + + self.context.record_tool_calls(records.len()); + + // For a clean final response the observer already pushed the + // assistant message + persisted. For a max-iteration checkpoint or + // circuit-breaker halt the engine returned the text without pushing + // it, so finish the history + transcript here (mirrors the old + // final/max-iter branches). + if !did_push_final { + self.history + .push(ConversationMessage::Chat(ChatMessage::assistant( + outcome.text.clone(), + ))); self.trim_history(); - // Flush the transcript again now that tool results have - // been appended — the pre-tool persist above only - // captured the assistant's tool-call intents. A crash - // or early-exit between iterations would otherwise lose - // the tool output from the on-disk session record. - let post_tool_messages = self.tool_dispatcher.to_provider_messages(&self.history); + // Note: the engine already emits `TurnCompleted` on the + // checkpoint exit (and every other terminal path), so we don't + // re-emit it here — doing so would double-fire for the UI. 
+ let messages = self.tool_dispatcher.to_provider_messages(&self.history); self.persist_session_transcript( - &post_tool_messages, - cumulative_input_tokens, - cumulative_output_tokens, - cumulative_cached_input_tokens, - cumulative_charged_usd, + &messages, + cumulative_input, + cumulative_output, + cumulative_cached, + cumulative_charged, last_turn_usage.as_ref(), ); - last_provider_messages = Some(post_tool_messages); - log::info!( - "[agent_loop] iteration end i={} history_len={}", - iteration + 1, - self.history.len() - ); } - // Tool-call iteration cap reached. Instead of aborting the turn - // — which left the persisted transcript on an unterminated tool - // cycle and silently wedged the thread on the next message - // (bug-report-2026-05-26 A1) — emit a *resumable checkpoint*: - // ask the model (tools disabled) to summarize what it did and - // what comes next, persist that as the final assistant message, - // and return it. The full tool-call history stays in the - // transcript, so the user's next message naturally resumes the - // task — no heuristic "continue" detection needed. - log::warn!( - "[agent_loop] reached max tool iterations max={} — emitting resumable checkpoint instead of aborting", - self.config.max_tool_iterations - ); - - let base_messages = last_provider_messages - .clone() - .unwrap_or_else(|| self.tool_dispatcher.to_provider_messages(&self.history)); - // Summarize ONLY this turn's work: feed the compiled tool-call - // digest (no system prompt, no prior conversation), not the full - // conversation. `base_messages` above is still used for the - // transcript persist below, so the saved transcript is unchanged - // (bug-report-2026-05-26 A1). `user_message` below is the - // `turn(&mut self, message: &str)` parameter (the turn's request). 
- let turn_summary_input = vec![ChatMessage::user(format!( - "You were working on this user request:\n{user_message}\n\nHere are the tool calls you made this turn and their results — compile your checkpoint from these:\n{}", - if turn_tool_digest.is_empty() { - "(no tool calls recorded)" - } else { - turn_tool_digest.as_str() - } - ))]; - let checkpoint_iteration = (self.config.max_tool_iterations + 1) as u32; - let (mut checkpoint, checkpoint_usage) = self - .summarize_iteration_checkpoint( - &turn_summary_input, - &effective_model, - checkpoint_iteration, - ) - .await; - - // Fold the checkpoint call's usage into the turn's cumulative - // accounting. The provider call happens regardless of whether we - // keep its prose, so dropping its tokens would undercount the - // turn and mis-attribute the prior iteration's usage to the - // checkpoint message (mirrors the normal final-response path). - if let Some(ref usage) = checkpoint_usage { - self.context.record_usage(usage); - crate::openhuman::cost::record_provider_usage(&effective_model, usage); - cumulative_input_tokens += usage.input_tokens; - cumulative_output_tokens += usage.output_tokens; - cumulative_cached_input_tokens += usage.cached_input_tokens; - cumulative_charged_usd += usage.charged_amount_usd; - last_turn_usage = Some(transcript::TurnUsage { - model: effective_model.clone(), - usage: transcript::MessageUsage { - input: usage.input_tokens, - output: usage.output_tokens, - cached_input: usage.cached_input_tokens, - cost_usd: usage.charged_amount_usd, - }, - ts: chrono::Utc::now().to_rfc3339(), - }); - } else { - // No usage on the checkpoint call: don't attribute a stale - // prior-iteration snapshot to the checkpoint assistant message. 
- last_turn_usage = None; - } - - if checkpoint.trim().is_empty() { - log::warn!("[agent_loop] checkpoint summary empty — using deterministic fallback"); - checkpoint = build_deterministic_checkpoint( - &all_tool_records, - self.config.max_tool_iterations, - ); + // Auto-save a short memory of the final reply (not on a capped turn, + // matching the prior behavior). + if self.auto_save && !outcome.hit_cap { + let summary = truncate_with_ellipsis(&outcome.text, 100); + let _ = self + .memory + .store("", "assistant_resp", &summary, MemoryCategory::Daily, None) + .await; } - log::info!( - "[agent_loop] max-iter checkpoint emitted chars={}", - checkpoint.chars().count() - ); - - self.emit_progress(AgentProgress::TurnCompleted { - iterations: self.config.max_tool_iterations as u32, - }) - .await; - - self.history - .push(ConversationMessage::Chat(ChatMessage::assistant( - checkpoint.clone(), - ))); - self.trim_history(); - - // Persist the checkpoint so the transcript ends on a - // well-formed assistant message (never a dangling tool cycle). - // Note: `base_messages` ends before the final (capped) iteration's - // tool results — those landed after the last `last_provider_messages` - // snapshot — so the persisted transcript omits them. That's fine: - // the checkpoint prose covers the work done, and the transcript - // stays structurally correct (ends on an assistant message). - let mut checkpoint_messages = base_messages; - checkpoint_messages.push(ChatMessage::assistant(checkpoint.clone())); - self.persist_session_transcript( - &checkpoint_messages, - cumulative_input_tokens, - cumulative_output_tokens, - cumulative_cached_input_tokens, - cumulative_charged_usd, - last_turn_usage.as_ref(), - ); - - self.context.record_tool_calls(all_tool_records.len()); - // Fire post-turn hooks with the checkpoint as the assistant - // response (mirrors the normal final-response path). + // Fire post-turn hooks (non-blocking). 
if !self.post_turn_hooks.is_empty() { let ctx = TurnContext { user_message: user_message.to_string(), - assistant_response: checkpoint.clone(), - tool_calls: all_tool_records, + assistant_response: outcome.text.clone(), + tool_calls: records, turn_duration_ms: turn_started.elapsed().as_millis() as u64, session_id: Some(self.event_session_id.clone()) .filter(|session_id| !session_id.trim().is_empty()), @@ -1164,12 +578,12 @@ impl Agent { .filter(|agent_id| !agent_id.trim().is_empty()), entrypoint: Some(self.event_channel.clone()) .filter(|entrypoint| !entrypoint.trim().is_empty()), - iteration_count: self.config.max_tool_iterations, + iteration_count: outcome.iterations as usize, }; hooks::fire_hooks(&self.post_turn_hooks, ctx); } - Ok(checkpoint) + Ok(outcome.text) }; // end of `turn_body` async block // Run the turn body inside the parent-execution-context scope so @@ -1283,282 +697,25 @@ impl Agent { call: &ParsedToolCall, iteration: usize, ) -> (ToolExecutionResult, ToolCallRecord) { - let started = std::time::Instant::now(); - publish_global(DomainEvent::ToolExecutionStarted { - tool_name: call.name.clone(), - session_id: self.event_session_id().to_string(), - }); - // Synthesise a fallback id for prompt-guided (non-native) tool - // calls so downstream consumers always have a stable key to - // reconcile tool_call / tool_args_delta / tool_result rows by. - // A random uuid guarantees uniqueness even when the same tool - // name appears multiple times in the same iteration's parsed - // calls. 
- let call_id = call.tool_call_id.clone().unwrap_or_else(|| { - format!( - "turn-{iteration}-{}-{}", - call.name, - uuid::Uuid::new_v4().simple() - ) - }); - self.emit_progress(AgentProgress::ToolCallStarted { - call_id: call_id.clone(), - tool_name: call.name.clone(), - arguments: call.arguments.clone(), - iteration: (iteration + 1) as u32, - }) - .await; - log::info!("[agent] executing tool: {}", call.name); - log::info!("[agent_loop] tool start name={}", call.name); - - let (raw_result, success) = if !self.visible_tool_names.is_empty() - && !self.visible_tool_names.contains(&call.name) - { - log::warn!( - "[agent] blocked tool call '{}' — not in visible tool set", - call.name - ); - ( - format!("Tool '{}' is not available to this agent", call.name), - false, - ) - } else if let Some(tool) = self.tools.iter().find(|t| t.name() == call.name) { - let session_decision = self.tool_policy_session.decision_for(&call.name); - if session_decision.is_denied() { - let required = session_decision - .required_permission - .map(|permission| permission.to_string()) - .unwrap_or_else(|| "unknown".to_string()); - ( - format!( - "Tool '{}' blocked by tool policy: requires {}, channel '{}' allows {}", - call.name, - required, - self.event_channel, - session_decision.allowed_permission - ), - false, - ) - } else { - // Per-call args-aware permission check: tools that expose - // multi-level actions (e.g. schedule list vs schedule create) - // set a low static permission_level() so the tool is visible - // on read-capable channels, but declare the true per-action - // level via permission_level_with_args. 
- let call_required = tool.permission_level_with_args(&call.arguments); - if call_required > session_decision.allowed_permission { - tracing::debug!( - tool = call.name.as_str(), - call_required = %call_required, - allowed = %session_decision.allowed_permission, - "[agent_loop] tool action blocked by per-call permission check" - ); - ( - format!( - "Tool '{}' action requires {} permission, channel '{}' allows {}", - call.name, - call_required, - self.event_channel, - session_decision.allowed_permission - ), - false, - ) - } else { - let context = ToolCallContext::session( - self.event_session_id(), - self.event_channel(), - self.agent_definition_id.to_string(), - call_id.clone(), - (iteration + 1) as u32, - ); - let mut policy_request = - ToolPolicyRequest::new(call.name.clone(), call.arguments.clone(), context); - if let Some(generated_context) = tool.generated_runtime_context(&call.arguments) - { - policy_request = - policy_request.with_generated_tool_context(generated_context); - } - let policy_decision = self.tool_policy.check(&policy_request).await; - if let Some(reason) = policy_decision.blocking_reason() { - let blocked_action = match &policy_decision { - ToolPolicyDecision::RequireApproval { .. } => "requires approval", - ToolPolicyDecision::Deny { .. } => "denied", - ToolPolicyDecision::Allow => "allowed", - }; - crate::openhuman::tool_registry::denials::record( - call.name.as_str(), - self.tool_policy.name(), - blocked_action, - reason, - ); - tracing::debug!( - tool = call.name.as_str(), - policy = self.tool_policy.name(), - action = blocked_action, - reason = %reason, - "[agent_loop] tool blocked by policy" - ); - ( - format!( - "Tool '{}' {blocked_action} by policy '{}': {reason}", - call.name, - self.tool_policy.name() - ), - false, - ) - } else { - // Per-call options: ask the tool for markdown output when the - // context manager is configured to prefer it. 
Tools that - // implement `execute_with_options` will populate - // `markdown_formatted`; others fall through to the default - // implementation which forwards to `execute`. - let prefer_markdown = self.context.prefer_markdown_tool_output(); - let options = ToolCallOptions { prefer_markdown }; - let outcome = tool - .execute_with_options(call.arguments.clone(), options) - .await; - match outcome { - Ok(r) => { - if !r.is_error { - let mut output = r.output_for_llm(prefer_markdown); - if prefer_markdown && r.markdown_formatted.is_some() { - log::debug!( - "[agent_loop] tool={} returned markdown payload bytes={}", - call.name, - output.len() - ); - } - // Issue #574 — if a payload summarizer is wired - // in (orchestrator session only) and the output - // exceeds the configured threshold, hand it to - // the summarizer sub-agent before it enters - // history. On any failure or below-threshold - // payload, leave `output` untouched and let the - // existing tool_result_budget_bytes truncation - // pipeline handle it downstream. 
- if let Some(ps) = self.payload_summarizer.as_ref() { - log::debug!( - "[agent_loop] payload_summarizer intercepting tool={} bytes={}", - call.name, - output.len() - ); - match ps.maybe_summarize(&call.name, None, &output).await { - Ok(Some(payload)) => { - log::info!( - "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes", - call.name, - payload.original_bytes, - payload.summary_bytes - ); - output = payload.summary; - } - Ok(None) => { - log::debug!( - "[agent_loop] payload_summarizer pass-through tool={} bytes={}", - call.name, - output.len() - ); - } - Err(e) => { - log::warn!( - "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)", - call.name, - e - ); - } - } - } - (output, true) - } else { - ( - format!("Error: {}", r.output_for_llm(prefer_markdown)), - false, - ) - } - } - Err(e) => (format!("Error executing {}: {e}", call.name), false), - } - } - } // end else { // per-call permission ok - } - } else { - (format!("Unknown tool: {}", call.name), false) + // The per-call execution path lives in the shared + // [`super::agent_tool_exec::run_agent_tool_call`] so `Agent::turn` + // (when migrated to the turn engine, via `AgentToolSource`) and any + // direct caller run the identical logic. Progress is emitted through a + // `TurnProgress` over this agent's sink. 
+ let progress = super::super::engine::TurnProgress::new(self.on_progress.clone()); + let ctx = super::agent_tool_exec::AgentToolExecCtx { + tools: &self.tools, + visible_tool_names: &self.visible_tool_names, + tool_policy_session: &self.tool_policy_session, + tool_policy: self.tool_policy.as_ref(), + payload_summarizer: self.payload_summarizer.as_deref(), + event_session_id: self.event_session_id(), + event_channel: self.event_channel(), + agent_definition_id: &self.agent_definition_id, + prefer_markdown: self.context.prefer_markdown_tool_output(), + budget_bytes: self.context.tool_result_budget_bytes(), }; - - // Context pipeline stage 1: apply the per-result byte budget - // *inline* before the result enters history. This is the only - // cache-safe reduction stage — the truncated body has never - // been sent to the backend so it creates no cache invalidation. - // Source the budget from the context manager so it tracks the - // resolved `context.tool_result_budget_bytes` (including any - // env/config overrides) rather than the deprecated - // `agent.tool_result_budget_bytes` field. 
- let budget_bytes = self.context.tool_result_budget_bytes(); - let (result, budget_outcome) = - crate::openhuman::context::apply_tool_result_budget(raw_result, budget_bytes); - if budget_outcome.truncated { - log::info!( - "[agent_loop] tool_result_budget applied name={} original_bytes={} final_bytes={} dropped_bytes={}", - call.name, - budget_outcome.original_bytes, - budget_outcome.final_bytes, - budget_outcome.original_bytes - budget_outcome.final_bytes - ); - } - - let elapsed_ms = started.elapsed().as_millis() as u64; - publish_global(DomainEvent::ToolExecutionCompleted { - tool_name: call.name.clone(), - session_id: self.event_session_id().to_string(), - success, - elapsed_ms, - }); - self.emit_progress(AgentProgress::ToolCallCompleted { - call_id: call_id.clone(), - tool_name: call.name.clone(), - success, - output_chars: result.chars().count(), - elapsed_ms, - iteration: (iteration + 1) as u32, - }) - .await; - log::info!( - "[agent] tool completed: {} success={} elapsed_ms={}", - call.name, - success, - elapsed_ms - ); - log::debug!( - "[agent] tool output for {}: {}", - call.name, - truncate_with_ellipsis(&result, 500) - ); - log::info!( - "[agent_loop] tool finish name={} elapsed_ms={} output_chars={} success={}", - call.name, - elapsed_ms, - result.chars().count(), - success - ); - - let output_summary = hooks::sanitize_tool_output(&result, &call.name, success); - - let record = ToolCallRecord { - name: call.name.clone(), - arguments: call.arguments.clone(), - success, - output_summary, - duration_ms: elapsed_ms, - }; - - let exec_result = ToolExecutionResult { - name: call.name.clone(), - output: result, - success, - tool_call_id: call.tool_call_id.clone(), - }; - - (exec_result, record) + super::agent_tool_exec::run_agent_tool_call(&ctx, &progress, call, iteration).await } /// Executes multiple tool calls in sequence. 
diff --git a/src/openhuman/agent/harness/session/turn_engine_adapter.rs b/src/openhuman/agent/harness/session/turn_engine_adapter.rs new file mode 100644 index 0000000000..03dd8e67d9 --- /dev/null +++ b/src/openhuman/agent/harness/session/turn_engine_adapter.rs @@ -0,0 +1,449 @@ +//! Engine seams for the stateful `Agent::turn`. +//! +//! These adapt the `Agent` to the shared [`run_turn_engine`] so web/desktop +//! chat runs the same loop as every other entry point, while preserving the +//! Agent's richer state: typed `ConversationMessage` history (with structured +//! tool calls + round-tripped `reasoning_content`), the `ContextManager` +//! reduction chain, KV-cache transcript prefixes, transcript persistence, and +//! the pluggable `ToolDispatcher` (incl. PFormat). +//! +//! * [`AgentToolSource`] owns `Arc`/value clones of the Agent's tool state +//! (disjoint from the `&mut Agent` the observer holds) and runs each call +//! through the shared [`run_agent_tool_call`], collecting `ToolCallRecord`s. +//! * [`AgentObserver`] borrows the `Agent` mutably: it runs the context +//! reduction + re-materializes the engine's `ChatMessage` buffer from the +//! typed history each iteration, rebuilds the typed history from the engine's +//! per-iteration callbacks, accumulates usage, and persists the transcript. +//! * [`AgentCheckpoint`] summarizes the turn-so-far into a resumable checkpoint +//! when the iteration cap is hit (mirrors `summarize_iteration_checkpoint`). 
+ +use std::collections::HashSet; +use std::sync::Arc; + +use anyhow::Result; +use async_trait::async_trait; + +use super::agent_tool_exec::{run_agent_tool_call, AgentToolExecCtx}; +use super::transcript; +use super::turn_checkpoint::MAX_ITER_CHECKPOINT_INSTRUCTION; +use super::types::Agent; +use crate::openhuman::agent::dispatcher::{ + ParsedToolCall as DispatcherParsedToolCall, ToolDispatcher, ToolExecutionResult, +}; +use crate::openhuman::agent::harness::engine::{ + CheckpointOutcome, CheckpointStrategy, ProgressReporter, ToolRunResult, ToolSource, + TurnObserver, +}; +use crate::openhuman::agent::harness::parse::ParsedToolCall; +use crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer; +use crate::openhuman::agent::hooks::ToolCallRecord; +use crate::openhuman::agent::progress::AgentProgress; +use crate::openhuman::agent::tool_policy::ToolPolicy; +use crate::openhuman::agent_tool_policy::ToolPolicySession; +use crate::openhuman::context::ReductionOutcome; +use crate::openhuman::inference::model_context::context_window_for_model; +use crate::openhuman::inference::provider::{ + ChatMessage, ChatRequest, ConversationMessage, Provider, ProviderDelta, ToolCall, UsageInfo, +}; +use crate::openhuman::tools::{Tool, ToolSpec}; + +/// Rebuild the persisted `Vec` for an assistant-with-tools history +/// entry: prefer the provider's native calls, else synthesise from the parsed +/// calls (mirrors `Agent::persisted_tool_calls_for_history`). +fn persisted_tool_calls( + native: &[ToolCall], + parsed: &[ParsedToolCall], + results: &[ToolExecutionResult], + iteration: usize, +) -> Vec { + if !native.is_empty() { + return native.to_vec(); + } + // Synthesise from the parsed calls, reusing the *exact* id each result was + // recorded under (`results[i].tool_call_id`) so the persisted assistant + // tool-call id matches its `ToolResults` entry — what the next provider + // turn (and history-fidelity tests) rely on. 
+ parsed + .iter() + .enumerate() + .map(|(idx, c)| { + let id = results + .get(idx) + .and_then(|r| r.tool_call_id.clone()) + .or_else(|| c.id.clone()) + .unwrap_or_else(|| format!("parsed-{}-{}", iteration + 1, idx + 1)); + ToolCall { + id, + name: c.name.clone(), + arguments: c.arguments.to_string(), + } + }) + .collect() +} + +/// Tool source for `Agent::turn`. Owns clones of the Agent's tool state so it +/// doesn't borrow the `Agent` (which [`AgentObserver`] holds mutably). +pub(super) struct AgentToolSource { + pub tools: Arc>>, + pub visible_tool_names: HashSet, + pub tool_policy_session: ToolPolicySession, + pub tool_policy: Arc, + pub payload_summarizer: Option>, + pub event_session_id: String, + pub event_channel: String, + pub agent_definition_id: String, + pub prefer_markdown: bool, + pub budget_bytes: usize, + pub should_send_specs: bool, + pub advertised_specs: Vec, + /// Collected per-call records, drained by the post-loop epilogue for hooks. + pub records: Vec, +} + +#[async_trait] +impl ToolSource for AgentToolSource { + fn request_specs(&self) -> &[ToolSpec] { + if self.should_send_specs { + &self.advertised_specs + } else { + &[] + } + } + + async fn execute_call( + &mut self, + call: &ParsedToolCall, + iteration: usize, + progress: &dyn ProgressReporter, + _progress_call_id: &str, + ) -> ToolRunResult { + // `run_agent_tool_call` takes the dispatcher's `ParsedToolCall` shape; + // convert from the engine's internal one. 
+ let dispatcher_call = DispatcherParsedToolCall { + name: call.name.clone(), + arguments: call.arguments.clone(), + tool_call_id: call.id.clone(), + }; + let ctx = AgentToolExecCtx { + tools: &self.tools, + visible_tool_names: &self.visible_tool_names, + tool_policy_session: &self.tool_policy_session, + tool_policy: self.tool_policy.as_ref(), + payload_summarizer: self.payload_summarizer.as_deref(), + event_session_id: &self.event_session_id, + event_channel: &self.event_channel, + agent_definition_id: &self.agent_definition_id, + prefer_markdown: self.prefer_markdown, + budget_bytes: self.budget_bytes, + }; + let (exec_result, record) = + run_agent_tool_call(&ctx, progress, &dispatcher_call, iteration).await; + self.records.push(record); + ToolRunResult { + text: exec_result.output, + success: exec_result.success, + } + } +} + +/// Turn observer for `Agent::turn`: owns the typed-history rebuild, context +/// management, usage accounting, and transcript persistence. +pub(super) struct AgentObserver<'a> { + pub agent: &'a mut Agent, + pub effective_model: String, + pub cumulative_input: u64, + pub cumulative_output: u64, + pub cumulative_cached: u64, + pub cumulative_charged: f64, + pub last_turn_usage: Option, + /// Cached transcript prefix for KV-cache reuse on a resumed session, + /// consumed on the first iteration. + pub cached_prefix: Option>, + /// Tool results buffered during the per-call loop, flushed to typed history + /// via the dispatcher's `format_results` once the assistant turn lands. + pub pending_results: Vec, + /// Whether the engine reported a clean final response (so the post-loop + /// epilogue knows not to push `outcome.text` itself). 
+ pub did_push_final: bool, +} + +impl AgentObserver<'_> { + fn persist(&mut self) { + let messages = self + .agent + .tool_dispatcher + .to_provider_messages(&self.agent.history); + self.agent.persist_session_transcript( + &messages, + self.cumulative_input, + self.cumulative_output, + self.cumulative_cached, + self.cumulative_charged, + self.last_turn_usage.as_ref(), + ); + } +} + +#[async_trait] +impl TurnObserver for AgentObserver<'_> { + async fn before_dispatch( + &mut self, + buf: &mut Vec, + _iteration: usize, + ) -> Result<()> { + // Pre-dispatch token-budget trim on the typed history. + if let Some(context_window) = context_window_for_model(&self.effective_model) { + super::super::token_budget::trim_conversation_history_to_budget( + &mut self.agent.history, + context_window, + ); + } + // Global context-management reduction chain. + let outcome = self + .agent + .context + .reduce_before_call(&mut self.agent.history) + .await?; + if let ReductionOutcome::Exhausted { + utilisation_pct, + reason, + } = &outcome + { + return Err(anyhow::anyhow!( + "Context window exhausted ({utilisation_pct}% full): {reason}" + )); + } + + // Re-materialize the engine's ChatMessage buffer from the typed + // history. On the first iteration of a resumed session, splice the + // byte-identical cached prefix + the new user-message tail for KV-cache + // reuse; otherwise rebuild from scratch. + let messages = if let Some(mut cached) = self.cached_prefix.take() { + let tail = self.agent.tool_dispatcher.to_provider_messages( + &self.agent.history[self.agent.history.len().saturating_sub(1)..], + ); + cached.extend(tail); + cached + } else { + self.agent + .tool_dispatcher + .to_provider_messages(&self.agent.history) + }; + *buf = messages; + // Second-pass trim on the materialized provider messages (mirrors the + // legacy `Agent::turn`, which trimmed both the typed history and the + // built `ChatMessage` list). 
+ if let Some(context_window) = context_window_for_model(&self.effective_model) { + super::super::token_budget::trim_chat_messages_to_budget(buf, context_window); + } + Ok(()) + } + + fn allow_empty_final(&self) -> bool { + false + } + + fn record_usage(&mut self, model: &str, usage: &UsageInfo) { + self.agent.context.record_usage(usage); + crate::openhuman::cost::record_provider_usage(model, usage); + self.cumulative_input += usage.input_tokens; + self.cumulative_output += usage.output_tokens; + self.cumulative_cached += usage.cached_input_tokens; + self.cumulative_charged += usage.charged_amount_usd; + self.last_turn_usage = Some(transcript::TurnUsage { + model: model.to_string(), + usage: transcript::MessageUsage { + input: usage.input_tokens, + output: usage.output_tokens, + cached_input: usage.cached_input_tokens, + cost_usd: usage.charged_amount_usd, + }, + ts: chrono::Utc::now().to_rfc3339(), + }); + } + + fn on_assistant( + &mut self, + display_text: &str, + _response_text: &str, + reasoning_content: Option<&str>, + native_tool_calls: &[ToolCall], + parsed_calls: &[ParsedToolCall], + iteration: usize, + is_final: bool, + ) { + if is_final { + let mut assistant_msg = ChatMessage::assistant(display_text.to_string()); + if let Some(rc) = reasoning_content { + assistant_msg.extra_metadata = Some(serde_json::json!({ "reasoning_content": rc })); + } + self.agent + .history + .push(ConversationMessage::Chat(assistant_msg)); + self.agent.trim_history(); + self.did_push_final = true; + return; + } + + // Assistant turn with tool calls. Mirror `Agent::turn` exactly: push the + // pre-tool narrative text (if any) as a standalone Chat message, then + // the structured AssistantToolCalls, then the dispatcher-formatted + // results buffered during the per-call loop. 
+ if !display_text.is_empty() { + self.agent + .history + .push(ConversationMessage::Chat(ChatMessage::assistant( + display_text.to_string(), + ))); + } + let tool_calls = persisted_tool_calls( + native_tool_calls, + parsed_calls, + &self.pending_results, + iteration, + ); + self.agent + .history + .push(ConversationMessage::AssistantToolCalls { + text: if display_text.is_empty() { + None + } else { + Some(display_text.to_string()) + }, + tool_calls, + reasoning_content: reasoning_content + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string), + }); + let results = std::mem::take(&mut self.pending_results); + let formatted = self.agent.tool_dispatcher.format_results(&results); + self.agent.history.push(formatted); + self.agent.trim_history(); + } + + fn on_tool_result( + &mut self, + call_id: &str, + tool_name: &str, + result_text: &str, + success: bool, + _iteration: usize, + ) { + self.pending_results.push(ToolExecutionResult { + name: tool_name.to_string(), + output: result_text.to_string(), + success, + tool_call_id: Some(call_id.to_string()), + }); + } + + fn after_iteration(&mut self, _buf: &[ChatMessage], _iteration: usize) { + self.persist(); + } +} + +/// Max-iteration checkpoint for `Agent::turn`: summarize the turn's tool digest +/// into a resumable checkpoint (streaming text deltas through the progress +/// sink), with a deterministic fallback. +pub(super) struct AgentCheckpoint { + pub provider: Arc, + pub dispatcher: Arc, + pub model: String, + pub temperature: f64, + pub on_progress: Option>, + pub user_message: String, + pub max_iterations: usize, +} + +#[async_trait] +impl CheckpointStrategy for AgentCheckpoint { + async fn on_max_iter(&self, digest: &str, max_iterations: usize) -> Result { + let deterministic = format!( + "I reached the tool-call limit for this turn ({max_iterations} steps), so I paused here.\n\n\ + **Done so far:**\n{digest}\n\ + **Next steps:** I'll continue from here — just reply (e.g. 
\"continue\") and I'll pick up \ + where I left off." + ); + let mut messages = vec![ChatMessage::user(format!( + "You were working on this user request:\n{}\n\nHere are the tool calls you made this turn \ + and their results — compile your checkpoint from these:\n{}", + self.user_message, digest + ))]; + messages.push(ChatMessage::user(MAX_ITER_CHECKPOINT_INSTRUCTION)); + + let checkpoint_iteration = (self.max_iterations + 1) as u32; + // Stream the checkpoint prose as text deltas (tools disabled). + let (delta_tx_opt, delta_forwarder) = if self.on_progress.is_some() { + let (tx, mut rx) = tokio::sync::mpsc::channel::(128); + let progress_tx = self.on_progress.clone(); + let forwarder = tokio::spawn(async move { + while let Some(event) = rx.recv().await { + let Some(ref sink) = progress_tx else { + continue; + }; + if let ProviderDelta::TextDelta { delta } = event { + if sink + .send(AgentProgress::TextDelta { + delta, + iteration: checkpoint_iteration, + }) + .await + .is_err() + { + break; + } + } + } + }); + (Some(tx), Some(forwarder)) + } else { + (None, None) + }; + + let result = self + .provider + .chat( + ChatRequest { + messages: &messages, + tools: None, + stream: delta_tx_opt.as_ref(), + }, + &self.model, + self.temperature, + ) + .await; + drop(delta_tx_opt); + if let Some(handle) = delta_forwarder { + let _ = handle.await; + } + + match result { + Ok(resp) => { + let usage = resp.usage.clone(); + // Strip any stray tool-call markup; keep only prose. 
+ let (text, calls) = self.dispatcher.parse_response(&resp); + let checkpoint = if !text.trim().is_empty() { + text + } else if calls.is_empty() { + resp.text.unwrap_or_default() + } else { + String::new() + }; + let text = if checkpoint.trim().is_empty() { + deterministic + } else { + checkpoint + }; + Ok(CheckpointOutcome { text, usage }) + } + Err(e) => { + log::warn!("[agent_loop] checkpoint summary call failed: {e:#}"); + Ok(CheckpointOutcome { + text: deterministic, + usage: None, + }) + } + } + } +} diff --git a/src/openhuman/agent/harness/session/types.rs b/src/openhuman/agent/harness/session/types.rs index 651cee9ff0..da821f5a6f 100644 --- a/src/openhuman/agent/harness/session/types.rs +++ b/src/openhuman/agent/harness/session/types.rs @@ -45,7 +45,10 @@ pub struct Agent { pub(super) visible_tool_names: std::collections::HashSet, pub(super) tool_policy_session: ToolPolicySession, pub(super) memory: Arc, - pub(super) tool_dispatcher: Box, + // `Arc` (not `Box`) so the turn engine's parser seam can hold a cheap clone + // of the dispatcher without borrowing the `Agent` (which the turn observer + // borrows mutably) — see `engine::DispatcherParser`. 
+ pub(super) tool_dispatcher: Arc, pub(super) memory_loader: Box, pub(super) config: crate::openhuman::config::AgentConfig, pub(super) model_name: String, diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs index 166b8dba38..97dd618534 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops.rs @@ -27,16 +27,12 @@ use crate::openhuman::agent::harness::definition::{AgentDefinition, PromptSource use crate::openhuman::agent::harness::{ current_spawn_depth, with_current_sandbox_mode, with_spawn_depth, MAX_SPAWN_DEPTH, }; -use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::context::prompt::{ render_subagent_system_prompt, PromptContext, PromptTool, SubagentRenderOptions, }; -use crate::openhuman::inference::provider::{ - ChatMessage, ChatRequest, Provider, ProviderDelta, ToolCall, -}; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, Provider}; use crate::openhuman::memory_conversations::ConversationMessage; use crate::openhuman::tools::{Tool, ToolCategory, ToolSpec}; -use crate::openhuman::util::truncate_with_ellipsis; /// Prompt suffix injected into every typed sub-agent run. /// @@ -1223,9 +1219,9 @@ async fn run_inner_loop( provider: &dyn Provider, history: &mut Vec, parent_tools: &[Box], - mut extra_tools: Vec>, + extra_tools: Vec>, tool_specs: &[ToolSpec], - mut allowed_names: HashSet, + allowed_names: HashSet, lazy_resolver: Option, model: &str, temperature: f64, @@ -1238,32 +1234,18 @@ async fn run_inner_loop( ) -> Result<(String, usize, AggregatedUsage), SubagentRunError> { // An autonomous skill run (set via `with_autonomous_iter_cap`) lifts the // per-agent cap so sub-agents run until done / the circuit breaker trips. - // Take the larger of the two so a sub-agent that already wants more keeps it. 
let max_iterations = super::autonomous::autonomous_iter_cap() .map(|cap| cap.max(max_iterations)) .unwrap_or(max_iterations) .max(1); - // Compiled digest of this sub-agent run's tool calls + results, for a - // graceful checkpoint if it hits the iteration cap (mirrors the main - // agent — bug-report-2026-05-26 A1). Accumulated as the loop runs so it's - // robust to history trimming. - let mut run_tool_digest = String::new(); - - // Sub-agent transcript stem — mirrors what - // `persist_subagent_transcript` used to compute on one-shot - // post-loop writes. We compute it once up front so **every - // iteration's** persist call resolves to the same file on disk: - // `{parent_chain}__{unix_ts}_{agent_id}.jsonl`. + // Sub-agent transcript stem — computed once up front so every iteration's + // persist resolves to the same file: `{parent_chain}__{unix_ts}_{agent_id}`. let child_session_key = { let now = std::time::SystemTime::now() .duration_since(std::time::UNIX_EPOCH) .unwrap_or_default(); let unix_ts = now.as_secs(); - // Nanos component + task_id suffix disambiguate sibling sub-agents - // spawned within the same wall-clock second (tests and fan-out - // flows routinely do this, and a shared stem would overwrite the - // earlier sibling's transcript file). let nanos = now.subsec_nanos(); let sanitized: String = agent_id .chars() @@ -1294,47 +1276,15 @@ async fn run_inner_loop( format!("{parent_chain}__{child_session_key}") }; - // ── Text-mode override for integrations_agent ──────────────────────────── - // - // Large Composio toolkits (Notion, Salesforce, HubSpot, GitHub) ship - // per-action JSON schemas that are extraordinarily dense — deeply - // nested object/block types, recursive refs, huge discriminated - // unions. Fireworks-style providers (which the backend forwards to) - // auto-compile every entry in `tools: [...]` into a grammar and - // index rules with a `uint16_t` — max 65 535 rules. 
Even with the - // upstream fuzzy filter narrowing Notion 48 → 16, a single request - // generates 100 000+ rules and the provider rejects it with 400 - // before generation starts. - // - // The fuzzy filter can't fix this because the bound is per-action, - // not per-toolkit: one Notion schema alone can produce thousands of - // rules. The only client-side lever is to **not send `tools: [...]` - // at all** — the backend has nothing to compile, so no grammar, so - // no ceiling. We then describe the tools in the system prompt as - // prose (XmlToolDispatcher format) and parse `` tags out - // of the model's free-form response text. - // - // Scoped to `integrations_agent` because that's the only path where we - // pass Composio toolkit schemas. Every other typed sub-agent - // (welcome, researcher, summarizer, …) uses small built-in tool - // sets that stay well under the grammar ceiling and benefit from - // native mode's stricter formatting guarantees. + // ── Text-mode override for integrations_agent ── + // Large Composio toolkits compile into provider grammars that blow the + // 65 535-rule ceiling, so for `integrations_agent` we omit `tools: [...]` + // and describe them in the system prompt as prose, parsing `` + // tags out of the model's response. Forcing `request_specs() == &[]` makes + // the engine skip native tools and fall back to its XML parse + batched + // `[Tool results]` path — exactly what text mode needs. let force_text_mode = agent_id == "integrations_agent" && !tool_specs.is_empty(); - - let supports_native = - !force_text_mode && provider.supports_native_tools() && !tool_specs.is_empty(); - let request_tools = if supports_native { - Some(tool_specs) - } else { - None - }; - if force_text_mode { - // Append the XML tool protocol + available-tool list to the - // existing system prompt. 
`history[0]` is the system message - // built by `run_typed_mode` upstream; we - // augment it in-place so the model learns the call format for - // this session without an extra message round-trip. if let Some(sys) = history.iter_mut().find(|m| m.role == "system") { sys.content.push_str("\n\n"); sys.content @@ -1348,24 +1298,282 @@ async fn run_inner_loop( ); } - let mut usage = AggregatedUsage::default(); + let advertised_specs: Vec = if force_text_mode { + Vec::new() + } else { + tool_specs.to_vec() + }; - // Per-iteration transcript persistence. Mirrors the main-agent - // turn loop: right after each provider response lands (and again - // after the final response is pushed) we flush the full history - // to disk. A crash during tool execution no longer erases the - // sub-agent's response — the bytes are on disk before any tool - // runs. Best-effort: write failures are logged at `debug` and the - // loop continues. - let persist_transcript = |history: &[ChatMessage], usage: &AggregatedUsage| { + let mut tool_source = SubagentToolSource { + parent_tools, + extra_tools, + allowed_names, + lazy_resolver, + advertised_specs, + handoff_cache, + policy: crate::openhuman::tools::policy::DefaultToolPolicy, + agent_id: agent_id.to_string(), + }; + let mut observer = SubagentObserver { + worker_thread_id, + workspace_dir: parent.workspace_dir.clone(), + transcript_stem, + agent_id: agent_id.to_string(), + task_id: task_id.to_string(), + force_text_mode, + usage: AggregatedUsage::default(), + }; + let checkpoint = SubagentCheckpoint { + provider, + model: model.to_string(), + temperature, + agent_id: agent_id.to_string(), + }; + let progress = super::super::engine::SubagentProgress { + sink: parent.on_progress.clone(), + agent_id: agent_id.to_string(), + task_id: task_id.to_string(), + }; + + let parser = super::super::engine::DefaultParser; + let outcome = super::super::engine::run_turn_engine( + provider, + history, + &mut tool_source, + &progress, + &mut observer, + 
&checkpoint, + &parser, + "subagent", + model, + temperature, + true, // silent — sub-agents never echo to stdout + &crate::openhuman::config::MultimodalConfig::default(), + max_iterations, + None, // sub-agents don't stream a draft + ) + .await?; + + Ok((outcome.text, outcome.iterations as usize, observer.usage)) +} + +/// Apply the progressive-disclosure handoff to a tool result. If a cache is +/// present and the (cleaned) result is large and not an error / not from the +/// extractor tool, stash the raw payload and substitute a short placeholder the +/// sub-agent can drill into with `extract_from_result`. Errors and +/// already-extracted output pass through unchanged. +fn apply_handoff( + cache: &ResultHandoffCache, + tool_name: &str, + task_id: &str, + agent_id: &str, + result_text: String, +) -> String { + let skip_cleaning = tool_name == "extract_from_result" || result_text.starts_with("Error"); + let cleaned = if skip_cleaning { + result_text + } else { + let pre_len = result_text.len(); + let cleaned = clean_tool_output(&result_text); + if cleaned.len() < pre_len { + tracing::debug!( + tool = %tool_name, + before_bytes = pre_len, + after_bytes = cleaned.len(), + saved_pct = ((pre_len - cleaned.len()) * 100) / pre_len.max(1), + "[subagent_runner:handoff] cleaned tool output (stripped markup/data-uris/whitespace)" + ); + } + cleaned + }; + let tokens = cleaned.len().div_ceil(4); + if !skip_cleaning && tokens > HANDOFF_OVERSIZE_THRESHOLD_TOKENS { + let id = cache.store(tool_name.to_string(), cleaned.clone()); + let placeholder = build_handoff_placeholder(tool_name, &id, &cleaned); + tracing::info!( + task_id = %task_id, + agent_id = %agent_id, + tool = %tool_name, + raw_tokens = tokens, + raw_bytes = cleaned.len(), + threshold_tokens = HANDOFF_OVERSIZE_THRESHOLD_TOKENS, + result_id = %id, + "[subagent_runner:handoff] stashed oversized tool output; substituted placeholder into history" + ); + placeholder + } else { + cleaned + } +} + +/// Sub-agent 
[`ToolSource`]: looks up tools in `extra_tools` then the parent +/// registry, lazily registers toolkit actions the fuzzy filter omitted, rejects +/// names outside the allowlist, and routes execution through the shared +/// [`run_one_tool`] (so sub-agents now get the same approval gate, audit, +/// credential scrub, tokenjuice and timeout as the channel loop), then applies +/// the progressive-disclosure handoff. +struct SubagentToolSource<'a> { + parent_tools: &'a [Box], + extra_tools: Vec>, + allowed_names: HashSet, + lazy_resolver: Option, + advertised_specs: Vec, + handoff_cache: Option<&'a ResultHandoffCache>, + policy: crate::openhuman::tools::policy::DefaultToolPolicy, + agent_id: String, +} + +#[async_trait::async_trait] +impl super::super::engine::ToolSource for SubagentToolSource<'_> { + fn request_specs(&self) -> &[ToolSpec] { + &self.advertised_specs + } + + async fn execute_call( + &mut self, + call: &super::super::parse::ParsedToolCall, + iteration: usize, + progress: &dyn super::super::engine::ProgressReporter, + progress_call_id: &str, + ) -> super::super::engine::ToolRunResult { + // Lazy registration: a call for an unknown tool that matches a real + // action slug in the bound toolkit gets built on the spot and admitted + // to the allowlist. The fuzzy top-K filter keeps schemas out of the + // prompt, not out of execution. 
+ if !self.allowed_names.contains(&call.name) { + if let Some(resolver) = self.lazy_resolver.as_ref() { + if let Some(tool) = resolver.resolve(&call.name) { + tracing::info!( + agent_id = %self.agent_id, + tool = %call.name, + "[subagent_runner] lazily registered toolkit action outside fuzzy top-K" + ); + self.allowed_names.insert(tool.name().to_string()); + self.extra_tools.push(tool); + } + } + } + + if !self.allowed_names.contains(&call.name) { + tracing::warn!( + agent_id = %self.agent_id, + tool = %call.name, + "[subagent_runner] tool not in allowlist for this sub-agent" + ); + let iteration_u32 = (iteration + 1) as u32; + progress + .tool_started(progress_call_id, &call.name, &call.arguments, iteration_u32) + .await; + let mut available: Vec<&str> = self.allowed_names.iter().map(|s| s.as_str()).collect(); + if let Some(resolver) = self.lazy_resolver.as_ref() { + available.extend(resolver.known_slugs()); + } + available.sort_unstable(); + available.dedup(); + let text = format!( + "Error: tool '{}' is not available to the {} sub-agent. 
Available tools: {}", + call.name, + self.agent_id, + available.join(", ") + ); + progress + .tool_completed( + progress_call_id, + &call.name, + false, + text.chars().count(), + 0, + iteration_u32, + ) + .await; + return super::super::engine::ToolRunResult { + text, + success: false, + }; + } + + let tool_opt: Option<&dyn Tool> = self + .extra_tools + .iter() + .find(|t| t.name() == call.name) + .or_else(|| self.parent_tools.iter().find(|t| t.name() == call.name)) + .map(|b| b.as_ref()); + let outcome = super::super::engine::run_one_tool( + tool_opt, + call, + iteration, + progress, + &self.policy, + None, + progress_call_id, + ) + .await; + + let text = match self.handoff_cache { + Some(cache) => apply_handoff(cache, &call.name, "", &self.agent_id, outcome.text), + None => outcome.text, + }; + super::super::engine::ToolRunResult { + text, + success: outcome.success, + } + } +} + +/// Sub-agent [`TurnObserver`]: accumulates usage, persists the per-iteration +/// transcript, and mirrors assistant intents / tool results / final responses +/// to the spawn's worker thread (when one is attached). 
+struct SubagentObserver { + worker_thread_id: Option, + workspace_dir: std::path::PathBuf, + transcript_stem: String, + agent_id: String, + task_id: String, + force_text_mode: bool, + usage: AggregatedUsage, +} + +impl SubagentObserver { + fn append_worker_message( + &self, + content: String, + sender: String, + extra_metadata: serde_json::Value, + ) { + let Some(ref thread_id) = self.worker_thread_id else { + return; + }; + let message = ConversationMessage { + id: format!("{}:{}", sender, uuid::Uuid::new_v4()), + content, + message_type: "text".to_string(), + extra_metadata, + sender, + created_at: chrono::Utc::now().to_rfc3339(), + }; + if let Err(err) = crate::openhuman::memory_conversations::append_message( + self.workspace_dir.clone(), + thread_id, + message, + ) { + tracing::debug!( + agent_id = %self.agent_id, + thread_id = %thread_id, + error = %err, + "[subagent_runner] failed to append message to worker thread" + ); + } + } + + fn persist_transcript(&self, history: &[ChatMessage]) { let path = match transcript::resolve_keyed_transcript_path( - &parent.workspace_dir, - &transcript_stem, + &self.workspace_dir, + &self.transcript_stem, ) { Ok(p) => p, Err(err) => { tracing::debug!( - agent_id = %agent_id, + agent_id = %self.agent_id, error = %err, "[subagent_runner] failed to resolve transcript path" ); @@ -1374,642 +1582,184 @@ async fn run_inner_loop( }; let now = chrono::Utc::now().to_rfc3339(); let meta = transcript::TranscriptMeta { - agent_name: agent_id.to_string(), + agent_name: self.agent_id.clone(), dispatcher: "native".into(), created: now.clone(), updated: now, turn_count: 1, - input_tokens: usage.input_tokens, - output_tokens: usage.output_tokens, - cached_input_tokens: usage.cached_input_tokens, - charged_amount_usd: usage.charged_amount_usd, + input_tokens: self.usage.input_tokens, + output_tokens: self.usage.output_tokens, + cached_input_tokens: self.usage.cached_input_tokens, + charged_amount_usd: self.usage.charged_amount_usd, thread_id: 
crate::openhuman::inference::provider::thread_context::current_thread_id(), }; if let Err(err) = transcript::write_transcript(&path, history, &meta, None) { tracing::debug!( - agent_id = %agent_id, + agent_id = %self.agent_id, error = %err, "[subagent_runner] failed to write transcript" ); } - }; - - let append_worker_message = - |content: String, sender: String, extra_metadata: serde_json::Value| { - if let Some(ref thread_id) = worker_thread_id { - let message = ConversationMessage { - id: format!("{}:{}", sender, uuid::Uuid::new_v4()), - content, - message_type: "text".to_string(), - extra_metadata, - sender, - created_at: chrono::Utc::now().to_rfc3339(), - }; - if let Err(err) = crate::openhuman::memory_conversations::append_message( - parent.workspace_dir.clone(), - thread_id, - message, - ) { - tracing::debug!( - agent_id = %agent_id, - thread_id = %thread_id, - error = %err, - "[subagent_runner] failed to append message to worker thread" - ); - } - } - }; - - // Per-turn progress sink shared with the parent — `None` for runs - // that don't have a subscriber (CLI / triage / tests). Cloned upfront - // so the inner loop body doesn't repeatedly re-resolve `parent.on_progress`. - let progress_sink = parent.on_progress.clone(); - - // Repeated-failure circuit breaker (shared guard with run_tool_call_loop): - // halt the subagent with a root cause instead of grinding to - // MaxIterationsExceeded when it re-issues a doomed action or makes no - // progress (e.g. re-running `pip install` that keeps failing PEP 668). 
- let mut failure_guard = crate::openhuman::agent::harness::tool_loop::RepeatFailureGuard::new(); - let mut halt_reason: Option = None; - for iteration in 0..max_iterations { - tracing::debug!( - task_id = %task_id, - agent_id = %agent_id, - iteration, - history_len = history.len(), - "[subagent_runner] iteration start" - ); - - if let Some(ref tx) = progress_sink { - let _ = tx - .send(AgentProgress::SubagentIterationStarted { - agent_id: agent_id.to_string(), - task_id: task_id.to_string(), - iteration: (iteration + 1) as u32, - max_iterations: max_iterations as u32, - }) - .await; - } - - // Stream the child's tokens to the parent's progress sink so the - // UI can render the sub-agent's thinking/output live, attributed - // to this row via `task_id`. Mirrors the main turn loop - // (`session/turn.rs`): only set up the SSE sink when a listener - // exists, otherwise the channel buffer would back-pressure the - // provider and we'd lose the non-streaming HTTP fast path for - // providers that don't implement streaming. - let child_iteration_for_stream = (iteration + 1) as u32; - let (delta_tx_opt, delta_forwarder) = if let Some(ref sink) = progress_sink { - let (tx, mut rx) = tokio::sync::mpsc::channel::(128); - let sink = sink.clone(); - let agent_id_for_stream = agent_id.to_string(); - let task_id_for_stream = task_id.to_string(); - let forwarder = tokio::spawn(async move { - while let Some(event) = rx.recv().await { - // Only visible text and reasoning deltas attribute to - // the subagent transcript; tool-call arg fragments are - // already surfaced via SubagentToolCall* lifecycle - // events, so they're dropped here to avoid double-render. 
- let mapped = match event { - ProviderDelta::TextDelta { delta } => AgentProgress::SubagentTextDelta { - agent_id: agent_id_for_stream.clone(), - task_id: task_id_for_stream.clone(), - delta, - iteration: child_iteration_for_stream, - }, - ProviderDelta::ThinkingDelta { delta } => { - AgentProgress::SubagentThinkingDelta { - agent_id: agent_id_for_stream.clone(), - task_id: task_id_for_stream.clone(), - delta, - iteration: child_iteration_for_stream, - } - } - ProviderDelta::ToolCallStart { .. } - | ProviderDelta::ToolCallArgsDelta { .. } => continue, - }; - // Await backpressure so streamed deltas arrive in order - // and aren't silently dropped when the downstream - // progress bridge is slow. - if sink.send(mapped).await.is_err() { - break; - } - } - }); - (Some(tx), Some(forwarder)) - } else { - (None, None) - }; - - let chat_result = provider - .chat( - ChatRequest { - messages: history.as_slice(), - tools: request_tools, - stream: delta_tx_opt.as_ref(), - }, - model, - temperature, - ) - .await; - - // Drop the sender so the forwarder task observes channel close and - // terminates instead of leaking. This must run on BOTH the success - // and error paths — propagating the provider error with `?` before - // joining the forwarder would orphan the task and leak the sender. 
- drop(delta_tx_opt); - if let Some(forwarder) = delta_forwarder { - let _ = forwarder.await; - } - let resp = chat_result?; + } +} - if let Some(ref u) = resp.usage { - usage.input_tokens += u.input_tokens; - usage.output_tokens += u.output_tokens; - usage.cached_input_tokens += u.cached_input_tokens; - usage.charged_amount_usd += u.charged_amount_usd; - } +#[async_trait::async_trait] +impl super::super::engine::TurnObserver for SubagentObserver { + fn record_usage( + &mut self, + _model: &str, + usage: &crate::openhuman::inference::provider::UsageInfo, + ) { + self.usage.input_tokens += usage.input_tokens; + self.usage.output_tokens += usage.output_tokens; + self.usage.cached_input_tokens += usage.cached_input_tokens; + self.usage.charged_amount_usd += usage.charged_amount_usd; + } - let response_text = resp.text.clone().unwrap_or_default(); - - // In text mode the model emits `{…}` tags - // inline inside `resp.text` (and `resp.tool_calls` is empty - // because we told the provider not to structure them). Parse - // them ourselves via the shared harness helper and synthesise a - // `ToolCall` per parsed block so the rest of the loop can stay - // uniform. 
- let native_calls: Vec = if force_text_mode { - let (_cleaned, parsed) = super::super::parse::parse_tool_calls(&response_text); - parsed - .into_iter() - .enumerate() - .map(|(i, call)| { - let args_str = if call.arguments.is_null() { - "{}".to_string() - } else { - call.arguments.to_string() - }; - ToolCall { - id: call - .id - .clone() - .unwrap_or_else(|| format!("call_text_{iteration}_{i}")), - name: call.name, - arguments: args_str, - } - }) - .collect() + fn on_assistant( + &mut self, + _display_text: &str, + response_text: &str, + _reasoning_content: Option<&str>, + _native_tool_calls: &[crate::openhuman::inference::provider::ToolCall], + parsed_calls: &[super::super::parse::ParsedToolCall], + iteration: usize, + is_final: bool, + ) { + let tool_calls = parsed_calls.len(); + let extra = if is_final { + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "final": true, + }) } else { - resp.tool_calls.clone() + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "tool_calls": tool_calls, + }) }; + self.append_worker_message(response_text.to_string(), "agent".to_string(), extra); + } - if native_calls.is_empty() { - tracing::debug!( - task_id = %task_id, - agent_id = %agent_id, - iteration, - final_chars = response_text.chars().count(), - "[subagent_runner] no tool calls — returning final response" - ); - history.push(ChatMessage::assistant(response_text.clone())); - append_worker_message( - response_text.clone(), - "agent".to_string(), - serde_json::json!({ - "scope": "worker_thread", - "agent_id": agent_id, - "task_id": task_id, - "iteration": iteration + 1, - "final": true, - }), - ); - // Persist the final response before returning so the - // transcript always captures the last provider reply. 
- persist_transcript(history, &usage); - return Ok((response_text, iteration + 1, usage)); - } - - // Persist the assistant turn. In native mode use the canonical - // serialiser (wraps text + structured tool_calls for the - // backend's jinja template). In text mode the raw response - // already contains the `` tags inline, so persist it - // verbatim — on the next turn the model sees its own prior - // emissions exactly as it wrote them. - if force_text_mode { - history.push(ChatMessage::assistant(response_text.clone())); - } else { - let assistant_history_content = super::super::parse::build_native_assistant_history( - &response_text, - resp.reasoning_content.as_deref(), - &native_calls, - ); - history.push(ChatMessage::assistant(assistant_history_content)); + fn on_tool_result( + &mut self, + call_id: &str, + tool_name: &str, + result_text: &str, + _success: bool, + iteration: usize, + ) { + // Native mode mirrors each tool result individually; text mode batches + // them in `on_results_batch` instead. + if self.force_text_mode { + return; } - - append_worker_message( - response_text.clone(), - "agent".to_string(), + self.append_worker_message( + result_text.to_string(), + "user".to_string(), serde_json::json!({ "scope": "worker_thread", - "agent_id": agent_id, - "task_id": task_id, + "agent_id": self.agent_id, + "task_id": self.task_id, "iteration": iteration + 1, - "tool_calls": native_calls.len(), + "tool_call_id": call_id, + "tool_name": tool_name, }), ); + } - // Persist the assistant response + tool-call intents **before** - // executing tools. If the session crashes mid-tool-call we - // still have what the model emitted on disk. - persist_transcript(history, &usage); - - // Execute each call, collect outputs. Native mode pushes one - // `role=tool` message per call with the structured `tool_call_id` - // reference. 
Text mode has no such reference (the model just - // emitted tags in prose), so we batch all results into a single - // user message formatted with `` tags — mirroring - // XmlToolDispatcher's `format_results`. - let mut text_mode_result_block = String::new(); - for call in &native_calls { - let call_started = Instant::now(); - if let Some(ref tx) = progress_sink { - let _ = tx - .send(AgentProgress::SubagentToolCallStarted { - agent_id: agent_id.to_string(), - task_id: task_id.to_string(), - call_id: call.id.clone(), - tool_name: call.name.clone(), - iteration: (iteration + 1) as u32, - }) - .await; - } - - // Lazy registration: if the call is for an unknown tool but - // matches a real action slug in the bound toolkit's full - // catalogue, build the [`ComposioActionTool`] on the spot and - // admit it to the allowlist for this and subsequent turns. - // The fuzzy top-K filter exists to keep schemas out of the - // system prompt, not to gate execution — when the model - // names the slug correctly we should just dispatch. 
- if !allowed_names.contains(&call.name) { - if let Some(resolver) = lazy_resolver.as_ref() { - if let Some(tool) = resolver.resolve(&call.name) { - tracing::info!( - task_id = %task_id, - agent_id = %agent_id, - tool = %call.name, - "[subagent_runner] lazily registered toolkit action outside fuzzy top-K" - ); - allowed_names.insert(tool.name().to_string()); - extra_tools.push(tool); - } - } - } + fn on_results_batch(&mut self, content: &str, iteration: usize) { + self.append_worker_message( + content.to_string(), + "user".to_string(), + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "mode": "text", + }), + ); + } - let result_text = if !allowed_names.contains(&call.name) { - tracing::warn!( - task_id = %task_id, - agent_id = %agent_id, - tool = %call.name, - "[subagent_runner] tool not in allowlist for this sub-agent" - ); - let mut available: Vec<&str> = allowed_names.iter().map(|s| s.as_str()).collect(); - if let Some(resolver) = lazy_resolver.as_ref() { - available.extend(resolver.known_slugs()); - } - available.sort_unstable(); - available.dedup(); - format!( - "Error: tool '{}' is not available to the {} sub-agent. Available tools: {}", - call.name, - agent_id, - available.join(", ") - ) - } else if let Some(tool) = extra_tools - .iter() - .find(|t| t.name() == call.name) - .or_else(|| parent_tools.iter().find(|t| t.name() == call.name)) - { - let args = parse_tool_arguments(&call.arguments); - let timeout = crate::openhuman::tool_timeout::tool_execution_timeout_duration(); - // ── External-effect approval gate (#1339, #2135) ─ - // Subagents share the same gate as the parent loop; - // see `tool_loop.rs` for the rationale. - // - // When the call is allowed and persisted, we keep - // hold of the `request_id` so we can stamp the - // terminal execution outcome onto the same audit - // row (issue #2135). 
- let mut approval_request_id: Option = None; - let mut approval_gate_for_audit: Option< - std::sync::Arc, - > = None; - let gate_denial: Option = if tool.external_effect_with_args(&args) { - if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() { - let summary = - crate::openhuman::approval::summarize_action(&call.name, &args); - let redacted = crate::openhuman::approval::redact_args(&args); - let (outcome, request_id) = - gate.intercept_audited(&call.name, &summary, redacted).await; - match outcome { - crate::openhuman::approval::GateOutcome::Allow => { - approval_request_id = request_id; - if approval_request_id.is_some() { - approval_gate_for_audit = Some(gate); - } - None - } - crate::openhuman::approval::GateOutcome::Deny { reason } => { - tracing::warn!( - tool = call.name.as_str(), - reason = %reason, - "[subagent_runner] approval gate denied tool call" - ); - Some(reason) - } - } - } else { - None - } - } else { - None - }; + fn after_iteration(&mut self, history: &[ChatMessage], _iteration: usize) { + self.persist_transcript(history); + } +} - if let Some(reason) = gate_denial { - // Prefix as Error so the downstream `call_success` - // computation (`!result_text.starts_with("Error")`) - // marks the denial as a failed tool call in - // progress events and tool_result blocks. - // (CodeRabbit review on PR #2149.) - format!("Error: {reason}") - } else { - let (raw, exec_success) = - match tokio::time::timeout(timeout, tool.execute(args)).await { - Ok(Ok(result)) => { - let raw = result.output(); - if result.is_error { - (format!("Error: {raw}"), false) - } else { - (raw, true) - } - } - Ok(Err(err)) => { - (format!("Error executing {}: {err}", call.name), false) - } - Err(_) => (format!("Error: tool '{}' timed out", call.name), false), - }; - // Stamp the terminal status onto the - // pending_approvals audit row — best-effort, - // failures don't propagate to the agent (#2135). 
- // Success comes from the structured execute result, - // not from parsing `raw.starts_with("Error")` — a - // legitimate success payload can start with "Error" - // (search hits, copied logs), which would otherwise - // persist a false Failure (CodeRabbit review on #2367). - if let (Some(gate), Some(req_id)) = ( - approval_gate_for_audit.as_ref(), - approval_request_id.as_ref(), - ) { - let success = exec_success; - let exec_outcome = if success { - crate::openhuman::approval::ExecutionOutcome::Success - } else { - crate::openhuman::approval::ExecutionOutcome::Failure - }; - let err_text = if success { None } else { Some(raw.as_str()) }; - gate.record_execution(req_id, exec_outcome, err_text); - } - raw - } - } else { - format!("Unknown tool: {}", call.name) - }; +/// Sub-agent [`CheckpointStrategy`]: when the iteration cap is hit, summarize +/// the run-so-far into a resumable checkpoint (so the delegating agent can +/// continue from partial progress) instead of erroring. Falls back to a +/// deterministic digest summary if the summarization call fails or returns no +/// prose. +struct SubagentCheckpoint<'a> { + provider: &'a dyn Provider, + model: String, + temperature: f64, + agent_id: String, +} - // Progressive-disclosure handoff: if this spawn has a cache - // (integrations_agent-with-toolkit path) and the result is large - // and not itself an error / not from the extractor tool, - // stash the raw payload and replace it in history with a - // short placeholder. The sub-agent can drill in with - // `extract_from_result(result_id=..., query=...)` on the - // next turn. Errors and already-extracted output go through - // unchanged — no point handing off a 200-byte error or an - // already-compressed summary. - // - // Cleaning happens before the size check so HTML-heavy tool - // outputs (Gmail bodies, HTML-embedded Notion blocks) that - // drop below threshold after stripping markup skip the - // extract pipeline entirely. 
For anything still over - // threshold, the cache stores the cleaned text — chunks see - // real content, not `
` soup. - let result_text = if let Some(cache) = handoff_cache { - let skip_cleaning = - call.name == "extract_from_result" || result_text.starts_with("Error"); - let cleaned = if skip_cleaning { - result_text +#[async_trait::async_trait] +impl super::super::engine::CheckpointStrategy for SubagentCheckpoint<'_> { + async fn on_max_iter( + &self, + digest: &str, + max_iterations: usize, + ) -> anyhow::Result { + let agent_id = &self.agent_id; + let deterministic = format!( + "I reached my tool-call limit ({max_iterations} steps) before finishing this task. \ + Progress so far (tool calls + results):\n{digest}\n\nThe task is incomplete — the above is \ + what I accomplished; continue from here." + ); + let summary_input = vec![ChatMessage::user(format!( + "You are sub-agent `{agent_id}` and reached your tool-call limit before finishing. Here are \ + the tool calls you made and their results — compile a brief progress checkpoint (what you \ + accomplished, what still remains) for the agent that delegated to you. 
Do not call tools.\n\n{digest}" + ))]; + match self + .provider + .chat( + ChatRequest { + messages: &summary_input, + tools: None, + stream: None, + }, + &self.model, + self.temperature, + ) + .await + { + Ok(resp) => { + let usage = resp.usage.clone(); + let raw = resp.text.unwrap_or_default(); + let (prose, _) = super::super::parse::parse_tool_calls(&raw); + let text = if prose.trim().is_empty() { + deterministic } else { - let pre_len = result_text.len(); - let cleaned = clean_tool_output(&result_text); - if cleaned.len() < pre_len { - tracing::debug!( - tool = %call.name, - before_bytes = pre_len, - after_bytes = cleaned.len(), - saved_pct = ((pre_len - cleaned.len()) * 100) / pre_len.max(1), - "[subagent_runner:handoff] cleaned tool output (stripped markup/data-uris/whitespace)" - ); - } - cleaned + prose }; - let tokens = cleaned.len().div_ceil(4); - if !skip_cleaning && tokens > HANDOFF_OVERSIZE_THRESHOLD_TOKENS { - let id = cache.store(call.name.clone(), cleaned.clone()); - let placeholder = build_handoff_placeholder(&call.name, &id, &cleaned); - tracing::info!( - task_id = %task_id, - agent_id = %agent_id, - tool = %call.name, - raw_tokens = tokens, - raw_bytes = cleaned.len(), - threshold_tokens = HANDOFF_OVERSIZE_THRESHOLD_TOKENS, - result_id = %id, - "[subagent_runner:handoff] stashed oversized tool output; substituted placeholder into history" - ); - placeholder - } else { - cleaned - } - } else { - result_text - }; - - let call_success = !result_text.starts_with("Error"); - let call_output_chars = result_text.chars().count(); - let call_elapsed_ms = call_started.elapsed().as_millis() as u64; - - // Record this call in the run digest (output truncated to bound - // size) for a possible max-iteration checkpoint. - run_tool_digest.push_str(&format!( - "- {} [{}]: {}\n", - call.name, - if call_success { "ok" } else { "failed" }, - truncate_with_ellipsis(&result_text, 800) - )); - - // Repeated-failure circuit breaker (shared guard). 
`call.arguments` - // is the stable signature; on a trip we stash the root-cause summary - // and bail after this iteration's tool results are recorded. - if let Some(reason) = - failure_guard.record(&call.name, &call.arguments, call_success, &result_text) - { - tracing::warn!( - agent_id = %agent_id, - tool = call.name.as_str(), - "[subagent_runner] circuit breaker tripped — halting with root cause" - ); - halt_reason = Some(reason); + Ok(super::super::engine::CheckpointOutcome { text, usage }) } - - if force_text_mode { - let status = if call_success { "ok" } else { "error" }; - let _ = std::fmt::Write::write_fmt( - &mut text_mode_result_block, - format_args!( - "\n{}\n\n", - call.name, status, result_text - ), - ); - } else { - let tool_msg = serde_json::json!({ - "tool_call_id": call.id, - "content": result_text.clone(), - }); - history.push(ChatMessage::tool(tool_msg.to_string())); - append_worker_message( - result_text.clone(), - "user".to_string(), - serde_json::json!({ - "scope": "worker_thread", - "agent_id": agent_id, - "task_id": task_id, - "iteration": iteration + 1, - "tool_call_id": call.id, - "tool_name": call.name, - }), + Err(e) => { + tracing::warn!( + agent_id = %self.agent_id, + error = %e, + "[subagent_runner] checkpoint summary call failed — using deterministic fallback" ); + Ok(super::super::engine::CheckpointOutcome { + text: deterministic, + usage: None, + }) } - - if let Some(ref tx) = progress_sink { - let _ = tx - .send(AgentProgress::SubagentToolCallCompleted { - agent_id: agent_id.to_string(), - task_id: task_id.to_string(), - call_id: call.id.clone(), - tool_name: call.name.clone(), - success: call_success, - output_chars: call_output_chars, - elapsed_ms: call_elapsed_ms, - iteration: (iteration + 1) as u32, - }) - .await; - } - } - - if force_text_mode && !text_mode_result_block.is_empty() { - let content = format!("[Tool results]\n{text_mode_result_block}"); - history.push(ChatMessage::user(content.clone())); - 
append_worker_message( - content, - "user".to_string(), - serde_json::json!({ - "scope": "worker_thread", - "agent_id": agent_id, - "task_id": task_id, - "iteration": iteration + 1, - "mode": "text", - }), - ); - } - - // Persist again after tool results have been appended so the - // on-disk transcript reflects each round's complete - // assistant-intent + tool-result pair. Without this, a crash - // between `persist_transcript` at line ~1044 and the next - // iteration's provider call would leave the transcript without - // the tool outputs the next turn will be reasoning from. - persist_transcript(history, &usage); - - // Circuit breaker tripped this iteration: return the root-cause summary - // as the subagent's result (tool results are already in `history`), - // instead of looping to MaxIterationsExceeded and being re-delegated. - if let Some(reason) = halt_reason.take() { - return Ok((reason, iteration + 1, usage)); } } - - // Iteration cap reached. Instead of erroring — which discards all of the - // sub-agent's partial work (the parent just sees "delegate failed") — - // compile a graceful checkpoint of what it accomplished and return it as - // the result, so the calling agent can continue from the partial progress - // (mirrors the main-agent checkpoint — bug-report-2026-05-26 A1). - let digest = if run_tool_digest.is_empty() { - "(no tool calls completed)" - } else { - run_tool_digest.as_str() - }; - let deterministic = format!( - "I reached my tool-call limit ({max_iterations} steps) before finishing this task. \ - Progress so far (tool calls + results):\n{digest}\n\nThe task is incomplete — the above is \ - what I accomplished; continue from here." - ); - let summary_input = vec![ChatMessage::user(format!( - "You are sub-agent `{agent_id}` and reached your tool-call limit before finishing. 
Here are \ - the tool calls you made and their results — compile a brief progress checkpoint (what you \ - accomplished, what still remains) for the agent that delegated to you. Do not call tools.\n\n{digest}" - ))]; - let checkpoint = match provider - .chat( - ChatRequest { - messages: &summary_input, - tools: None, - stream: None, - }, - model, - temperature, - ) - .await - { - Ok(resp) => { - if let Some(ref u) = resp.usage { - usage.input_tokens += u.input_tokens; - usage.output_tokens += u.output_tokens; - usage.cached_input_tokens += u.cached_input_tokens; - usage.charged_amount_usd += u.charged_amount_usd; - } - // Strip any stray tool-call markup a text-mode model emits; if no - // prose survives, fall back to the deterministic digest. - let raw = resp.text.unwrap_or_default(); - let (prose, _) = super::super::parse::parse_tool_calls(&raw); - if prose.trim().is_empty() { - deterministic - } else { - prose - } - } - Err(e) => { - tracing::warn!( - agent_id = %agent_id, - task_id = %task_id, - error = %e, - "[subagent_runner] checkpoint summary call failed — using deterministic fallback" - ); - deterministic - } - }; - // NB: unlike the main-agent path, this checkpoint is intentionally NOT - // written to a sub-agent transcript — the calling agent's transcript - // captures the delegated result, so there's no data loss. Don't "fix" - // this by adding a `persist_subagent_transcript` call. 
- Ok((checkpoint, max_iterations, usage)) } fn parse_tool_arguments(arguments: &str) -> serde_json::Value { diff --git a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs index 26e243bc34..4b9cbfbfbc 100644 --- a/src/openhuman/agent/harness/tool_loop.rs +++ b/src/openhuman/agent/harness/tool_loop.rs @@ -1,28 +1,14 @@ -use crate::openhuman::agent::cost::TurnCost; -use crate::openhuman::agent::multimodal; use crate::openhuman::agent::progress::AgentProgress; -use crate::openhuman::agent::stop_hooks::{current_stop_hooks, StopDecision, TurnState}; -use crate::openhuman::inference::provider::{ - ChatMessage, ChatRequest, Provider, ProviderCapabilityError, ProviderDelta, -}; -use crate::openhuman::tools::policy::{DefaultToolPolicy, PolicyDecision, ToolPolicy}; -use crate::openhuman::tools::traits::ToolScope; +use crate::openhuman::inference::provider::{ChatMessage, Provider}; +use crate::openhuman::tools::policy::{DefaultToolPolicy, ToolPolicy}; use crate::openhuman::tools::Tool; use anyhow::Result; use std::collections::HashSet; -use std::fmt::Write as _; -use std::io::Write as _; -use super::credentials::scrub_credentials; -use super::parse::{build_native_assistant_history, parse_structured_tool_calls, parse_tool_calls}; use super::payload_summarizer::PayloadSummarizer; -use crate::openhuman::context::guard::{ContextCheckResult, ContextGuard}; -use crate::openhuman::inference::model_context::context_window_for_model; - -use super::token_budget::trim_chat_messages_to_budget; /// Minimum characters per chunk when relaying LLM text to a streaming draft. -const STREAM_CHUNK_MIN_CHARS: usize = 80; +pub(crate) const STREAM_CHUNK_MIN_CHARS: usize = 80; /// Default maximum agentic tool-use iterations per user message to prevent runaway loops. /// Used as a safe fallback when `max_tool_iterations` is unset or configured as zero. 
@@ -272,932 +258,49 @@ pub(crate) async fn run_tool_call_loop( max_tool_iterations }; - // Is a given tool name visible to the model this turn? `None` - // means no filter (legacy behaviour = everything visible). - let is_visible = |name: &str| -> bool { - match visible_tool_names { - Some(set) => set.contains(name), - None => true, - } - }; - - // Filter to visible tools, then dedup by name before sending to the - // provider. Registry tools may collide with per-turn synthesised - // extra_tools (e.g. an `ArchetypeDelegationTool` whose - // `delegate_name = "research"` shadowing a same-named skill). Some - // providers (Anthropic, OpenHuman cloud after the uniqueness-enforcement - // rollout) 400 on duplicate tool names — see TAURI-RUST-4. - let filtered_specs: Vec = tools_registry - .iter() - .chain(extra_tools.iter()) - .filter(|tool| is_visible(tool.name())) - .map(|tool| tool.spec()) - .collect(); - let tool_specs = - crate::openhuman::agent::harness::session::dedup_visible_tool_specs(filtered_specs); - let use_native_tools = provider.supports_native_tools() && !tool_specs.is_empty(); - + // The agentic loop itself now lives in the shared turn engine; this + // function is a thin adapter that builds the channel/CLI tool source + // (registry + per-turn extras, visibility whitelist, pluggable policy) + // and hands off. The signature is retained verbatim so existing callers + // (the `agent.run_turn` bus handler, triage, the payload summarizer, and + // the harness test suite) are unaffected. 
log::debug!( - "[tool-loop] Registry has {} tool(s), extra {} tool(s), filter={} — {} visible in schema: [{}]", + "[tool-loop] Registry has {} tool(s), extra {} tool(s), filter={}", tools_registry.len(), extra_tools.len(), visible_tool_names .map(|s| format!("whitelist({})", s.len())) .unwrap_or_else(|| "none".to_string()), - tool_specs.len(), - tool_specs - .iter() - .map(|s| s.name.as_str()) - .collect::>() - .join(", ") ); - - let mut context_guard = context_window_for_model(model) - .map(ContextGuard::with_context_window) - .unwrap_or_else(ContextGuard::new); - let mut turn_cost = TurnCost::new(); - - // Announce turn start to progress subscribers (if any). We use - // `send().await` for lifecycle (turn/iteration) events so they - // survive downstream backpressure — dropping one of these would - // desync the web-channel progress bridge. High-volume delta events - // use the same backpressure discipline (see below). - if let Some(ref sink) = on_progress { - if let Err(e) = sink.send(AgentProgress::TurnStarted).await { - log::warn!("[agent_loop] progress sink closed at TurnStarted: {e}"); - } - } - - let stop_hooks = current_stop_hooks(); - // Repeated-failure circuit breaker — halts with a root cause rather than - // grinding to `max_iterations` (shared with the subagent loop). 
- let mut failure_guard = RepeatFailureGuard::new(); - let mut halt_reason: Option = None; - for iteration in 0..max_iterations { - if let Some(ref sink) = on_progress { - if let Err(e) = sink - .send(AgentProgress::IterationStarted { - iteration: (iteration + 1) as u32, - max_iterations: max_iterations as u32, - }) - .await - { - log::warn!("[agent_loop] progress sink closed at IterationStarted: {e}"); - } - } - - // ── Stop hooks: policy check before the next LLM call ── - if !stop_hooks.is_empty() { - let state = TurnState { - iteration: (iteration + 1) as u32, - max_iterations: max_iterations as u32, - cost: &turn_cost, - model, - }; - for hook in &stop_hooks { - match hook.check(&state).await { - StopDecision::Continue => {} - StopDecision::Stop { reason } => { - tracing::warn!( - iteration = (iteration + 1), - hook = hook.name(), - reason = %reason, - "[agent_loop] stop hook triggered — aborting turn" - ); - anyhow::bail!("Agent turn stopped by hook '{}': {reason}", hook.name()); - } - } - } - } - - // ── Context guard: check utilization before each LLM call ── - match context_guard.check() { - ContextCheckResult::Ok => {} - ContextCheckResult::CompactionNeeded => { - tracing::warn!( - iteration, - "[agent_loop] context guard: compaction needed (>{:.0}% full)", - crate::openhuman::context::guard::COMPACTION_TRIGGER_THRESHOLD * 100.0 - ); - // Compaction is handled by history management upstream; - // log and continue so the caller can act on it. 
- } - ContextCheckResult::ContextExhausted { - utilization_pct, - reason, - } => { - let msg = format!("Context window exhausted ({utilization_pct}% full): {reason}"); - crate::core::observability::report_error( - msg.as_str(), - "agent", - "context_exhausted", - &[ - ("provider", provider_name), - ("model", model), - ("utilization_pct", &utilization_pct.to_string()), - ], - ); - anyhow::bail!(msg); - } - } - - if let Some(context_window) = context_window_for_model(model) { - let budget_outcome = trim_chat_messages_to_budget(history, context_window); - if budget_outcome.trimmed { - log::warn!( - "[agent_loop] pre-dispatch history trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}", - model, - context_window, - budget_outcome.original_tokens, - budget_outcome.final_tokens, - budget_outcome.messages_removed - ); - } else { - tracing::debug!( - iteration, - model, - context_window, - estimated_tokens = budget_outcome.final_tokens, - "[agent_loop] pre-dispatch token budget ok" - ); - } - } - - tracing::debug!(iteration, "[agent_loop] sending LLM request"); - let image_marker_count = multimodal::count_image_markers(history); - if image_marker_count > 0 && !provider.supports_vision() { - let cap_err = ProviderCapabilityError { - provider: provider_name.to_string(), - capability: "vision".to_string(), - message: format!( - "received {image_marker_count} image marker(s), but this provider does not support vision input" - ), - }; - crate::core::observability::report_error( - &cap_err, - "agent", - "provider_capability", - &[ - ("provider", provider_name), - ("capability", "vision"), - ("model", model), - ], - ); - return Err(cap_err.into()); - } - - let prepared_messages = - multimodal::prepare_messages_for_provider(history, multimodal_config).await?; - - // Unified path via Provider::chat so provider-specific native tool logic - // (OpenAI/Anthropic/OpenRouter/compatible adapters) is honored. 
- let request_tools = if use_native_tools { - Some(tool_specs.as_slice()) - } else { - None - }; - - // Wire up a ProviderDelta → AgentProgress forwarder for this - // iteration when a progress sink exists. Senders dropped after - // the chat call so the forwarder task exits cleanly. - let iteration_for_stream = (iteration + 1) as u32; - let (delta_tx_opt, delta_forwarder) = if let Some(progress_sink) = on_progress.clone() { - let (tx, mut rx) = tokio::sync::mpsc::channel::(128); - let forwarder = tokio::spawn(async move { - while let Some(event) = rx.recv().await { - let mapped = match event { - ProviderDelta::TextDelta { delta } => AgentProgress::TextDelta { - delta, - iteration: iteration_for_stream, - }, - ProviderDelta::ThinkingDelta { delta } => AgentProgress::ThinkingDelta { - delta, - iteration: iteration_for_stream, - }, - ProviderDelta::ToolCallStart { call_id, tool_name } => { - AgentProgress::ToolCallArgsDelta { - call_id, - tool_name, - delta: String::new(), - iteration: iteration_for_stream, - } - } - ProviderDelta::ToolCallArgsDelta { call_id, delta } => { - AgentProgress::ToolCallArgsDelta { - call_id, - tool_name: String::new(), - delta, - iteration: iteration_for_stream, - } - } - }; - // Await backpressure rather than dropping deltas so - // partial streamed text/args stays consistent with the - // eventual ToolCallStarted / ToolCallCompleted events. - if progress_sink.send(mapped).await.is_err() { - // Downstream closed — abandon the forwarder. 
- break; - } - } - }); - (Some(tx), Some(forwarder)) - } else { - (None, None) - }; - - let chat_result = provider - .chat( - ChatRequest { - messages: &prepared_messages.messages, - tools: request_tools, - stream: delta_tx_opt.as_ref(), - }, - model, - temperature, - ) - .await; - - drop(delta_tx_opt); - if let Some(handle) = delta_forwarder { - let _ = handle.await; - } - - let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) = - match chat_result { - Ok(resp) => { - // Update context guard with token usage from this response. - if let Some(ref usage) = resp.usage { - context_guard.update_usage(usage); - turn_cost.add_call(model, usage); - tracing::debug!( - iteration, - input_tokens = usage.input_tokens, - output_tokens = usage.output_tokens, - context_window = usage.context_window, - cumulative_usd = turn_cost.total_usd(), - "[agent_loop] LLM response received" - ); - if let Some(ref sink) = on_progress { - let event = AgentProgress::TurnCostUpdated { - model: model.to_string(), - iteration: (iteration + 1) as u32, - input_tokens: turn_cost.input_tokens, - output_tokens: turn_cost.output_tokens, - cached_input_tokens: turn_cost.cached_input_tokens, - total_usd: turn_cost.total_usd(), - }; - if let Err(e) = sink.send(event).await { - log::warn!( - "[agent_loop] progress sink closed at TurnCostUpdated: {e}" - ); - } - } - } else { - tracing::debug!( - iteration, - "[agent_loop] LLM response received (no usage info)" - ); - } - - let response_text = resp.text_or_empty().to_string(); - let mut calls = parse_structured_tool_calls(&resp.tool_calls); - let mut parsed_text = String::new(); - - if calls.is_empty() { - let (fallback_text, fallback_calls) = parse_tool_calls(&response_text); - if !fallback_text.is_empty() { - parsed_text = fallback_text; - } - calls = fallback_calls; - } - - tracing::debug!( - iteration, - native_tool_calls = resp.tool_calls.len(), - parsed_tool_calls = calls.len(), - "[agent_loop] tool calls parsed" - 
); - - // Preserve native tool call IDs in assistant history so role=tool - // follow-up messages can reference the exact call id. - let assistant_history_content = if resp.tool_calls.is_empty() { - response_text.clone() - } else { - build_native_assistant_history( - &response_text, - resp.reasoning_content.as_deref(), - &resp.tool_calls, - ) - }; - - let native_calls = resp.tool_calls; - ( - response_text, - parsed_text, - calls, - assistant_history_content, - native_calls, - ) - } - Err(e) => { - // Transient upstream failures (rate-limit, gateway 5xx, "no - // healthy upstream", etc.) are already classified + retried - // by reliable.rs and produce an aggregate Sentry event only - // when every provider/model is exhausted. Reporting each - // per-iteration provider_chat error here duplicates the - // signal and floods Sentry — see OPENHUMAN-TAURI-3Y/3Z - // (~46 events combined) and the underlying TAURI-2E/84/T - // (~3300 events from raw per-attempt 429/503/504 reports). - let transient = crate::openhuman::inference::provider::reliable::is_rate_limited( - &e, - ) - || crate::openhuman::inference::provider::reliable::is_upstream_unhealthy( - &e, - ); - if transient { - tracing::warn!( - domain = "agent", - operation = "provider_chat", - provider = provider_name, - model = model, - iteration = iteration + 1, - error = %format!("{e:#}"), - "[agent] transient provider_chat failure — retried upstream; \ - aggregated all-providers-exhausted will report if applicable" - ); - } else { - crate::core::observability::report_error_or_expected( - &e, - "agent", - "provider_chat", - &[ - ("provider", provider_name), - ("model", model), - ("iteration", &(iteration + 1).to_string()), - ], - ); - } - return Err(e); - } - }; - - let display_text = if parsed_text.is_empty() { - response_text.clone() - } else { - parsed_text - }; - - if tool_calls.is_empty() { - tracing::debug!( - iteration, - "[agent_loop] no tool calls — returning final response" - ); - // No tool calls — this 
is the final response. - // If a streaming sender is provided, relay the text in small chunks - // so the channel can progressively update the draft message. - if let Some(ref tx) = on_delta { - // Split on whitespace boundaries, accumulating chunks of at least - // STREAM_CHUNK_MIN_CHARS characters for progressive draft updates. - let mut chunk = String::new(); - for word in display_text.split_inclusive(char::is_whitespace) { - chunk.push_str(word); - if chunk.len() >= STREAM_CHUNK_MIN_CHARS - && tx.send(std::mem::take(&mut chunk)).await.is_err() - { - break; // receiver dropped - } - } - if !chunk.is_empty() { - let _ = tx.send(chunk).await; - } - } - history.push(ChatMessage::assistant(response_text.clone())); - log::info!( - "[agent_loop] turn complete: iters={} provider_calls={} tokens_in={} tokens_out={} cached_in={} usd={:.4}", - (iteration + 1), - turn_cost.call_count, - turn_cost.input_tokens, - turn_cost.output_tokens, - turn_cost.cached_input_tokens, - turn_cost.total_usd(), - ); - if let Some(ref sink) = on_progress { - if let Err(e) = sink - .send(AgentProgress::TurnCompleted { - iterations: (iteration + 1) as u32, - }) - .await - { - log::warn!("[agent_loop] progress sink closed at TurnCompleted: {e}"); - } - } - return Ok(display_text); - } - - // Print any text the LLM produced alongside tool calls (unless silent) - if !silent && !display_text.is_empty() { - print!("{display_text}"); - let _ = std::io::stdout().flush(); - } - - // Execute each tool call and build results. - // `individual_results` tracks per-call output so that native-mode history - // can emit one `role: tool` message per tool call with the correct ID. - let mut tool_results = String::new(); - let mut individual_results: Vec = Vec::new(); - for (call_idx, call) in tool_calls.iter().enumerate() { - // Stable id threaded through the start/complete pair (and - // any preceding args-delta events) so consumers can - // reconcile tool rows by id. 
The fallback includes - // `call_idx` to stay unique when the same tool name - // appears multiple times in one iteration. - let progress_call_id = call - .id - .clone() - .unwrap_or_else(|| format!("loop-{iteration}-{call_idx}-{}", call.name)); - // Emit `ToolCallStarted` for every parsed call, even ones - // that will be rejected below (approval denied, CliRpcOnly, - // unknown) — the client-side row was created from the - // streamed args and needs a terminal event to resolve. - if let Some(ref sink) = on_progress { - if let Err(e) = sink - .send(AgentProgress::ToolCallStarted { - call_id: progress_call_id.clone(), - tool_name: call.name.clone(), - arguments: call.arguments.clone(), - iteration: (iteration + 1) as u32, - }) - .await - { - log::warn!( - "[agent_loop] progress sink closed while emitting ToolCallStarted: {e}" - ); - } - } - - // Helper: emit a failed `ToolCallCompleted` for an - // early-exit path (denied / CliRpcOnly / unknown) so the - // client row flips to `error` instead of staying running. - let emit_failed_completion = |message: &str| { - let call_id = progress_call_id.clone(); - let tool_name = call.name.clone(); - let output_chars = message.chars().count(); - let iteration_u32 = (iteration + 1) as u32; - let sink_opt = on_progress.clone(); - async move { - if let Some(sink) = sink_opt { - if let Err(e) = sink - .send(AgentProgress::ToolCallCompleted { - call_id, - tool_name, - success: false, - output_chars, - elapsed_ms: 0, - iteration: iteration_u32, - }) - .await - { - log::warn!( - "[agent_loop] progress sink closed while emitting early-exit ToolCallCompleted: {e}" - ); - } - } - } - }; - - // ── Tool policy check (#2131) ───────────────── - // Evaluate the pluggable ToolPolicy before any approval or - // execution. If the policy denies the call, skip everything - // (including approval side-effects) and return the denial - // reason as a tool error to the model. 
- if let PolicyDecision::Deny(reason) = tool_policy.evaluate(&call.name, &call.arguments) - { - tracing::debug!( - iteration, - tool = call.name.as_str(), - reason = %reason, - "[agent_loop] tool policy denied tool call" - ); - let denied = format!("Tool '{}' denied by policy: {reason}", call.name); - emit_failed_completion(&denied).await; - individual_results.push(denied.clone()); - let _ = writeln!( - tool_results, - "\n{denied}\n", - call.name - ); - // Record so a re-issued identical call halts the turn rather than - // repeating a deterministic policy denial to max_iterations. - if let Some(halt) = - failure_guard.record(&call.name, &call.arguments.to_string(), false, &denied) - { - halt_reason = Some(halt); - } - continue; - } - - // Look up the tool by name in the combined registry + extras, - // subject to the visibility whitelist. If the model hallucinated - // a filtered-out tool name we treat it as unknown — the error - // path below produces a structured error message the LLM can - // correct in the next iteration. - let tool_opt: Option<&dyn Tool> = tools_registry - .iter() - .chain(extra_tools.iter()) - .find(|t| t.name() == call.name && is_visible(t.name())) - .map(|b| b.as_ref()); - tracing::debug!( - iteration, - tool = call.name.as_str(), - found = tool_opt.is_some(), - "[agent_loop] executing tool" - ); - - // Scope check: CliRpcOnly tools cannot run in the autonomous agent loop. 
- if let Some(tool) = tool_opt { - if tool.scope() == ToolScope::CliRpcOnly { - tracing::warn!( - iteration, - tool = call.name.as_str(), - "[agent_loop] tool scope is CliRpcOnly — denied in agent loop" - ); - let denied = format!( - "Tool '{}' is only available via explicit CLI/RPC invocation, not in the autonomous agent loop.", - call.name - ); - emit_failed_completion(&denied).await; - individual_results.push(denied.clone()); - let _ = writeln!( - tool_results, - "\n{denied}\n", - call.name - ); - if let Some(halt) = failure_guard.record( - &call.name, - &call.arguments.to_string(), - false, - &denied, - ) { - halt_reason = Some(halt); - } - continue; - } - } - - // ── External-effect approval gate (#1339, #2135) ── - // Tools whose `external_effect()` returns true route - // through the process-global `ApprovalGate` so the UI - // can prompt the user before `execute()` runs. The gate - // is `None` when supervised mode is disabled or in test - // envs — behavior matches the pre-#1339 path. - // - // `approval_request_id` carries the persisted row id - // forward so we can stamp the terminal execution - // outcome onto the same `pending_approvals` row after - // the tool finishes (issue #2135). `None` means the - // tool was either not gated (no supervised gate, not - // external-effect), was session-allowlist-shortcutted, - // or was denied — none of which produce an audit row - // that needs an "after" entry. 
- let mut approval_request_id: Option = None; - let mut approval_gate_for_audit: Option< - std::sync::Arc, - > = None; - if let Some(tool) = tool_opt { - if tool.external_effect_with_args(&call.arguments) { - if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() { - let summary = crate::openhuman::approval::summarize_action( - &call.name, - &call.arguments, - ); - let redacted = crate::openhuman::approval::redact_args(&call.arguments); - let (outcome, request_id) = - gate.intercept_audited(&call.name, &summary, redacted).await; - match outcome { - crate::openhuman::approval::GateOutcome::Allow => { - approval_request_id = request_id; - if approval_request_id.is_some() { - approval_gate_for_audit = Some(gate); - } - } - crate::openhuman::approval::GateOutcome::Deny { reason } => { - tracing::warn!( - iteration, - tool = call.name.as_str(), - reason = %reason, - "[agent_loop] approval gate denied tool call" - ); - emit_failed_completion(&reason).await; - individual_results.push(reason.clone()); - let _ = writeln!( - tool_results, - "\n{reason}\n", - call.name - ); - // Record the denial in the shared breaker (the - // gate's `[policy-denied]` marker makes it a - // hard reject) so a re-issued identical call - // halts the turn instead of re-prompting - // forever — the normal record path below is - // skipped by this `continue`. 
- if let Some(halt) = failure_guard.record( - &call.name, - &call.arguments.to_string(), - false, - &reason, - ) { - halt_reason = Some(halt); - } - continue; - } - } - } - } - } - - let (result, call_succeeded) = if let Some(tool) = tool_opt { - let tool_deadline = - crate::openhuman::tool_timeout::tool_execution_timeout_duration(); - let timeout_secs = crate::openhuman::tool_timeout::tool_execution_timeout_secs(); - let tool_started = std::time::Instant::now(); - let outcome = - tokio::time::timeout(tool_deadline, tool.execute(call.arguments.clone())).await; - let elapsed_ms = tool_started.elapsed().as_millis() as u64; - let (result_text, success) = match outcome { - Ok(Ok(r)) => { - let output = r.output(); - let success = !r.is_error; - if success { - tracing::debug!( - iteration, - tool = call.name.as_str(), - output_len = output.len(), - "[agent_loop] tool succeeded" - ); - let mut scrubbed = scrub_credentials(&output); - let (compacted, tj_stats) = - crate::openhuman::tokenjuice::compact_tool_output( - &call.name, - Some(&call.arguments), - &scrubbed, - Some(0), - ); - if tj_stats.applied { - log::debug!( - "[agent_loop] tokenjuice applied tool={} rule={} {}->{} bytes", - call.name, - tj_stats.rule_id, - tj_stats.original_bytes, - tj_stats.compacted_bytes - ); - scrubbed = compacted; - } - - // Per-tool max_result_size_chars cap. When - // a tool sets it and the (post-tokenjuice) - // body still exceeds the cap, truncate - // here and skip the global payload - // summarizer for this call — the cap is - // fast and deterministic, the summarizer - // is the fallback for tools that don't - // know their own size budget. 
- let mut hit_per_tool_cap = false; - if let Some(cap) = tool.max_result_size_chars() { - let char_count = scrubbed.chars().count(); - if char_count > cap { - let truncated: String = scrubbed.chars().take(cap).collect(); - let dropped = char_count - cap; - log::info!( - "[agent_loop] per-tool cap applied tool={} cap_chars={} original_chars={} dropped_chars={}", - call.name, - cap, - char_count, - dropped, - ); - scrubbed = format!( - "{truncated}\n\n[truncated by tool cap: {dropped} more chars not shown]" - ); - hit_per_tool_cap = true; - } - } - - if !hit_per_tool_cap { - if let Some(summarizer) = payload_summarizer { - log::debug!( - "[agent_loop] payload_summarizer intercepting tool={} bytes={}", - call.name, - scrubbed.len() - ); - match summarizer - .maybe_summarize(&call.name, None, &scrubbed) - .await - { - Ok(Some(payload)) => { - log::info!( - "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes", - call.name, - payload.original_bytes, - payload.summary_bytes - ); - scrubbed = payload.summary; - } - Ok(None) => { - log::debug!( - "[agent_loop] payload_summarizer pass-through tool={} bytes={}", - call.name, - scrubbed.len() - ); - } - Err(e) => { - log::warn!( - "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)", - call.name, - e - ); - } - } - } - } - (scrubbed, true) - } else { - tracing::warn!( - iteration, - tool = call.name.as_str(), - "[agent_loop] tool returned error: {output}" - ); - let scrubbed = scrub_credentials(&output); - let (compacted, _) = crate::openhuman::tokenjuice::compact_tool_output( - &call.name, - Some(&call.arguments), - &scrubbed, - Some(1), - ); - (format!("Error: {compacted}"), false) - } - } - Ok(Err(e)) => { - crate::core::observability::report_error( - &e, - "tool", - "execute", - &[ - ("tool", call.name.as_str()), - ("outcome", "failed"), - ("iteration", &(iteration + 1).to_string()), - ], - ); - (format!("Error executing {}: {e}", call.name), false) - } - Err(_) => { - let msg = 
format!( - "tool '{}' timed out after {} seconds", - call.name, timeout_secs - ); - crate::core::observability::report_error( - msg.as_str(), - "tool", - "execute", - &[ - ("tool", call.name.as_str()), - ("outcome", "timeout"), - ("timeout_secs", &timeout_secs.to_string()), - ("iteration", &(iteration + 1).to_string()), - ], - ); - ( - format!( - "Error: tool '{}' timed out after {} seconds", - call.name, timeout_secs - ), - false, - ) - } - }; - if let Some(ref sink) = on_progress { - if let Err(e) = sink - .send(AgentProgress::ToolCallCompleted { - call_id: progress_call_id.clone(), - tool_name: call.name.clone(), - success, - output_chars: result_text.chars().count(), - elapsed_ms, - iteration: (iteration + 1) as u32, - }) - .await - { - log::warn!("[agent_loop] progress sink closed while emitting ToolCallCompleted: {e}"); - } - } - // ── Approval audit after-action row (#2135) ──── - // Stamp the terminal status onto the same - // `pending_approvals` row the gate created before - // execution, so the audit trail carries both the - // before (approval) and after (executed_at + - // outcome). Best-effort: a write failure here is - // logged but not propagated to the agent. 
- if let (Some(gate), Some(req_id)) = ( - approval_gate_for_audit.as_ref(), - approval_request_id.as_ref(), - ) { - let exec_outcome = if success { - crate::openhuman::approval::ExecutionOutcome::Success - } else { - crate::openhuman::approval::ExecutionOutcome::Failure - }; - let err_text = if success { - None - } else { - Some(result_text.as_str()) - }; - gate.record_execution(req_id, exec_outcome, err_text); - } - (result_text, success) - } else { - tracing::warn!( - iteration, - tool = call.name.as_str(), - "[agent_loop] unknown tool requested" - ); - let msg = format!("Unknown tool: {}", call.name); - emit_failed_completion(&msg).await; - (msg, false) - }; - - individual_results.push(result.clone()); - let _ = writeln!( - tool_results, - "\n{}\n", - call.name, result - ); - - // Repeated-failure circuit breaker (shared guard) — halt with a root - // cause instead of grinding to `max_iterations` on a doomed action. - if let Some(reason) = failure_guard.record( - &call.name, - &call.arguments.to_string(), - call_succeeded, - &result, - ) { - tracing::warn!( - iteration, - tool = call.name.as_str(), - "[agent_loop] circuit breaker tripped — halting with root cause" - ); - halt_reason = Some(reason); - } - } - - // Add assistant message with tool calls + tool results to history. - // Native mode: use JSON-structured messages so convert_messages() can - // reconstruct proper OpenAI-format tool_calls and tool result messages. - // Prompt mode: use XML-based text format as before. 
- history.push(ChatMessage::assistant(assistant_history_content)); - if native_tool_calls.is_empty() { - history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}"))); - } else { - for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) { - let tool_msg = serde_json::json!({ - "tool_call_id": native_call.id, - "content": result, - }); - history.push(ChatMessage::tool(tool_msg.to_string())); - } - } - - // Circuit breaker tripped this iteration: return the root-cause summary - // as the agent's result instead of looping to `max_iterations`. The - // tool results are already in `history` above, so the caller still has - // full context if it wants it. - if let Some(reason) = halt_reason.take() { - // Mirror the normal-completion path: emit TurnCompleted before the - // early return, otherwise progress consumers stay "in-flight" - // indefinitely when the circuit breaker trips. - if let Some(ref sink) = on_progress { - if let Err(e) = sink - .send(AgentProgress::TurnCompleted { - iterations: (iteration + 1) as u32, - }) - .await - { - log::warn!("[agent_loop] progress sink closed at TurnCompleted: {e}"); - } - } - return Ok(reason); - } - } - - // Return the typed `AgentError::MaxIterationsExceeded` variant (boxed - // through `anyhow::Error`) so downstream wrappers — notably - // `Agent::run_single` in `harness/session/runtime.rs` — can downcast and - // suppress Sentry emission for this deterministic agent-state outcome - // (OPENHUMAN-TAURI-99 / -98). The `Display` text is preserved verbatim so - // any caller that already inspects the string (UI chat surface, tests) - // continues to work. 
- Err(anyhow::Error::new( - crate::openhuman::agent::error::AgentError::MaxIterationsExceeded { - max: max_iterations, - }, - )) + let mut tool_source = super::engine::RegistryToolSource::new( + tools_registry, + extra_tools, + visible_tool_names, + tool_policy, + payload_summarizer, + ); + let progress = super::engine::TurnProgress::new(on_progress); + let mut observer = super::engine::NullObserver; + let checkpoint = super::engine::ErrorCheckpoint; + let parser = super::engine::DefaultParser; + super::engine::run_turn_engine( + provider, + history, + &mut tool_source, + &progress, + &mut observer, + &checkpoint, + &parser, + provider_name, + model, + temperature, + silent, + multimodal_config, + max_iterations, + on_delta, + ) + .await + .map(|outcome| outcome.text) } #[cfg(test)] diff --git a/src/openhuman/agent/harness/tool_loop_tests.rs b/src/openhuman/agent/harness/tool_loop_tests.rs index fd4a63ab83..56f2804758 100644 --- a/src/openhuman/agent/harness/tool_loop_tests.rs +++ b/src/openhuman/agent/harness/tool_loop_tests.rs @@ -1,6 +1,6 @@ use super::*; use crate::openhuman::inference::provider::traits::ProviderCapabilities; -use crate::openhuman::inference::provider::ChatResponse; +use crate::openhuman::inference::provider::{ChatRequest, ChatResponse}; use crate::openhuman::tools::{ToolResult, ToolScope}; use async_trait::async_trait; use parking_lot::Mutex;