diff --git a/gitbooks/developing/architecture/agent-harness.md b/gitbooks/developing/architecture/agent-harness.md
index 8c3c9711d5..d1b9b6d680 100644
--- a/gitbooks/developing/architecture/agent-harness.md
+++ b/gitbooks/developing/architecture/agent-harness.md
@@ -92,6 +92,8 @@ loop {
 
 Every iteration emits a real-time `AgentProgress` event so the UI can render token-by-token streaming, "calling tool X" status, and per-iteration cost updates.
 
+**One engine, three entry points.** This loop lives in one place — `engine::run_turn_engine` (`harness/engine/`) — and every caller drives it: `Agent::turn` (web/desktop chat), `run_tool_call_loop` (the `agent.run_turn` bus handler for other channels + triage), and `run_subagent` (spawned sub-agents). What varies per caller is supplied through small seams the engine calls into: a `ToolSource` (which tools are advertised + how a call executes), a `ProgressReporter` (top-level `Turn*` events with streaming vs. nested `Subagent*` events), a `TurnObserver` (context management, transcript persistence, history shape), a `CheckpointStrategy` (error vs. summarize when the iteration cap is hit), and a `ResponseParser` (the `ToolDispatcher` dialect). The per-call executor (`run_one_tool`), the repeated-failure circuit breaker, and the `ProviderDelta → AgentProgress` stream forwarder are shared across all three, so they can't drift.
+
 ### Tool dispatch and tool-call dialects
 
 Different LLMs speak different tool-calling dialects. The harness abstracts that with a `ToolDispatcher` trait, which has three concrete implementations:
diff --git a/src/openhuman/agent/harness/engine/checkpoint.rs b/src/openhuman/agent/harness/engine/checkpoint.rs
new file mode 100644
index 0000000000..eb84b6c35d
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/checkpoint.rs
@@ -0,0 +1,51 @@
+//! Max-iteration checkpoint seam.
+//!
+//! When a turn exhausts its iteration budget the three callers diverge:
+//!
+//! * the channel/CLI loop returns the typed `AgentError::MaxIterationsExceeded`
+//!   so `Agent::run_single` can downcast and suppress Sentry noise
+//!   ([`ErrorCheckpoint`]);
+//! * the subagent and `Agent::turn` instead summarize the run-so-far into a
+//!   resumable checkpoint string and return it as the turn's result (the
+//!   `SummarizeCheckpoint`, landed with the subagent/Agent migrations).
+//!
+//! [`CheckpointStrategy::on_max_iter`] receives the accumulated tool digest so a
+//! summarizing strategy can produce a root-cause-aware checkpoint.
+
+use anyhow::Result;
+use async_trait::async_trait;
+
+use crate::openhuman::inference::provider::UsageInfo;
+
+/// What a [`CheckpointStrategy`] hands back once the iteration cap is hit.
+/// `usage`, when present, is the provider usage from a summarization call the
+/// strategy made; the engine adds it to the turn's cost and tells the observer.
+pub(crate) struct CheckpointOutcome {
+    pub text: String, // returned by the engine as the turn's result text
+    pub usage: Option<UsageInfo>, // `None` when there is no summarization usage to fold in
+}
+
+#[async_trait]
+pub(crate) trait CheckpointStrategy: Send + Sync {
+    /// Called once the iteration cap (`max_iterations`) has been hit: return the
+    /// turn's result, or an `Err` to surface the cap to the caller. `digest` is
+    /// the accumulated `tool → outcome` summary of the run so far.
+    async fn on_max_iter(&self, digest: &str, max_iterations: usize) -> Result<CheckpointOutcome>;
+}
+
+/// Strategy that never summarizes: it surfaces the cap as the typed
+/// `AgentError::MaxIterationsExceeded`, wrapped in `anyhow::Error`, so
+/// downstream wrappers — notably `Agent::run_single` — can downcast and suppress
+/// Sentry emission for this deterministic outcome (OPENHUMAN-TAURI-99 / -98).
+pub(crate) struct ErrorCheckpoint;
+
+#[async_trait]
+impl CheckpointStrategy for ErrorCheckpoint {
+    async fn on_max_iter(&self, _digest: &str, max_iterations: usize) -> Result<CheckpointOutcome> {
+        Err(anyhow::Error::new(
+            crate::openhuman::agent::error::AgentError::MaxIterationsExceeded {
+                max: max_iterations, // the cap that was exhausted; the digest is ignored
+            },
+        ))
+    }
+}
diff --git a/src/openhuman/agent/harness/engine/core.rs b/src/openhuman/agent/harness/engine/core.rs
new file mode 100644
index 0000000000..4cffb5e7b2
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/core.rs
@@ -0,0 +1,554 @@
+//! The unified turn loop.
+//!
+//! [`run_turn_engine`] is the single agentic loop the harness runs: announce the
+//! turn, then per iteration run the stop-hook + context guards, send the
+//! provider request (streaming deltas when the [`ProgressReporter`] supplies a
+//! sink), parse the response, either return the final text or execute every
+//! requested tool through the [`ToolSource`] and loop again — bailing early via
+//! the shared repeated-failure circuit breaker, or handing the iteration cap to
+//! the [`CheckpointStrategy`].
+//!
+//! Everything that varies per caller lives behind a seam: [`ToolSource`] (tool
+//! advertisement + per-call execution), [`ProgressReporter`] (Turn* vs
+//! Subagent* events + streaming), [`TurnObserver`] (context management,
+//! transcript persistence, worker-thread mirroring), [`CheckpointStrategy`]
+//! (error vs summarize on cap) and [`ResponseParser`] (tool-call dialect). The
+//! universal concerns — stop hooks, the context guard, token-budget trimming,
+//! native/text history shaping and the circuit breaker — stay inline.
+
+use anyhow::Result;
+use std::fmt::Write as _;
+use std::io::Write as _;
+
+use crate::openhuman::agent::cost::TurnCost;
+use crate::openhuman::agent::multimodal;
+use crate::openhuman::agent::stop_hooks::{current_stop_hooks, StopDecision, TurnState};
+use crate::openhuman::context::guard::{ContextCheckResult, ContextGuard};
+use crate::openhuman::inference::model_context::context_window_for_model;
+use crate::openhuman::inference::provider::{
+    ChatMessage, ChatRequest, Provider, ProviderCapabilityError,
+};
+
+use super::super::parse::build_native_assistant_history;
+use super::super::token_budget::trim_chat_messages_to_budget;
+use super::super::tool_loop::{RepeatFailureGuard, STREAM_CHUNK_MIN_CHARS};
+use super::checkpoint::CheckpointStrategy;
+use super::parser::ResponseParser;
+use super::progress::ProgressReporter;
+use super::state::TurnObserver;
+use super::tool_source::ToolSource;
+
+/// What a completed turn yields. `text` is the final assistant text (or the
+/// circuit-breaker / checkpoint summary); `iterations` and `cost` let stateful
+/// callers attribute the run.
+pub(crate) struct TurnEngineOutcome {
+    pub text: String,
+    pub iterations: u32, // 1-based count of iterations run; the cap itself when `hit_cap`
+    pub cost: TurnCost, // accumulated from every provider response that reported usage
+    /// True when the turn stopped because it hit the iteration cap (the
+    /// `CheckpointStrategy` produced `text`), false for a normal final response
+    /// or an early circuit-breaker halt. `Agent::turn` keys its checkpoint-only
+    /// history/transcript handling off this.
+    pub hit_cap: bool,
+}
+
+/// Cap `s` at `max` chars (not bytes), appending `…` only if something was cut, so a huge
+/// tool result can't bloat the checkpoint digest. Mirrors the subagent's previous helper.
+fn truncate_with_ellipsis(s: &str, max: usize) -> String {
+    if s.chars().count() <= max {
+        return s.to_string(); // short enough: returned unchanged, no ellipsis
+    }
+    let head: String = s.chars().take(max).collect(); // char-wise, so never splits a code point
+    format!("{head}…")
+}
+
+/// Run the agent loop over `history` using `tools`; see the module docs for the
+/// per-iteration flow. `max_iterations` must be pre-normalized by the caller:
+/// with `0` the loop body never runs and `checkpoint` is consulted immediately.
+#[allow(clippy::too_many_arguments)]
+pub(crate) async fn run_turn_engine(
+    provider: &dyn Provider,
+    history: &mut Vec<ChatMessage>,
+    tools: &mut dyn ToolSource,
+    progress: &dyn ProgressReporter,
+    observer: &mut dyn TurnObserver,
+    checkpoint: &dyn CheckpointStrategy,
+    parser: &dyn ResponseParser,
+    provider_name: &str,
+    model: &str,
+    temperature: f64,
+    silent: bool,
+    multimodal_config: &crate::openhuman::config::MultimodalConfig,
+    max_iterations: usize,
+    on_delta: Option<tokio::sync::mpsc::Sender<String>>,
+) -> Result<TurnEngineOutcome> {
+    let mut context_guard = context_window_for_model(model)
+        .map(ContextGuard::with_context_window)
+        .unwrap_or_else(ContextGuard::new);
+    let mut turn_cost = TurnCost::new();
+
+    // Compiled digest of this run's tool calls + results, for a graceful
+    // checkpoint if the iteration cap is hit. Accumulated as the loop runs so
+    // it survives history trimming.
+    let mut run_tool_digest = String::new();
+
+    // Announce turn start. Lifecycle (turn/iteration) events are `.await`-ed so
+    // they survive downstream backpressure — dropping one would desync the
+    // web-channel progress bridge.
+    progress.turn_started().await;
+
+    let stop_hooks = current_stop_hooks();
+    // Repeated-failure circuit breaker — halts with a root cause rather than
+    // grinding to `max_iterations`.
+    let mut failure_guard = RepeatFailureGuard::new();
+    let mut halt_reason: Option<String> = None;
+    for iteration in 0..max_iterations {
+        progress
+            .iteration_started((iteration + 1) as u32, max_iterations as u32)
+            .await;
+
+        // ── Stop hooks: policy check before the next LLM call ──
+        if !stop_hooks.is_empty() {
+            let state = TurnState {
+                iteration: (iteration + 1) as u32,
+                max_iterations: max_iterations as u32,
+                cost: &turn_cost,
+                model,
+            };
+            for hook in &stop_hooks {
+                match hook.check(&state).await {
+                    StopDecision::Continue => {}
+                    StopDecision::Stop { reason } => {
+                        tracing::warn!(
+                            iteration = (iteration + 1),
+                            hook = hook.name(),
+                            reason = %reason,
+                            "[agent_loop] stop hook triggered — aborting turn"
+                        );
+                        anyhow::bail!("Agent turn stopped by hook '{}': {reason}", hook.name());
+                    }
+                }
+            }
+        }
+
+        // ── Context guard: check utilization before each LLM call ──
+        match context_guard.check() {
+            ContextCheckResult::Ok => {}
+            ContextCheckResult::CompactionNeeded => {
+                tracing::warn!(
+                    iteration,
+                    "[agent_loop] context guard: compaction needed (>{:.0}% full)",
+                    crate::openhuman::context::guard::COMPACTION_TRIGGER_THRESHOLD * 100.0
+                );
+            }
+            ContextCheckResult::ContextExhausted {
+                utilization_pct,
+                reason,
+            } => {
+                let msg = format!("Context window exhausted ({utilization_pct}% full): {reason}");
+                crate::core::observability::report_error(
+                    msg.as_str(),
+                    "agent",
+                    "context_exhausted",
+                    &[
+                        ("provider", provider_name),
+                        ("model", model),
+                        ("utilization_pct", &utilization_pct.to_string()),
+                    ],
+                );
+                anyhow::bail!(msg);
+            }
+        }
+
+        if let Some(context_window) = context_window_for_model(model) {
+            let budget_outcome = trim_chat_messages_to_budget(history, context_window);
+            if budget_outcome.trimmed {
+                log::warn!(
+                    "[agent_loop] pre-dispatch history trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}",
+                    model,
+                    context_window,
+                    budget_outcome.original_tokens,
+                    budget_outcome.final_tokens,
+                    budget_outcome.messages_removed
+                );
+            } else {
+                tracing::debug!(
+                    iteration,
+                    model,
+                    context_window,
+                    estimated_tokens = budget_outcome.final_tokens,
+                    "[agent_loop] pre-dispatch token budget ok"
+                );
+            }
+        }
+
+        // Caller-specific pre-dispatch work (e.g. Agent's ContextManager).
+        observer.before_dispatch(history, iteration).await?;
+
+        tracing::debug!(iteration, "[agent_loop] sending LLM request");
+        let image_marker_count = multimodal::count_image_markers(history);
+        if image_marker_count > 0 && !provider.supports_vision() {
+            let cap_err = ProviderCapabilityError {
+                provider: provider_name.to_string(),
+                capability: "vision".to_string(),
+                message: format!(
+                    "received {image_marker_count} image marker(s), but this provider does not support vision input"
+                ),
+            };
+            crate::core::observability::report_error(
+                &cap_err,
+                "agent",
+                "provider_capability",
+                &[
+                    ("provider", provider_name),
+                    ("capability", "vision"),
+                    ("model", model),
+                ],
+            );
+            return Err(cap_err.into());
+        }
+
+        let prepared_messages =
+            multimodal::prepare_messages_for_provider(history, multimodal_config).await?;
+
+        // Recomputed each iteration: a `ToolSource` may register tools lazily
+        // mid-turn, so native-tool enablement can flip from off to on.
+        let request_tools = if provider.supports_native_tools() && !tools.request_specs().is_empty()
+        {
+            Some(tools.request_specs())
+        } else {
+            None
+        };
+
+        // ProviderDelta → progress forwarder for this iteration (no-op for
+        // flavors that don't stream). Sender dropped after the chat call so the
+        // forwarder exits cleanly.
+        let (delta_tx_opt, delta_forwarder) = progress.make_stream_sink((iteration + 1) as u32);
+
+        let chat_result = provider
+            .chat(
+                ChatRequest {
+                    messages: &prepared_messages.messages,
+                    tools: request_tools,
+                    stream: delta_tx_opt.as_ref(),
+                },
+                model,
+                temperature,
+            )
+            .await;
+
+        drop(delta_tx_opt);
+        if let Some(handle) = delta_forwarder {
+            let _ = handle.await;
+        }
+
+        let (
+            response_text,
+            display_text,
+            reasoning_content,
+            tool_calls,
+            assistant_history_content,
+            native_tool_calls,
+        ) = match chat_result {
+            Ok(resp) => {
+                // Update context guard + cost with token usage from this response.
+                if let Some(ref usage) = resp.usage {
+                    context_guard.update_usage(usage);
+                    turn_cost.add_call(model, usage);
+                    observer.record_usage(model, usage);
+                    tracing::debug!(
+                        iteration,
+                        input_tokens = usage.input_tokens,
+                        output_tokens = usage.output_tokens,
+                        context_window = usage.context_window,
+                        cumulative_usd = turn_cost.total_usd(),
+                        "[agent_loop] LLM response received"
+                    );
+                    progress
+                        .cost_updated(model, (iteration + 1) as u32, &turn_cost)
+                        .await;
+                } else {
+                    tracing::debug!(
+                        iteration,
+                        "[agent_loop] LLM response received (no usage info)"
+                    );
+                }
+
+                let response_text = resp.text_or_empty().to_string();
+                let (display_text, calls) = parser.parse(&resp);
+
+                tracing::debug!(
+                    iteration,
+                    native_tool_calls = resp.tool_calls.len(),
+                    parsed_tool_calls = calls.len(),
+                    "[agent_loop] tool calls parsed"
+                );
+
+                let assistant_history_content = if resp.tool_calls.is_empty() {
+                    response_text.clone()
+                } else {
+                    build_native_assistant_history(
+                        &response_text,
+                        resp.reasoning_content.as_deref(),
+                        &resp.tool_calls,
+                    )
+                };
+
+                let reasoning_content = resp.reasoning_content;
+                let native_calls = resp.tool_calls;
+                (
+                    response_text,
+                    display_text,
+                    reasoning_content,
+                    calls,
+                    assistant_history_content,
+                    native_calls,
+                )
+            }
+            Err(e) => {
+                // Transient upstream failures are already classified + retried by
+                // reliable.rs and reported once when all providers are exhausted;
+                // re-reporting per iteration floods Sentry (OPENHUMAN-TAURI-3Y/3Z).
+                let transient =
+                    crate::openhuman::inference::provider::reliable::is_rate_limited(&e)
+                        || crate::openhuman::inference::provider::reliable::is_upstream_unhealthy(
+                            &e,
+                        );
+                if transient {
+                    tracing::warn!(
+                        domain = "agent",
+                        operation = "provider_chat",
+                        provider = provider_name,
+                        model = model,
+                        iteration = iteration + 1,
+                        error = %format!("{e:#}"),
+                        "[agent] transient provider_chat failure — retried upstream"
+                    );
+                } else {
+                    crate::core::observability::report_error_or_expected(
+                        &e,
+                        "agent",
+                        "provider_chat",
+                        &[
+                            ("provider", provider_name),
+                            ("model", model),
+                            ("iteration", &(iteration + 1).to_string()),
+                        ],
+                    );
+                }
+                return Err(e);
+            }
+        };
+
+        if tool_calls.is_empty() {
+            tracing::debug!(
+                iteration,
+                "[agent_loop] no tool calls — returning final response"
+            );
+            // The final answer is the narrative text, falling back to the raw
+            // response text when the parser stripped everything (mirrors the
+            // legacy `Agent::turn` `final_text` logic).
+            let final_out = if display_text.is_empty() {
+                response_text.clone()
+            } else {
+                display_text.clone()
+            };
+            // A completion with no text *and* no tool calls is a degenerate
+            // response. Callers that disallow it (Agent::turn) surface a typed
+            // error instead of a silent blank reply; the channel/subagent loops
+            // return it verbatim.
+            if final_out.trim().is_empty() && !observer.allow_empty_final() {
+                log::warn!(
+                    "[agent_loop] provider returned an empty final response (i={}, no text, no tool calls) — surfacing as error",
+                    iteration + 1
+                );
+                return Err(
+                    crate::openhuman::agent::error::AgentError::EmptyProviderResponse {
+                        iteration: iteration + 1,
+                    }
+                    .into(),
+                );
+            }
+            // No tool calls — final response. When a streaming draft sink exists,
+            // relay it in whitespace-delimited chunks (`chunk.len()` is in bytes).
+            if let Some(ref tx) = on_delta {
+                let mut chunk = String::new();
+                for word in final_out.split_inclusive(char::is_whitespace) {
+                    chunk.push_str(word);
+                    if chunk.len() >= STREAM_CHUNK_MIN_CHARS
+                        && tx.send(std::mem::take(&mut chunk)).await.is_err()
+                    {
+                        break; // receiver dropped
+                    }
+                }
+                if !chunk.is_empty() {
+                    let _ = tx.send(chunk).await;
+                }
+            }
+            history.push(ChatMessage::assistant(response_text.clone()));
+            observer.on_assistant(
+                &final_out,
+                &response_text,
+                reasoning_content.as_deref(),
+                &[],
+                &[],
+                iteration,
+                true,
+            );
+            observer.after_iteration(history, iteration);
+            log::info!(
+                "[agent_loop] turn complete: iters={} provider_calls={} tokens_in={} tokens_out={} cached_in={} usd={:.4}",
+                (iteration + 1),
+                turn_cost.call_count,
+                turn_cost.input_tokens,
+                turn_cost.output_tokens,
+                turn_cost.cached_input_tokens,
+                turn_cost.total_usd(),
+            );
+            progress.turn_completed((iteration + 1) as u32).await;
+            return Ok(TurnEngineOutcome {
+                text: final_out,
+                iterations: (iteration + 1) as u32,
+                cost: turn_cost,
+                hit_cap: false,
+            });
+        }
+
+        // Print any text the LLM produced alongside tool calls (unless silent)
+        if !silent && !display_text.is_empty() {
+            print!("{display_text}");
+            let _ = std::io::stdout().flush();
+        }
+
+        // Execute each tool call and build results. `individual_results` tracks
+        // per-call output so native-mode history can emit one `role: tool`
+        // message per call with the correct id.
+        let mut tool_results = String::new();
+        let mut individual_results: Vec<String> = Vec::new();
+        for (call_idx, call) in tool_calls.iter().enumerate() {
+            // Stable id threaded through the start/complete pair. The fallback
+            // includes `call_idx` to stay unique when the same tool name
+            // appears multiple times in one iteration.
+            let progress_call_id = call
+                .id
+                .clone()
+                .unwrap_or_else(|| format!("loop-{iteration}-{call_idx}-{}", call.name));
+
+            // Full per-call lifecycle is owned by the ToolSource.
+            let outcome = tools
+                .execute_call(call, iteration, progress, &progress_call_id)
+                .await;
+
+            individual_results.push(outcome.text.clone());
+            let _ = writeln!(
+                tool_results,
+                "<tool_result name=\"{}\">\n{}\n</tool_result>",
+                call.name, outcome.text
+            );
+
+            // Record this call in the run digest (output truncated) for a
+            // possible max-iteration checkpoint.
+            let _ = writeln!(
+                run_tool_digest,
+                "- {} [{}]: {}",
+                call.name,
+                if outcome.success { "ok" } else { "failed" },
+                truncate_with_ellipsis(&outcome.text, 800)
+            );
+
+            observer.on_tool_result(
+                &progress_call_id,
+                &call.name,
+                &outcome.text,
+                outcome.success,
+                iteration,
+            );
+
+            // Repeated-failure circuit breaker (shared guard); acted on after the batch.
+            if let Some(reason) = failure_guard.record(
+                &call.name,
+                &call.arguments.to_string(),
+                outcome.success,
+                &outcome.text,
+            ) {
+                tracing::warn!(
+                    iteration,
+                    tool = call.name.as_str(),
+                    "[agent_loop] circuit breaker tripped — halting with root cause"
+                );
+                halt_reason = Some(reason);
+            }
+        }
+
+        // Add assistant message with tool calls + tool results to history.
+        // Native mode: JSON-structured messages so convert_messages() can
+        // reconstruct OpenAI-format tool_calls + tool result messages. Prompt
+        // mode: XML-based text format.
+        history.push(ChatMessage::assistant(assistant_history_content));
+        observer.on_assistant(
+            &display_text,
+            &response_text,
+            reasoning_content.as_deref(),
+            &native_tool_calls,
+            &tool_calls,
+            iteration,
+            false,
+        );
+        if native_tool_calls.is_empty() {
+            let content = format!("[Tool results]\n{tool_results}");
+            observer.on_results_batch(&content, iteration);
+            history.push(ChatMessage::user(content));
+        } else {
+            for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) {
+                let tool_msg = serde_json::json!({
+                    "tool_call_id": native_call.id,
+                    "content": result,
+                });
+                history.push(ChatMessage::tool(tool_msg.to_string()));
+            }
+        }
+
+        observer.after_iteration(history, iteration);
+
+        // Circuit breaker tripped this iteration: return the root-cause summary
+        // instead of looping to `max_iterations`. Tool results are already in
+        // `history`, so the caller still has full context.
+        if let Some(reason) = halt_reason.take() {
+            // Mirror the normal-completion path: emit turn-completed before the
+            // early return so progress consumers don't stay in-flight.
+            progress.turn_completed((iteration + 1) as u32).await;
+            return Ok(TurnEngineOutcome {
+                text: reason,
+                iterations: (iteration + 1) as u32,
+                cost: turn_cost,
+                hit_cap: false,
+            });
+        }
+    }
+
+    // Iteration cap reached — hand off to the checkpoint strategy (error vs
+    // summarize). The accumulated digest lets a summarizing strategy produce a
+    // resumable, root-cause-aware checkpoint.
+    let digest = if run_tool_digest.is_empty() {
+        "(no tool calls completed)"
+    } else {
+        run_tool_digest.as_str()
+    };
+    let co = checkpoint.on_max_iter(digest, max_iterations).await?;
+    // Fold any summarization-call usage into the turn cost + observer so token
+    // accounting stays complete.
+    if let Some(ref u) = co.usage {
+        turn_cost.add_call(model, u);
+        observer.record_usage(model, u);
+    }
+    // Emit the terminal lifecycle event on this successful (checkpoint) exit
+    // too, so consumers aren't left waiting — matching the final-response and
+    // circuit-breaker paths.
+    progress.turn_completed(max_iterations as u32).await;
+    Ok(TurnEngineOutcome {
+        text: co.text,
+        iterations: max_iterations as u32,
+        cost: turn_cost,
+        hit_cap: true,
+    })
+}
diff --git a/src/openhuman/agent/harness/engine/mod.rs b/src/openhuman/agent/harness/engine/mod.rs
new file mode 100644
index 0000000000..df22ec0cf7
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/mod.rs
@@ -0,0 +1,31 @@
+//! Unified agent turn engine.
+//!
+//! Historically the harness carried THREE near-identical agentic loops — one
+//! per entry point (`Agent::turn` for web/desktop chat, `run_tool_call_loop`
+//! for non-web channels + triage, and the subagent `run_inner_loop`). They each
+//! re-implemented the same shape (call the LLM → parse tool calls → execute
+//! tools → append results → repeat until final text or the iteration cap) and
+//! had drifted in subtle ways.
+//!
+//! This module is the single home for the pieces those loops share, so they
+//! can't drift again. The extraction was incremental (see the unify-agent-turn
+//! plan): [`tools::run_one_tool`] landed first — the per-call tool executor
+//! (policy gate → scope guard → approval gate → execute with timeout →
+//! scrub/tokenjuice/cap/summarize → audit), previously duplicated verbatim in
+//! all three loops — and the loop itself now lives in [`core::run_turn_engine`].
+
+pub(crate) mod checkpoint;
+pub(crate) mod core;
+pub(crate) mod parser;
+pub(crate) mod progress;
+pub(crate) mod state;
+pub(crate) mod tool_source;
+pub(crate) mod tools;
+
+pub(crate) use checkpoint::{CheckpointOutcome, CheckpointStrategy, ErrorCheckpoint};
+pub(crate) use core::run_turn_engine;
+pub(crate) use parser::{DefaultParser, DispatcherParser};
+pub(crate) use progress::{ProgressReporter, SubagentProgress, TurnProgress};
+pub(crate) use state::{NullObserver, TurnObserver};
+pub(crate) use tool_source::{RegistryToolSource, ToolSource};
+pub(crate) use tools::{run_one_tool, ToolRunResult};
diff --git a/src/openhuman/agent/harness/engine/parser.rs b/src/openhuman/agent/harness/engine/parser.rs
new file mode 100644
index 0000000000..05601b548d
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/parser.rs
@@ -0,0 +1,70 @@
+//! Response-parsing seam.
+//!
+//! The channel loop and subagent extract tool calls from a provider response
+//! with the built-in native-first + XML-fallback logic ([`DefaultParser`]).
+//! `Agent::turn` instead uses its configured [`ToolDispatcher`] (native / XML /
+//! PFormat) — PFormat in particular parses positional `name[args]` calls the
+//! built-in path can't. [`DispatcherParser`] adapts a dispatcher to this seam so
+//! the engine stays parser-agnostic while preserving every dispatcher's grammar.
+//!
+//! `parse` returns `(display_text, calls)`: the narrative text to surface (tool
+//! markup stripped) and the parsed calls in the engine's internal
+//! [`ParsedToolCall`] shape. The engine keeps the *raw* response text
+//! separately for assistant-history serialization.
+
+use crate::openhuman::agent::dispatcher::ToolDispatcher;
+use crate::openhuman::agent::harness::parse::{
+    parse_structured_tool_calls, parse_tool_calls, ParsedToolCall,
+};
+use crate::openhuman::inference::provider::ChatResponse;
+
+pub(crate) trait ResponseParser: Send + Sync {
+    /// Split `resp` into `(display_text, calls)`: narrative text plus the parsed tool calls.
+    fn parse(&self, resp: &ChatResponse) -> (String, Vec<ParsedToolCall>);
+}
+
+/// Built-in parser: native structured tool calls win; only when there are none
+/// is the response text run through the XML-tag parser. Used by channel loop + subagent.
+pub(crate) struct DefaultParser;
+
+impl ResponseParser for DefaultParser {
+    fn parse(&self, resp: &ChatResponse) -> (String, Vec<ParsedToolCall>) {
+        let response_text = resp.text_or_empty().to_string();
+        let mut calls = parse_structured_tool_calls(&resp.tool_calls);
+        let mut parsed_text = String::new();
+        if calls.is_empty() { // no native calls: try the text-based fallback
+            let (fallback_text, fallback_calls) = parse_tool_calls(&response_text);
+            if !fallback_text.is_empty() {
+                parsed_text = fallback_text;
+            }
+            calls = fallback_calls;
+        }
+        let display_text = if parsed_text.is_empty() { // nothing from the fallback: show raw text
+            response_text
+        } else {
+            parsed_text
+        };
+        (display_text, calls)
+    }
+}
+
+/// Adapts an `Agent`'s configured [`ToolDispatcher`] to the parser seam,
+/// converting the dispatcher's `ParsedToolCall` shape into the engine's.
+pub(crate) struct DispatcherParser<'a> {
+    pub dispatcher: &'a dyn ToolDispatcher, // borrowed; the parser does not own it
+}
+
+impl ResponseParser for DispatcherParser<'_> {
+    fn parse(&self, resp: &ChatResponse) -> (String, Vec<ParsedToolCall>) {
+        let (text, calls) = self.dispatcher.parse_response(resp);
+        let calls = calls
+            .into_iter()
+            .map(|c| ParsedToolCall {
+                name: c.name,
+                arguments: c.arguments,
+                id: c.tool_call_id, // the dispatcher shape names this field `tool_call_id`
+            })
+            .collect();
+        (text, calls)
+    }
+}
diff --git a/src/openhuman/agent/harness/engine/progress.rs b/src/openhuman/agent/harness/engine/progress.rs
new file mode 100644
index 0000000000..64648a6e2c
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/progress.rs
@@ -0,0 +1,356 @@
+//! Progress reporting seam + the shared streaming-delta forwarder.
+//!
+//! The engine never names a concrete [`AgentProgress`] variant. It talks to a
+//! [`ProgressReporter`], whose impls pick the event *flavor*:
+//!
+//! * [`TurnProgress`] — top-level chat (channel loop, `Agent::turn`): emits the
+//!   `Turn*` / `ToolCall*` / `TurnCostUpdated` events and streams provider
+//!   deltas as `TextDelta` / `ThinkingDelta` / `ToolCallArgsDelta`.
+//! * [`SubagentProgress`] — a spawned sub-agent: emits the `Subagent*` /
+//!   `SubagentToolCall*` events (nested under the subagent row in the UI) and streams
+//!   text / thinking deltas only (no tool-call arg fragments). Lifecycle `SubagentSpawned` /
+//!   `SubagentCompleted` / `SubagentFailed` events stay in the spawn tool, outside the loop.
+//! * [`NullProgress`] — triage / tests: every method is a no-op.
+
+use async_trait::async_trait;
+
+use crate::openhuman::agent::cost::TurnCost;
+use crate::openhuman::agent::progress::AgentProgress;
+use crate::openhuman::inference::provider::ProviderDelta;
+
+/// Progress events the engine emits over the course of a turn. Every method has
+/// a no-op default, so an impl overrides only the events its flavor surfaces.
+#[async_trait]
+pub(crate) trait ProgressReporter: Send + Sync {
+    async fn turn_started(&self) {}
+    async fn iteration_started(&self, _iteration: u32, _max_iterations: u32) {}
+    async fn cost_updated(&self, _model: &str, _iteration: u32, _cost: &TurnCost) {}
+    async fn turn_completed(&self, _iterations: u32) {}
+    async fn tool_started(
+        &self,
+        _call_id: &str,
+        _tool_name: &str,
+        _arguments: &serde_json::Value,
+        _iteration: u32, // `run_one_tool` passes its `iteration + 1` here
+    ) {
+    }
+    #[allow(clippy::too_many_arguments)]
+    async fn tool_completed(
+        &self,
+        _call_id: &str,
+        _tool_name: &str,
+        _success: bool,
+        _output_chars: usize,
+        _elapsed_ms: u64,
+        _iteration: u32,
+    ) {
+    }
+
+    /// Build this iteration's `ProviderDelta` sender and the forwarder task that
+    /// drains it, or `(None, None)` when the flavor doesn't stream (the default).
+    fn make_stream_sink(
+        &self,
+        _iteration: u32,
+    ) -> (
+        Option<tokio::sync::mpsc::Sender<ProviderDelta>>,
+        Option<tokio::task::JoinHandle<()>>,
+    ) {
+        (None, None)
+    }
+}
+
+/// Top-level chat flavor: `Turn*` lifecycle + `ToolCall*` + streamed provider deltas.
+pub(crate) struct TurnProgress {
+    pub sink: Option<tokio::sync::mpsc::Sender<AgentProgress>>, // `None`: nothing is emitted
+}
+
+impl TurnProgress {
+    pub(crate) fn new(sink: Option<tokio::sync::mpsc::Sender<AgentProgress>>) -> Self {
+        Self { sink }
+    }
+}
+
+// Shared shape of every event method below: without a sink the call is a
+// no-op; with one, the event is sent (awaiting the channel) and a failed send
+// is logged at `warn` and otherwise ignored, so a closed progress sink never
+// fails the turn from inside this reporter.
+#[async_trait]
+impl ProgressReporter for TurnProgress {
+    /// Emits `TurnStarted`.
+    async fn turn_started(&self) {
+        let Some(sink) = &self.sink else { return };
+        if let Err(e) = sink.send(AgentProgress::TurnStarted).await {
+            log::warn!("[agent_loop] progress sink closed at TurnStarted: {e}");
+        }
+    }
+
+    /// Emits `IterationStarted` with the counters passed in.
+    async fn iteration_started(&self, iteration: u32, max_iterations: u32) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::IterationStarted {
+            iteration,
+            max_iterations,
+        };
+        if let Err(e) = sink.send(event).await {
+            log::warn!("[agent_loop] progress sink closed at IterationStarted: {e}");
+        }
+    }
+
+    /// Emits `TurnCostUpdated` with the token counts and USD total read from `cost`.
+    async fn cost_updated(&self, model: &str, iteration: u32, cost: &TurnCost) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::TurnCostUpdated {
+            model: model.to_string(),
+            iteration,
+            input_tokens: cost.input_tokens,
+            output_tokens: cost.output_tokens,
+            cached_input_tokens: cost.cached_input_tokens,
+            total_usd: cost.total_usd(),
+        };
+        if let Err(e) = sink.send(event).await {
+            log::warn!("[agent_loop] progress sink closed at TurnCostUpdated: {e}");
+        }
+    }
+
+    /// Emits `TurnCompleted` with the iteration count passed in.
+    async fn turn_completed(&self, iterations: u32) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::TurnCompleted { iterations };
+        if let Err(e) = sink.send(event).await {
+            log::warn!("[agent_loop] progress sink closed at TurnCompleted: {e}");
+        }
+    }
+
+    /// Emits `ToolCallStarted`; `arguments` is cloned only when a sink exists.
+    async fn tool_started(
+        &self,
+        call_id: &str,
+        tool_name: &str,
+        arguments: &serde_json::Value,
+        iteration: u32,
+    ) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::ToolCallStarted {
+            call_id: call_id.to_string(),
+            tool_name: tool_name.to_string(),
+            arguments: arguments.clone(),
+            iteration,
+        };
+        if let Err(e) = sink.send(event).await {
+            log::warn!("[agent_loop] progress sink closed while emitting ToolCallStarted: {e}");
+        }
+    }
+
+    /// Emits `ToolCallCompleted` with the outcome, output size and elapsed time.
+    async fn tool_completed(
+        &self,
+        call_id: &str,
+        tool_name: &str,
+        success: bool,
+        output_chars: usize,
+        elapsed_ms: u64,
+        iteration: u32,
+    ) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::ToolCallCompleted {
+            call_id: call_id.to_string(),
+            tool_name: tool_name.to_string(),
+            success,
+            output_chars,
+            elapsed_ms,
+            iteration,
+        };
+        if let Err(e) = sink.send(event).await {
+            log::warn!("[agent_loop] progress sink closed while emitting ToolCallCompleted: {e}");
+        }
+    }
+
+    /// Streams provider deltas for this iteration through the shared
+    /// [`spawn_delta_forwarder`]; that yields `(None, None)` when there is no sink.
+    fn make_stream_sink(
+        &self,
+        iteration: u32,
+    ) -> (
+        Option<tokio::sync::mpsc::Sender<ProviderDelta>>,
+        Option<tokio::task::JoinHandle<()>>,
+    ) {
+        let progress_sink = self.sink.clone();
+        spawn_delta_forwarder(progress_sink, iteration)
+    }
+}
+
+/// Sub-agent flavor: `Subagent*` iteration/tool-call events plus text/thinking deltas.
+pub(crate) struct SubagentProgress {
+    pub sink: Option<tokio::sync::mpsc::Sender<AgentProgress>>,
+    pub agent_id: String,
+    pub task_id: String,
+}
+
+#[async_trait]
+impl ProgressReporter for SubagentProgress {
+    /// Emits `SubagentIterationStarted`; a failed send is silently ignored.
+    async fn iteration_started(&self, iteration: u32, max_iterations: u32) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::SubagentIterationStarted {
+            agent_id: self.agent_id.clone(),
+            task_id: self.task_id.clone(),
+            iteration,
+            max_iterations,
+        };
+        let _ = sink.send(event).await;
+    }
+
+    /// Emits `SubagentToolCallStarted`. The call arguments are not forwarded;
+    /// a failed send is silently ignored.
+    async fn tool_started(
+        &self,
+        call_id: &str,
+        tool_name: &str,
+        _arguments: &serde_json::Value,
+        iteration: u32,
+    ) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::SubagentToolCallStarted {
+            agent_id: self.agent_id.clone(),
+            task_id: self.task_id.clone(),
+            call_id: call_id.to_string(),
+            tool_name: tool_name.to_string(),
+            iteration,
+        };
+        let _ = sink.send(event).await;
+    }
+
+    /// Emits `SubagentToolCallCompleted`; a failed send is silently ignored.
+    async fn tool_completed(
+        &self,
+        call_id: &str,
+        tool_name: &str,
+        success: bool,
+        output_chars: usize,
+        elapsed_ms: u64,
+        iteration: u32,
+    ) {
+        let Some(sink) = &self.sink else { return };
+        let event = AgentProgress::SubagentToolCallCompleted {
+            agent_id: self.agent_id.clone(),
+            task_id: self.task_id.clone(),
+            call_id: call_id.to_string(),
+            tool_name: tool_name.to_string(),
+            success,
+            output_chars,
+            elapsed_ms,
+            iteration,
+        };
+        let _ = sink.send(event).await;
+    }
+
+    /// Forward the child's visible text and reasoning deltas to the parent,
+    /// tagged with this sub-agent's `agent_id` / `task_id` so the UI can render
+    /// them inside the live subagent row (PR #3007). Tool-call start and
+    /// arg-fragment deltas are skipped — the `SubagentToolCall*` lifecycle
+    /// events already surface them, so forwarding would double-render.
+    fn make_stream_sink(
+        &self,
+        iteration: u32,
+    ) -> (
+        Option<tokio::sync::mpsc::Sender<ProviderDelta>>,
+        Option<tokio::task::JoinHandle<()>>,
+    ) {
+        let Some(sink) = self.sink.clone() else {
+            return (None, None);
+        };
+        let agent_id = self.agent_id.clone();
+        let task_id = self.task_id.clone();
+        let (delta_tx, mut delta_rx) = tokio::sync::mpsc::channel::<ProviderDelta>(128);
+        let forwarder = tokio::spawn(async move {
+            while let Some(delta_event) = delta_rx.recv().await {
+                let progress = match delta_event {
+                    ProviderDelta::TextDelta { delta } => Some(AgentProgress::SubagentTextDelta {
+                        agent_id: agent_id.clone(),
+                        task_id: task_id.clone(),
+                        delta,
+                        iteration,
+                    }),
+                    ProviderDelta::ThinkingDelta { delta } => {
+                        Some(AgentProgress::SubagentThinkingDelta {
+                            agent_id: agent_id.clone(),
+                            task_id: task_id.clone(),
+                            delta,
+                            iteration,
+                        })
+                    }
+                    ProviderDelta::ToolCallStart { .. }
+                    | ProviderDelta::ToolCallArgsDelta { .. } => None,
+                };
+                let Some(progress) = progress else { continue };
+                // Await the send: backpressure keeps streamed deltas in order. A closed
+                // parent sink ends the forwarder.
+                if sink.send(progress).await.is_err() {
+                    break;
+                }
+            }
+        });
+        (Some(delta_tx), Some(forwarder))
+    }
+}
+
+/// No-op reporter for triage / tests; every event falls through to the trait defaults.
+pub(crate) struct NullProgress;
+
+impl ProgressReporter for NullProgress {}
+
+/// Bridge a provider's streaming channel into the turn's progress sink.
+///
+/// Spawns a task that maps every `ProviderDelta` it receives to an `AgentProgress`
+/// delta event tagged with `iteration` (1-based) and sends it to `on_progress`.
+/// `ToolCallStart` becomes a `ToolCallArgsDelta` with an empty `delta`; argument
+/// fragments carry an empty `tool_name`. Hand the returned sender to
+/// [`crate::openhuman::inference::provider::ChatRequest::stream`] and await the
+/// returned task handle after the chat call.
+///
+/// With no progress sink this returns `(None, None)`: the caller passes
+/// `stream: None` and the provider uses its non-streaming HTTP path.
+/// Each `send` is awaited, so deltas arrive in order and are not dropped when
+/// the bridge is slow; the task ends when the sender is dropped or the sink closes.
+pub(crate) fn spawn_delta_forwarder(
+    on_progress: Option<tokio::sync::mpsc::Sender<AgentProgress>>,
+    iteration: u32,
+) -> (
+    Option<tokio::sync::mpsc::Sender<ProviderDelta>>,
+    Option<tokio::task::JoinHandle<()>>,
+) {
+    let Some(downstream) = on_progress else {
+        return (None, None);
+    };
+    let (delta_tx, mut delta_rx) = tokio::sync::mpsc::channel::<ProviderDelta>(128);
+    let forwarder = tokio::spawn(async move {
+        while let Some(delta_event) = delta_rx.recv().await {
+            let progress = match delta_event {
+                ProviderDelta::TextDelta { delta } => AgentProgress::TextDelta { delta, iteration },
+                ProviderDelta::ThinkingDelta { delta } => {
+                    AgentProgress::ThinkingDelta { delta, iteration }
+                }
+                ProviderDelta::ToolCallStart { call_id, tool_name } => {
+                    AgentProgress::ToolCallArgsDelta {
+                        call_id,
+                        tool_name,
+                        delta: String::new(),
+                        iteration,
+                    }
+                }
+                ProviderDelta::ToolCallArgsDelta { call_id, delta } => {
+                    AgentProgress::ToolCallArgsDelta {
+                        call_id,
+                        tool_name: String::new(),
+                        delta,
+                        iteration,
+                    }
+                }
+            };
+            if downstream.send(progress).await.is_err() {
+                break;
+            }
+        }
+    });
+    (Some(delta_tx), Some(forwarder))
+}
diff --git a/src/openhuman/agent/harness/engine/state.rs b/src/openhuman/agent/harness/engine/state.rs
new file mode 100644
index 0000000000..359c6fc5b7
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/state.rs
@@ -0,0 +1,96 @@
+//! Turn-state observer seam.
+//!
+//! The engine drives the loop over a `Vec<ChatMessage>` working buffer, but the
+//! three callers want to *do* different things around each step:
+//!
+//! * the channel loop wants nothing extra ([`NullObserver`]);
+//! * the subagent wants per-iteration transcript persistence, usage
+//!   accumulation, and worker-thread mirroring (assistant intents, per-call
+//!   results, batched text-mode results, final response);
+//! * `Agent::turn` wants its `ContextManager` reduction before each dispatch,
+//!   transcript persistence, and per-turn usage/cost snapshots.
+//!
+//! [`TurnObserver`] is the seam: every method defaults to a no-op, so an impl
+//! only overrides the hooks its caller needs. The engine still owns the
+//! universal concerns (stop hooks, context guard, token-budget trim, the
+//! circuit breaker) inline — the observer is for caller-specific side effects.
+
+use anyhow::Result;
+use async_trait::async_trait;
+
+use crate::openhuman::agent::harness::parse::ParsedToolCall;
+use crate::openhuman::inference::provider::{ChatMessage, ToolCall, UsageInfo};
+
+#[async_trait]
+pub(crate) trait TurnObserver: Send {
+    /// Runs before each provider dispatch, after the engine's own context guard
+    /// and token-budget trim; the history may be edited in place. `Agent::turn`
+    /// runs its `ContextManager` reduction chain here. Default: no-op, `Ok(())`.
+    async fn before_dispatch(
+        &mut self,
+        _history: &mut Vec<ChatMessage>,
+        _iteration: usize,
+    ) -> Result<()> {
+        Ok(())
+    }
+
+    /// Called once per provider response that carried a usage block, so the
+    /// caller can accumulate its own token tally / transcript usage snapshot.
+    fn record_usage(&mut self, _model: &str, _usage: &UsageInfo) {}
+
+    /// Called after this iteration's assistant message is committed to the
+    /// engine's working buffer. `response_text` is the raw provider text (pre
+    /// native serialization); `reasoning_content` is the thinking-model content
+    /// to round-trip; `native_tool_calls` are the provider's structured calls
+    /// (empty in text/prompt mode); `parsed_calls` are the engine-parsed calls
+    /// (empty when `is_final`). `Agent::turn` rebuilds its typed
+    /// `ConversationMessage` history from these; the subagent mirrors them to
+    /// its worker thread. NOTE(review): `display_text` looks like the parser's text — confirm.
+    #[allow(clippy::too_many_arguments)]
+    fn on_assistant(
+        &mut self,
+        _display_text: &str,
+        _response_text: &str,
+        _reasoning_content: Option<&str>,
+        _native_tool_calls: &[ToolCall],
+        _parsed_calls: &[ParsedToolCall],
+        _iteration: usize,
+        _is_final: bool,
+    ) {
+    }
+
+    /// Called once a single tool's result is known, in native-tool mode (one
+    /// `role:tool` message per call). The subagent mirrors per-call results to
+    /// its worker thread; `Agent::turn` buffers them to rebuild typed history.
+    fn on_tool_result(
+        &mut self,
+        _call_id: &str,
+        _tool_name: &str,
+        _result_text: &str,
+        _success: bool,
+        _iteration: usize,
+    ) {
+    }
+
+    /// Called after a batched `[Tool results]` user message is committed
+    /// (text/prompt mode, where there are no per-call `role:tool` messages).
+    fn on_results_batch(&mut self, _content: &str, _iteration: usize) {}
+
+    /// Called once the iteration's history is finalized — the transcript
+    /// persistence point. Fires both after the final response and after each
+    /// tool round's results are appended.
+    fn after_iteration(&mut self, _history: &[ChatMessage], _iteration: usize) {}
+
+    /// Whether an empty final response (no text, no tool calls) is acceptable.
+    /// The channel/subagent loops return it as `Ok("")`; `Agent::turn` treats
+    /// it as a degenerate/poisoned completion and surfaces an error instead of
+    /// a silent blank reply (bug-report-2026-05-26 A1). Default: allowed (`true`).
+    fn allow_empty_final(&self) -> bool {
+        true
+    }
+}
+
+/// No-op observer for the channel/CLI/triage loop: keeps no state, uses every default hook.
+pub(crate) struct NullObserver;
+
+impl TurnObserver for NullObserver {}
diff --git a/src/openhuman/agent/harness/engine/tool_source.rs b/src/openhuman/agent/harness/engine/tool_source.rs
new file mode 100644
index 0000000000..0e35a5879b
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/tool_source.rs
@@ -0,0 +1,131 @@
+//! Tool sourcing seam for the turn engine.
+//!
+//! The three former loops resolved "what tools can the model call this turn and
+//! how do I execute one" differently:
+//!
+//! * the channel loop advertised `registry + extra` filtered by a visibility
+//!   whitelist, and executed via the shared [`run_one_tool`];
+//! * the subagent loop advertised a definition-filtered slice of the parent's
+//!   tools (with lazy toolkit registration), and had its own per-call body;
+//! * `Agent::turn` advertised `Agent.visible_tool_specs` and executed via the
+//!   richer `Agent::execute_tool_call` (session policy + per-call permission
+//!   levels + `execute_with_options`).
+//!
+//! [`ToolSource`] is the single seam the engine talks to: it advertises the
+//! request specs and owns per-call execution (including the start/complete
+//! progress events). [`RegistryToolSource`] is the channel/CLI/triage impl; the
+//! subagent and `Agent` impls land in later phases.
+
+use std::collections::HashSet;
+
+use async_trait::async_trait;
+
+use super::super::payload_summarizer::PayloadSummarizer;
+use super::progress::ProgressReporter;
+use super::{run_one_tool, ToolRunResult};
+use crate::openhuman::agent::harness::parse::ParsedToolCall;
+use crate::openhuman::tools::policy::ToolPolicy;
+use crate::openhuman::tools::{Tool, ToolSpec};
+
+/// What the engine needs from "the set of tools available this turn".
+#[async_trait]
+pub(crate) trait ToolSource: Send {
+    /// The deduped, visibility-filtered specs to advertise to the provider.
+    /// Re-read each iteration so impls that register tools lazily (subagent
+    /// toolkit resolution) can grow the advertised set over the course of a turn.
+    fn request_specs(&self) -> &[ToolSpec];
+
+    /// Execute one parsed tool call end-to-end, reporting its start/complete
+    /// events (keyed by `progress_call_id`) through `progress`. Returns the
+    /// [`ToolRunResult`] the engine folds into history + the circuit breaker.
+    async fn execute_call(
+        &mut self,
+        call: &ParsedToolCall,
+        iteration: usize,
+        progress: &dyn ProgressReporter,
+        progress_call_id: &str,
+    ) -> ToolRunResult;
+}
+
+/// The channel/CLI/triage tool source: a persistent `registry`, optional
+/// per-turn synthesised `extra` tools, an optional visibility whitelist
+/// (`None` = every tool is visible), and a pluggable [`ToolPolicy`]. Mirrors
+/// the original `run_tool_call_loop` tool plumbing exactly.
+pub(crate) struct RegistryToolSource<'a> {
+    registry: &'a [Box<dyn Tool>],
+    extra: &'a [Box<dyn Tool>],
+    visible: Option<&'a HashSet<String>>,
+    tool_policy: &'a dyn ToolPolicy,
+    payload_summarizer: Option<&'a dyn PayloadSummarizer>,
+    specs: Vec<ToolSpec>, // advertised specs, computed once in `new`
+}
+
+impl<'a> RegistryToolSource<'a> {
+    pub(crate) fn new(
+        registry: &'a [Box<dyn Tool>],
+        extra: &'a [Box<dyn Tool>],
+        visible: Option<&'a HashSet<String>>,
+        tool_policy: &'a dyn ToolPolicy,
+        payload_summarizer: Option<&'a dyn PayloadSummarizer>,
+    ) -> Self {
+        // Keep only visible tools, then dedup by name before anything reaches the
+        // provider: a registry tool can collide with a per-turn synthesised extra
+        // (e.g. an `ArchetypeDelegationTool` with `delegate_name = "research"` shadowing
+        // a same-named skill), and some providers 400 on duplicates — see TAURI-RUST-4.
+        let mut exposed: Vec<ToolSpec> = Vec::new();
+        for tool in registry.iter().chain(extra.iter()) {
+            if visible.map(|s| s.contains(tool.name())).unwrap_or(true) {
+                exposed.push(tool.spec());
+            }
+        }
+        let specs = crate::openhuman::agent::harness::session::dedup_visible_tool_specs(exposed);
+        Self {
+            registry,
+            extra,
+            visible,
+            tool_policy,
+            payload_summarizer,
+            specs,
+        }
+    }
+
+    fn is_visible(&self, name: &str) -> bool {
+        let Some(allowed) = self.visible else { return true };
+        allowed.contains(name)
+    }
+}
+
+#[async_trait]
+impl ToolSource for RegistryToolSource<'_> {
+    fn request_specs(&self) -> &[ToolSpec] {
+        self.specs.as_slice()
+    }
+
+    async fn execute_call(
+        &mut self,
+        call: &ParsedToolCall,
+        iteration: usize,
+        progress: &dyn ProgressReporter,
+        progress_call_id: &str,
+    ) -> ToolRunResult {
+        // Resolve the call by name over registry-then-extras, honoring the
+        // visibility whitelist. A hallucinated or filtered-out name yields
+        // `None`, which `run_one_tool` reports back as an unknown tool.
+        let resolved: Option<&dyn Tool> = {
+            let mut candidates = self.registry.iter().chain(self.extra.iter());
+            candidates
+                .find(|tool| tool.name() == call.name && self.is_visible(tool.name()))
+                .map(|boxed| boxed.as_ref())
+        };
+        run_one_tool(
+            resolved,
+            call,
+            iteration,
+            progress,
+            self.tool_policy,
+            self.payload_summarizer,
+            progress_call_id,
+        )
+        .await
+    }
+}
diff --git a/src/openhuman/agent/harness/engine/tools.rs b/src/openhuman/agent/harness/engine/tools.rs
new file mode 100644
index 0000000000..07297ec3b6
--- /dev/null
+++ b/src/openhuman/agent/harness/engine/tools.rs
@@ -0,0 +1,381 @@
+//! Shared per-call tool executor.
+//!
+//! [`run_one_tool`] runs the full lifecycle of a single parsed tool call:
+//!
+//! 1. emit `ToolCallStarted` (for *every* call, including ones rejected below,
+//!    so a client row created from streamed args always gets a terminal event);
+//! 2. evaluate the pluggable [`ToolPolicy`] (deny short-circuits everything,
+//!    including approval side-effects);
+//! 3. guard `CliRpcOnly` scope (such tools can't run in the autonomous loop);
+//! 4. route external-effect tools through the process-global `ApprovalGate`;
+//! 5. execute with the configured timeout, then scrub credentials, apply
+//!    tokenjuice, the per-tool size cap, and the optional payload summarizer;
+//! 6. stamp the approval audit "after" row (#2135);
+//! 7. emit `ToolCallCompleted`.
+//!
+//! It returns a [`ToolRunResult`] (`text` + `success`). The caller owns history
+//! shaping (native `role:tool` messages vs XML `<tool_result>` blocks) and the
+//! repeated-failure circuit breaker, both of which it drives uniformly from the
+//! returned `success`/`text` regardless of which branch produced them.
+//!
+//! This body was lifted verbatim (behavior-preserving) from the canonical
+//! `run_tool_call_loop` in `tool_loop.rs`; the three loops now call it instead
+//! of each carrying their own copy.
+
+use super::super::payload_summarizer::PayloadSummarizer;
+use super::progress::ProgressReporter;
+use crate::openhuman::agent::harness::parse::ParsedToolCall;
+use crate::openhuman::tools::policy::{PolicyDecision, ToolPolicy};
+use crate::openhuman::tools::traits::ToolScope;
+use crate::openhuman::tools::Tool;
+
+use super::super::credentials::scrub_credentials;
+
+/// Outcome of a single tool call, as fed back to the model. `text` is the body
+/// to return (a result, an error, or a denial reason); `success` is `false` for
+/// every non-OK outcome (policy/approval denial, scope rejection, timeout, tool
+/// error, unknown tool), so the caller's circuit breaker and history formatting
+/// can treat all failure modes uniformly.
+pub(crate) struct ToolRunResult {
+    pub text: String,
+    pub success: bool,
+}
+
+/// Execute one parsed tool call end-to-end. See the module docs for the full
+/// lifecycle. `tool_opt` is the (already visibility-filtered) tool the caller
+/// resolved by name — `None` means the model requested an unknown/filtered-out
+/// tool, which is reported as a structured error the LLM can correct next turn.
+///
+/// `progress_call_id` is the stable id threaded through the start/complete
+/// event pair (and any preceding args-delta events) so consumers can reconcile
+/// tool rows by id.
+pub(crate) async fn run_one_tool(
+    tool_opt: Option<&dyn Tool>,
+    call: &ParsedToolCall,
+    iteration: usize,
+    progress: &dyn ProgressReporter,
+    tool_policy: &dyn ToolPolicy,
+    payload_summarizer: Option<&dyn PayloadSummarizer>,
+    progress_call_id: &str,
+) -> ToolRunResult {
+    let iteration_u32 = (iteration + 1) as u32;
+
+    // Emit a "tool started" event for every parsed call, even ones that will be
+    // rejected below (approval denied, CliRpcOnly, unknown) — the client-side
+    // row was created from the streamed args and needs a terminal event.
+    progress
+        .tool_started(progress_call_id, &call.name, &call.arguments, iteration_u32)
+        .await;
+
+    // Helper: emit a failed "tool completed" event for an early-exit path
+    // (denied / CliRpcOnly / unknown) so the client row flips to `error`
+    // instead of staying running.
+    let emit_failed_completion = |message: &str| {
+        let output_chars = message.chars().count();
+        async move {
+            progress
+                .tool_completed(
+                    progress_call_id,
+                    &call.name,
+                    false,
+                    output_chars,
+                    0,
+                    iteration_u32,
+                )
+                .await;
+        }
+    };
+
+    // ── Tool policy check (#2131) ─────────────────
+    // Evaluate the pluggable ToolPolicy before any approval or execution. If
+    // the policy denies the call, skip everything (including approval
+    // side-effects) and return the denial reason as a tool error to the model.
+    if let PolicyDecision::Deny(reason) = tool_policy.evaluate(&call.name, &call.arguments) {
+        tracing::debug!(
+            iteration,
+            tool = call.name.as_str(),
+            reason = %reason,
+            "[agent_loop] tool policy denied tool call"
+        );
+        let denied = format!("Tool '{}' denied by policy: {reason}", call.name);
+        emit_failed_completion(&denied).await;
+        return ToolRunResult {
+            text: denied,
+            success: false,
+        };
+    }
+
+    let Some(tool) = tool_opt else {
+        tracing::warn!(
+            iteration,
+            tool = call.name.as_str(),
+            "[agent_loop] unknown tool requested"
+        );
+        let msg = format!("Unknown tool: {}", call.name);
+        emit_failed_completion(&msg).await;
+        return ToolRunResult {
+            text: msg,
+            success: false,
+        };
+    };
+
+    tracing::debug!(
+        iteration,
+        tool = call.name.as_str(),
+        found = true,
+        "[agent_loop] executing tool"
+    );
+
+    // Scope check: CliRpcOnly tools cannot run in the autonomous agent loop.
+    if tool.scope() == ToolScope::CliRpcOnly {
+        tracing::warn!(
+            iteration,
+            tool = call.name.as_str(),
+            "[agent_loop] tool scope is CliRpcOnly — denied in agent loop"
+        );
+        let denied = format!(
+            "Tool '{}' is only available via explicit CLI/RPC invocation, not in the autonomous agent loop.",
+            call.name
+        );
+        emit_failed_completion(&denied).await;
+        return ToolRunResult {
+            text: denied,
+            success: false,
+        };
+    }
+
+    // ── External-effect approval gate (#1339, #2135) ──
+    // Tools whose `external_effect()` returns true route through the
+    // process-global `ApprovalGate` so the UI can prompt the user before
+    // `execute()` runs. The gate is `None` when supervised mode is disabled or
+    // in test envs — behavior matches the pre-#1339 path.
+    //
+    // `approval_request_id` carries the persisted row id forward so we can
+    // stamp the terminal execution outcome onto the same `pending_approvals`
+    // row after the tool finishes (issue #2135). `None` means the tool was
+    // either not gated, was session-allowlist-shortcutted, or was denied —
+    // none of which produce an audit row that needs an "after" entry.
+    let mut approval_request_id: Option<String> = None;
+    let mut approval_gate_for_audit: Option<
+        std::sync::Arc<crate::openhuman::approval::ApprovalGate>,
+    > = None;
+    if tool.external_effect_with_args(&call.arguments) {
+        if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() {
+            let summary = crate::openhuman::approval::summarize_action(&call.name, &call.arguments);
+            let redacted = crate::openhuman::approval::redact_args(&call.arguments);
+            let (outcome, request_id) =
+                gate.intercept_audited(&call.name, &summary, redacted).await;
+            match outcome {
+                crate::openhuman::approval::GateOutcome::Allow => {
+                    approval_request_id = request_id;
+                    if approval_request_id.is_some() {
+                        approval_gate_for_audit = Some(gate);
+                    }
+                }
+                crate::openhuman::approval::GateOutcome::Deny { reason } => {
+                    tracing::warn!(
+                        iteration,
+                        tool = call.name.as_str(),
+                        reason = %reason,
+                        "[agent_loop] approval gate denied tool call"
+                    );
+                    emit_failed_completion(&reason).await;
+                    return ToolRunResult {
+                        text: reason,
+                        success: false,
+                    };
+                }
+            }
+        }
+    }
+
+    let tool_deadline = crate::openhuman::tool_timeout::tool_execution_timeout_duration();
+    let timeout_secs = crate::openhuman::tool_timeout::tool_execution_timeout_secs();
+    let tool_started = std::time::Instant::now();
+    let outcome = tokio::time::timeout(tool_deadline, tool.execute(call.arguments.clone())).await;
+    let elapsed_ms = tool_started.elapsed().as_millis() as u64;
+    let (result_text, success) = match outcome {
+        Ok(Ok(r)) => {
+            let output = r.output();
+            let success = !r.is_error;
+            if success {
+                tracing::debug!(
+                    iteration,
+                    tool = call.name.as_str(),
+                    output_len = output.len(),
+                    "[agent_loop] tool succeeded"
+                );
+                let mut scrubbed = scrub_credentials(&output);
+                let (compacted, tj_stats) = crate::openhuman::tokenjuice::compact_tool_output(
+                    &call.name,
+                    Some(&call.arguments),
+                    &scrubbed,
+                    Some(0),
+                );
+                if tj_stats.applied {
+                    log::debug!(
+                        "[agent_loop] tokenjuice applied tool={} rule={} {}->{} bytes",
+                        call.name,
+                        tj_stats.rule_id,
+                        tj_stats.original_bytes,
+                        tj_stats.compacted_bytes
+                    );
+                    scrubbed = compacted;
+                }
+
+                // Per-tool max_result_size_chars cap. When a tool sets it and
+                // the (post-tokenjuice) body still exceeds the cap, truncate
+                // here and skip the global payload summarizer for this call —
+                // the cap is fast and deterministic, the summarizer is the
+                // fallback for tools that don't know their own size budget.
+                let mut hit_per_tool_cap = false;
+                if let Some(cap) = tool.max_result_size_chars() {
+                    let char_count = scrubbed.chars().count();
+                    if char_count > cap {
+                        let truncated: String = scrubbed.chars().take(cap).collect();
+                        let dropped = char_count - cap;
+                        log::info!(
+                            "[agent_loop] per-tool cap applied tool={} cap_chars={} original_chars={} dropped_chars={}",
+                            call.name,
+                            cap,
+                            char_count,
+                            dropped,
+                        );
+                        scrubbed = format!(
+                            "{truncated}\n\n[truncated by tool cap: {dropped} more chars not shown]"
+                        );
+                        hit_per_tool_cap = true;
+                    }
+                }
+
+                if !hit_per_tool_cap {
+                    if let Some(summarizer) = payload_summarizer {
+                        log::debug!(
+                            "[agent_loop] payload_summarizer intercepting tool={} bytes={}",
+                            call.name,
+                            scrubbed.len()
+                        );
+                        match summarizer
+                            .maybe_summarize(&call.name, None, &scrubbed)
+                            .await
+                        {
+                            Ok(Some(payload)) => {
+                                log::info!(
+                                    "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes",
+                                    call.name,
+                                    payload.original_bytes,
+                                    payload.summary_bytes
+                                );
+                                scrubbed = payload.summary;
+                            }
+                            Ok(None) => {
+                                log::debug!(
+                                    "[agent_loop] payload_summarizer pass-through tool={} bytes={}",
+                                    call.name,
+                                    scrubbed.len()
+                                );
+                            }
+                            Err(e) => {
+                                log::warn!(
+                                    "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)",
+                                    call.name,
+                                    e
+                                );
+                            }
+                        }
+                    }
+                }
+                (scrubbed, true)
+            } else {
+                // Scrub before logging — a failing tool payload can carry
+                // credentials / PII, so never log the raw output.
+                let scrubbed = scrub_credentials(&output);
+                tracing::warn!(
+                    iteration,
+                    tool = call.name.as_str(),
+                    "[agent_loop] tool returned error: {scrubbed}"
+                );
+                let (compacted, _) = crate::openhuman::tokenjuice::compact_tool_output(
+                    &call.name,
+                    Some(&call.arguments),
+                    &scrubbed,
+                    Some(1),
+                );
+                (format!("Error: {compacted}"), false)
+            }
+        }
+        Ok(Err(e)) => {
+            crate::core::observability::report_error(
+                &e,
+                "tool",
+                "execute",
+                &[
+                    ("tool", call.name.as_str()),
+                    ("outcome", "failed"),
+                    ("iteration", &(iteration + 1).to_string()),
+                ],
+            );
+            (format!("Error executing {}: {e}", call.name), false)
+        }
+        Err(_) => {
+            let msg = format!(
+                "tool '{}' timed out after {} seconds",
+                call.name, timeout_secs
+            );
+            crate::core::observability::report_error(
+                msg.as_str(),
+                "tool",
+                "execute",
+                &[
+                    ("tool", call.name.as_str()),
+                    ("outcome", "timeout"),
+                    ("timeout_secs", &timeout_secs.to_string()),
+                    ("iteration", &(iteration + 1).to_string()),
+                ],
+            );
+            (
+                format!(
+                    "Error: tool '{}' timed out after {} seconds",
+                    call.name, timeout_secs
+                ),
+                false,
+            )
+        }
+    };
+    progress
+        .tool_completed(
+            progress_call_id,
+            &call.name,
+            success,
+            result_text.chars().count(),
+            elapsed_ms,
+            iteration_u32,
+        )
+        .await;
+    // ── Approval audit after-action row (#2135) ────
+    // Stamp the terminal status onto the same `pending_approvals` row the gate
+    // created before execution, so the audit trail carries both the before
+    // (approval) and after (executed_at + outcome). Best-effort: a write
+    // failure here is logged but not propagated to the agent.
+    if let (Some(gate), Some(req_id)) = (
+        approval_gate_for_audit.as_ref(),
+        approval_request_id.as_ref(),
+    ) {
+        let exec_outcome = if success {
+            crate::openhuman::approval::ExecutionOutcome::Success
+        } else {
+            crate::openhuman::approval::ExecutionOutcome::Failure
+        };
+        let err_text = if success {
+            None
+        } else {
+            Some(result_text.as_str())
+        };
+        gate.record_execution(req_id, exec_outcome, err_text);
+    }
+
+    ToolRunResult {
+        text: result_text,
+        success,
+    }
+}
diff --git a/src/openhuman/agent/harness/mod.rs b/src/openhuman/agent/harness/mod.rs
index 9d3b5bcc2e..1fee0268f7 100644
--- a/src/openhuman/agent/harness/mod.rs
+++ b/src/openhuman/agent/harness/mod.rs
@@ -24,6 +24,7 @@ pub(crate) mod builtin_definitions;
 mod credentials;
 pub mod definition;
 pub(crate) mod definition_loader;
+pub(crate) mod engine;
 pub mod fork_context;
 mod instructions;
 pub mod interrupt;
diff --git a/src/openhuman/agent/harness/session/agent_tool_exec.rs b/src/openhuman/agent/harness/session/agent_tool_exec.rs
new file mode 100644
index 0000000000..b4e10cfd11
--- /dev/null
+++ b/src/openhuman/agent/harness/session/agent_tool_exec.rs
@@ -0,0 +1,283 @@
+//! The Agent's per-call tool executor, extracted as a free function so both
+//! [`super::types::Agent::execute_tool_call`] and the turn engine's
+//! `AgentToolSource` run the exact same path (visibility gate → session policy
+//! → per-call permission → pluggable `ToolPolicy` → `execute_with_options` +
+//! payload summarizer → per-result byte budget), without one borrowing the
+//! `Agent` while the turn observer borrows it mutably.
+//!
+//! Progress is emitted through a [`ProgressReporter`] (the channel/web flavor),
+//! matching the `Agent::turn` events 1:1.
+
+use std::collections::HashSet;
+
+use crate::core::event_bus::{publish_global, DomainEvent};
+use crate::openhuman::agent::dispatcher::{ParsedToolCall, ToolExecutionResult};
+use crate::openhuman::agent::harness::engine::ProgressReporter;
+use crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer;
+use crate::openhuman::agent::hooks::{self, ToolCallRecord};
+use crate::openhuman::agent::tool_policy::{
+    ToolCallContext, ToolPolicy, ToolPolicyDecision, ToolPolicyRequest,
+};
+use crate::openhuman::agent_tool_policy::ToolPolicySession;
+use crate::openhuman::tools::{Tool, ToolCallOptions};
+use crate::openhuman::util::truncate_with_ellipsis;
+
+/// Read-only context the Agent tool executor needs, captured up front so it
+/// never borrows the `Agent` (whose history/context the turn observer mutates).
+pub(super) struct AgentToolExecCtx<'a> {
+    pub tools: &'a [Box<dyn Tool>], // searched by `name()` to resolve the call
+    pub visible_tool_names: &'a HashSet<String>, // allow-list; empty disables the visibility gate
+    pub tool_policy_session: &'a ToolPolicySession, // per-tool session decision (`decision_for`)
+    pub tool_policy: &'a dyn ToolPolicy, // pluggable policy, checked last before execution
+    pub payload_summarizer: Option<&'a dyn PayloadSummarizer>, // applied to successful output only
+    pub event_session_id: &'a str, // session id stamped on events and the policy context
+    pub event_channel: &'a str, // channel name used in policy context and denial messages
+    pub agent_definition_id: &'a str, // forwarded into `ToolCallContext::session`
+    pub prefer_markdown: bool, // forwarded to `execute_with_options` / `output_for_llm`
+    pub budget_bytes: usize, // cap handed to `apply_tool_result_budget`
+}
+
+/// Execute one parsed tool call end-to-end with the Agent's semantics, emitting `ToolCallStarted`
+/// / `ToolCallCompleted` through `progress`. Blocked, unknown and failing calls are not errors:
+/// they yield `success = false` text. Returns (history result, record for post-turn hooks).
+pub(super) async fn run_agent_tool_call(
+    ctx: &AgentToolExecCtx<'_>,
+    progress: &dyn ProgressReporter,
+    call: &ParsedToolCall,
+    iteration: usize,
+) -> (ToolExecutionResult, ToolCallRecord) {
+    let started = std::time::Instant::now(); // timer covers gating, execution and budgeting
+    publish_global(DomainEvent::ToolExecutionStarted {
+        tool_name: call.name.clone(),
+        session_id: ctx.event_session_id.to_string(),
+    });
+    // Synthesise a fallback id for prompt-guided (non-native) tool calls so
+    // downstream consumers always have a stable key to reconcile rows by.
+    let call_id = call.tool_call_id.clone().unwrap_or_else(|| {
+        format!(
+            "turn-{iteration}-{}-{}",
+            call.name,
+            uuid::Uuid::new_v4().simple()
+        )
+    });
+    progress
+        .tool_started(
+            &call_id,
+            &call.name,
+            &call.arguments,
+            (iteration + 1) as u32,
+        )
+        .await;
+    log::info!("[agent] executing tool: {}", call.name);
+
+    let (raw_result, success) = if !ctx.visible_tool_names.is_empty() // empty set = gate off
+        && !ctx.visible_tool_names.contains(&call.name)
+    {
+        log::warn!(
+            "[agent] blocked tool call '{}' — not in visible tool set",
+            call.name
+        );
+        (
+            format!("Tool '{}' is not available to this agent", call.name),
+            false,
+        )
+    } else if let Some(tool) = ctx.tools.iter().find(|t| t.name() == call.name) {
+        let session_decision = ctx.tool_policy_session.decision_for(&call.name);
+        if session_decision.is_denied() { // session-level denial for this tool name
+            let required = session_decision
+                .required_permission
+                .map(|permission| permission.to_string())
+                .unwrap_or_else(|| "unknown".to_string());
+            (
+                format!(
+                    "Tool '{}' blocked by tool policy: requires {}, channel '{}' allows {}",
+                    call.name, required, ctx.event_channel, session_decision.allowed_permission
+                ),
+                false,
+            )
+        } else {
+            let call_required = tool.permission_level_with_args(&call.arguments);
+            if call_required > session_decision.allowed_permission { // argument-specific check
+                tracing::debug!(
+                    tool = call.name.as_str(),
+                    call_required = %call_required,
+                    allowed = %session_decision.allowed_permission,
+                    "[agent_loop] tool action blocked by per-call permission check"
+                );
+                (
+                    format!(
+                        "Tool '{}' action requires {} permission, channel '{}' allows {}",
+                        call.name,
+                        call_required,
+                        ctx.event_channel,
+                        session_decision.allowed_permission
+                    ),
+                    false,
+                )
+            } else {
+                let context = ToolCallContext::session(
+                    ctx.event_session_id,
+                    ctx.event_channel,
+                    ctx.agent_definition_id.to_string(),
+                    call_id.clone(),
+                    (iteration + 1) as u32,
+                );
+                let mut policy_request =
+                    ToolPolicyRequest::new(call.name.clone(), call.arguments.clone(), context);
+                if let Some(generated_context) = tool.generated_runtime_context(&call.arguments) {
+                    policy_request = policy_request.with_generated_tool_context(generated_context);
+                }
+                let policy_decision = ctx.tool_policy.check(&policy_request).await;
+                if let Some(reason) = policy_decision.blocking_reason() { // policy blocked
+                    let blocked_action = match &policy_decision {
+                        ToolPolicyDecision::RequireApproval { .. } => "requires approval",
+                        ToolPolicyDecision::Deny { .. } => "denied",
+                        ToolPolicyDecision::Allow => "allowed", // presumably unreachable
+                    };
+                    crate::openhuman::tool_registry::denials::record(
+                        call.name.as_str(),
+                        ctx.tool_policy.name(),
+                        blocked_action,
+                        reason,
+                    );
+                    tracing::debug!(
+                        tool = call.name.as_str(),
+                        policy = ctx.tool_policy.name(),
+                        action = blocked_action,
+                        reason = %reason,
+                        "[agent_loop] tool blocked by policy"
+                    );
+                    (
+                        format!(
+                            "Tool '{}' {blocked_action} by policy '{}': {reason}",
+                            call.name,
+                            ctx.tool_policy.name()
+                        ),
+                        false,
+                    )
+                } else {
+                    let options = ToolCallOptions {
+                        prefer_markdown: ctx.prefer_markdown,
+                    };
+                    let outcome = tool
+                        .execute_with_options(call.arguments.clone(), options)
+                        .await;
+                    match outcome {
+                        Ok(r) => {
+                            if !r.is_error {
+                                let mut output = r.output_for_llm(ctx.prefer_markdown);
+                                if ctx.prefer_markdown && r.markdown_formatted.is_some() {
+                                    log::debug!(
+                                        "[agent_loop] tool={} returned markdown payload bytes={}",
+                                        call.name,
+                                        output.len()
+                                    );
+                                }
+                                if let Some(ps) = ctx.payload_summarizer {
+                                    log::debug!(
+                                        "[agent_loop] payload_summarizer intercepting tool={} bytes={}",
+                                        call.name,
+                                        output.len()
+                                    );
+                                    match ps.maybe_summarize(&call.name, None, &output).await {
+                                        Ok(Some(payload)) => {
+                                            log::info!(
+                                                "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes",
+                                                call.name,
+                                                payload.original_bytes,
+                                                payload.summary_bytes
+                                            );
+                                            output = payload.summary;
+                                        }
+                                        Ok(None) => {
+                                            log::debug!(
+                                                "[agent_loop] payload_summarizer pass-through tool={} bytes={}",
+                                                call.name,
+                                                output.len()
+                                            );
+                                        }
+                                        Err(e) => {
+                                            log::warn!(
+                                                "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)",
+                                                call.name,
+                                                e
+                                            );
+                                        }
+                                    }
+                                }
+                                (output, true)
+                            } else {
+                                (
+                                    format!("Error: {}", r.output_for_llm(ctx.prefer_markdown)),
+                                    false,
+                                )
+                            }
+                        }
+                        Err(e) => (format!("Error executing {}: {e}", call.name), false),
+                    }
+                }
+            }
+        }
+    } else {
+        (format!("Unknown tool: {}", call.name), false) // no tool with this name in ctx.tools
+    };
+
+    // Per-result byte budget — the only cache-safe reduction stage (the
+    // truncated body has never been sent to the backend).
+    let (result, budget_outcome) =
+        crate::openhuman::context::apply_tool_result_budget(raw_result, ctx.budget_bytes);
+    if budget_outcome.truncated {
+        log::info!(
+            "[agent_loop] tool_result_budget applied name={} original_bytes={} final_bytes={} dropped_bytes={}",
+            call.name,
+            budget_outcome.original_bytes,
+            budget_outcome.final_bytes,
+            budget_outcome.original_bytes - budget_outcome.final_bytes
+        );
+    }
+
+    let elapsed_ms = started.elapsed().as_millis() as u64; // u128 -> u64 narrowing cast
+    publish_global(DomainEvent::ToolExecutionCompleted {
+        tool_name: call.name.clone(),
+        session_id: ctx.event_session_id.to_string(),
+        success,
+        elapsed_ms,
+    });
+    progress
+        .tool_completed(
+            &call_id,
+            &call.name,
+            success,
+            result.chars().count(),
+            elapsed_ms,
+            (iteration + 1) as u32,
+        )
+        .await;
+    log::info!(
+        "[agent] tool completed: {} success={} elapsed_ms={}",
+        call.name,
+        success,
+        elapsed_ms
+    );
+    log::debug!(
+        "[agent] tool output for {}: {}",
+        call.name,
+        truncate_with_ellipsis(&result, 500)
+    );
+
+    let output_summary = hooks::sanitize_tool_output(&result, &call.name, success);
+    let record = ToolCallRecord {
+        name: call.name.clone(),
+        arguments: call.arguments.clone(),
+        success,
+        output_summary,
+        duration_ms: elapsed_ms,
+    };
+    let exec_result = ToolExecutionResult {
+        name: call.name.clone(),
+        output: result,
+        success,
+        tool_call_id: call.tool_call_id.clone(), // original id, not the synthesised fallback
+    };
+    (exec_result, record)
+}
diff --git a/src/openhuman/agent/harness/session/builder.rs b/src/openhuman/agent/harness/session/builder.rs
index 4897e325f6..9c13ead4ca 100644
--- a/src/openhuman/agent/harness/session/builder.rs
+++ b/src/openhuman/agent/harness/session/builder.rs
@@ -546,9 +546,10 @@ impl AgentBuilder {
             memory: self
                 .memory
                 .ok_or_else(|| anyhow::anyhow!("memory is required"))?,
-            tool_dispatcher: self
-                .tool_dispatcher
-                .ok_or_else(|| anyhow::anyhow!("tool_dispatcher is required"))?,
+            tool_dispatcher: std::sync::Arc::from(
+                self.tool_dispatcher
+                    .ok_or_else(|| anyhow::anyhow!("tool_dispatcher is required"))?,
+            ),
             memory_loader: self
                 .memory_loader
                 .unwrap_or_else(|| Box::new(DefaultMemoryLoader::default())),
diff --git a/src/openhuman/agent/harness/session/mod.rs b/src/openhuman/agent/harness/session/mod.rs
index 16d9ec3c55..4a0b3697e9 100644
--- a/src/openhuman/agent/harness/session/mod.rs
+++ b/src/openhuman/agent/harness/session/mod.rs
@@ -20,11 +20,14 @@
 //! `crate::openhuman::agent`, which re-exports them from this module.
 //! The child files are an implementation detail.
 
+mod agent_tool_exec;
 mod builder;
 pub mod migration;
 mod runtime;
 pub(crate) mod transcript;
 mod turn;
+mod turn_checkpoint;
+mod turn_engine_adapter;
 mod types;
 
 pub use migration::{migrate_session_layout_if_needed, MigrationOutcome};
diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs
index e65b070e56..121816df57 100644
--- a/src/openhuman/agent/harness/session/turn.rs
+++ b/src/openhuman/agent/harness/session/turn.rs
@@ -18,17 +18,13 @@
 //!   background archivist fork.
 
 use super::transcript;
+use super::turn_engine_adapter::{AgentCheckpoint, AgentObserver, AgentToolSource};
 use super::types::Agent;
-use crate::core::event_bus::{publish_global, DomainEvent};
 use crate::openhuman::agent::dispatcher::{ParsedToolCall, ToolExecutionResult};
-use crate::openhuman::agent::error::AgentError;
 use crate::openhuman::agent::harness;
 use crate::openhuman::agent::hooks::{self, ToolCallRecord, TurnContext};
 use crate::openhuman::agent::memory_loader::collect_recall_citations;
 use crate::openhuman::agent::progress::AgentProgress;
-use crate::openhuman::agent::tool_policy::{
-    ToolCallContext, ToolPolicyDecision, ToolPolicyRequest,
-};
 use crate::openhuman::agent_experience::{
     prepend_experience_block, render_experience_hits, AgentExperienceStore, ExperienceQuery,
 };
@@ -36,19 +32,14 @@ use crate::openhuman::agent_tool_policy::render_tool_policy_boundary;
 use crate::openhuman::context::prompt::{
     LearnedContextData, NamespaceSummary, PromptContext, PromptTool,
 };
-use crate::openhuman::context::{ReductionOutcome, ARCHIVIST_EXTRACTION_PROMPT};
-use crate::openhuman::inference::model_context::context_window_for_model;
+use crate::openhuman::context::ARCHIVIST_EXTRACTION_PROMPT;
 use crate::openhuman::inference::provider::{
     ChatMessage, ChatRequest, ConversationMessage, ProviderDelta, UsageInfo,
 };
 use crate::openhuman::memory::MemoryCategory;
-use crate::openhuman::tools::traits::ToolCallOptions;
 use crate::openhuman::tools::Tool;
 use crate::openhuman::util::truncate_with_ellipsis;
 
-use crate::openhuman::agent::harness::token_budget::{
-    trim_chat_messages_to_budget, trim_conversation_history_to_budget,
-};
 use anyhow::Result;
 use std::hash::{Hash, Hasher};
 use std::sync::Arc;
@@ -62,12 +53,7 @@ use std::sync::Arc;
 /// detect those at the `ChatMessage` boundary (where `bound_cached_transcript_messages`
 /// operates) we have to peek inside the JSON. See TAURI-RUST-7 for the
 /// failure mode this guards against.
-#[path = "turn_checkpoint.rs"]
-mod turn_checkpoint;
-use turn_checkpoint::{
-    assistant_message_has_tool_calls, build_deterministic_checkpoint,
-    MAX_ITER_CHECKPOINT_INSTRUCTION,
-};
+use super::turn_checkpoint::{assistant_message_has_tool_calls, MAX_ITER_CHECKPOINT_INSTRUCTION};
 
 impl Agent {
     /// Executes a single interaction "turn" with the agent.
@@ -459,704 +445,132 @@ impl Agent {
         // background archivist fork at end-of-turn.
         self.context.tick_turn();
 
-        // Collect tool call records across all iterations for post-turn hooks
-        let mut all_tool_records: Vec<ToolCallRecord> = Vec::new();
-
-        // Trim-robust digest of THIS turn's tool calls + results, compiled as
-        // the loop runs. Used as the *only* context for the max-iteration
-        // checkpoint summary, so it compiles "what I did this turn" without
-        // the prior conversation or system prompt bleeding in — and it's
-        // immune to history trimming (which drops/reorders from the front).
-        // The persisted transcript is unaffected (bug-report-2026-05-26 A1).
-        // Bounded: each entry truncates the result to 800 chars, so at the
-        // default 10-iteration cap the digest is ~8 KB — revisit if
-        // `max_tool_iterations` is raised substantially.
-        let mut turn_tool_digest = String::new();
-
-        // Capture the last `Vec<ChatMessage>` sent to the provider so we
-        // can persist it as a session transcript after the turn completes.
-        let mut last_provider_messages: Option<Vec<ChatMessage>> = None;
-
-        // Accumulate usage stats across iterations for the transcript.
-        let mut cumulative_input_tokens: u64 = 0;
-        let mut cumulative_output_tokens: u64 = 0;
-        let mut cumulative_cached_input_tokens: u64 = 0;
-        let mut cumulative_charged_usd: f64 = 0.0;
-
-        // Per-turn usage from the final provider response, attached to the
-        // last assistant message in the persisted transcript.
-        let mut last_turn_usage: Option<transcript::TurnUsage> = None;
-
         let turn_body = async {
-            for iteration in 0..self.config.max_tool_iterations {
-                self.emit_progress(AgentProgress::IterationStarted {
-                    iteration: (iteration + 1) as u32,
-                    max_iterations: self.config.max_tool_iterations as u32,
-                })
-                .await;
-                log::info!(
-                    "[agent_loop] iteration start i={} history_len={}",
-                    iteration + 1,
-                    self.history.len()
-                );
-
-                if let Some(context_window) = context_window_for_model(&effective_model) {
-                    let budget_outcome =
-                        trim_conversation_history_to_budget(&mut self.history, context_window);
-                    if budget_outcome.trimmed {
-                        log::warn!(
-                            "[agent_loop] pre-dispatch history trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}",
-                            effective_model,
-                            context_window,
-                            budget_outcome.original_tokens,
-                            budget_outcome.final_tokens,
-                            budget_outcome.messages_removed
-                        );
-                    }
-                }
-
-                // Global context management: run the reduction chain
-                // before every provider hit. Cheap when the guard is
-                // healthy; executes the summarizer LLM call
-                // internally when the pipeline asks for autocompaction
-                // (summarization, microcompact, and the circuit
-                // breaker all live inside [`ContextManager`]).
-                let outcome = self.context.reduce_before_call(&mut self.history).await?;
-                match &outcome {
-                    ReductionOutcome::NoOp => {}
-                    ReductionOutcome::Microcompacted {
-                        envelopes_cleared,
-                        entries_cleared,
-                        bytes_freed,
-                    } => {
-                        log::info!(
-                            "[agent_loop] context microcompact i={} envelopes={} entries={} bytes_freed={}",
-                            iteration + 1,
-                            envelopes_cleared,
-                            entries_cleared,
-                            bytes_freed
-                        );
-                    }
-                    ReductionOutcome::Summarized(stats) => {
-                        log::info!(
-                            "[agent_loop] context autocompact summarized i={} messages_removed={} approx_tokens_freed={} summary_chars={}",
-                            iteration + 1,
-                            stats.messages_removed,
-                            stats.approx_tokens_freed,
-                            stats.summary_chars
-                        );
-                    }
-                    ReductionOutcome::SummarizationFailed {
-                        utilisation_pct,
-                        reason,
-                    } => {
-                        log::warn!(
-                            "[agent_loop] context summarizer failed i={} utilisation_pct={} reason={}",
-                            iteration + 1,
-                            utilisation_pct,
-                            reason
-                        );
-                    }
-                    ReductionOutcome::NotAttempted { utilisation_pct } => {
-                        log::warn!(
-                            "[agent_loop] context autocompact disabled in config i={} utilisation_pct={}",
-                            iteration + 1,
-                            utilisation_pct
-                        );
-                    }
-                    ReductionOutcome::Exhausted {
-                        utilisation_pct,
-                        reason,
-                    } => {
-                        log::error!(
-                            "[agent_loop] context exhausted i={} utilisation_pct={} reason={}",
-                            iteration + 1,
-                            utilisation_pct,
-                            reason
-                        );
-                        return Err(anyhow::anyhow!(
-                            "Context window exhausted ({utilisation_pct}% full): {reason}"
-                        ));
-                    }
-                }
-
-                // Use cached transcript messages on the first iteration of
-                // a resumed session to provide a byte-identical prefix for
-                // KV cache reuse. After `.take()` the cache is consumed;
-                // subsequent iterations rebuild from history normally.
-                let mut messages = if let Some(mut cached) = self.cached_transcript_messages.take()
-                {
-                    // Append only the delta (new user message) from the
-                    // end of the current history.
-                    let new_tail = self.tool_dispatcher.to_provider_messages(
-                        &self.history[self.history.len().saturating_sub(1)..],
-                    );
-                    cached.extend(new_tail);
-                    log::info!(
-                        "[transcript] resumed from cached transcript prefix_len={} new_tail={}",
-                        cached.len() - 1,
-                        1
-                    );
-                    cached
-                } else {
-                    self.tool_dispatcher.to_provider_messages(&self.history)
-                };
-                if let Some(context_window) = context_window_for_model(&effective_model) {
-                    let budget_outcome =
-                        trim_chat_messages_to_budget(&mut messages, context_window);
-                    if budget_outcome.trimmed {
-                        log::warn!(
-                            "[agent_loop] pre-dispatch provider messages trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}",
-                            effective_model,
-                            context_window,
-                            budget_outcome.original_tokens,
-                            budget_outcome.final_tokens,
-                            budget_outcome.messages_removed
-                        );
-                    }
-                }
-
-                last_provider_messages = Some(messages.clone());
-
-                log::info!(
-                    "[agent] iteration {}/{} — sending request to provider model={}",
-                    iteration + 1,
-                    self.config.max_tool_iterations,
-                    effective_model
-                );
-                log::info!(
-                    "[agent_loop] provider request i={} messages={} send_tool_specs={}",
-                    iteration + 1,
-                    messages.len(),
-                    self.tool_dispatcher.should_send_tool_specs()
-                );
-                let provider_started = std::time::Instant::now();
-                // Only set up the streaming sink when someone is
-                // listening for progress events. Without a listener the
-                // channel buffer would fill up and back-pressure the
-                // provider; skipping it also keeps the non-streaming
-                // HTTP path alive for providers that don't implement
-                // SSE.
-                let iteration_for_stream = (iteration + 1) as u32;
-                let (delta_tx_opt, delta_forwarder) = if self.on_progress.is_some() {
-                    let (tx, mut rx) = tokio::sync::mpsc::channel::<ProviderDelta>(128);
-                    let progress_tx = self.on_progress.clone();
-                    let forwarder = tokio::spawn(async move {
-                        while let Some(event) = rx.recv().await {
-                            let Some(ref sink) = progress_tx else {
-                                continue;
-                            };
-                            let mapped = match event {
-                                ProviderDelta::TextDelta { delta } => AgentProgress::TextDelta {
-                                    delta,
-                                    iteration: iteration_for_stream,
-                                },
-                                ProviderDelta::ThinkingDelta { delta } => {
-                                    AgentProgress::ThinkingDelta {
-                                        delta,
-                                        iteration: iteration_for_stream,
-                                    }
-                                }
-                                ProviderDelta::ToolCallStart { call_id, tool_name } => {
-                                    AgentProgress::ToolCallArgsDelta {
-                                        call_id,
-                                        tool_name,
-                                        delta: String::new(),
-                                        iteration: iteration_for_stream,
-                                    }
-                                }
-                                ProviderDelta::ToolCallArgsDelta { call_id, delta } => {
-                                    AgentProgress::ToolCallArgsDelta {
-                                        call_id,
-                                        tool_name: String::new(),
-                                        delta,
-                                        iteration: iteration_for_stream,
-                                    }
-                                }
-                            };
-                            // Await backpressure so streamed deltas arrive
-                            // in order and aren't silently dropped when the
-                            // downstream progress bridge is slow.
-                            if sink.send(mapped).await.is_err() {
-                                break;
-                            }
-                        }
-                    });
-                    (Some(tx), Some(forwarder))
-                } else {
-                    (None, None)
-                };
-                let response = match self
-                    .provider
-                    .chat(
-                        ChatRequest {
-                            messages: &messages,
-                            tools: if self.tool_dispatcher.should_send_tool_specs() {
-                                Some(self.visible_tool_specs.as_slice())
-                            } else {
-                                None
-                            },
-                            stream: delta_tx_opt.as_ref(),
-                        },
-                        &effective_model,
-                        self.temperature,
-                    )
-                    .await
-                {
-                    Ok(resp) => {
-                        log::info!(
-                            "[agent_loop] provider response i={} elapsed_ms={} text_chars={} native_tool_calls={}",
-                            iteration + 1,
-                            provider_started.elapsed().as_millis(),
-                            resp.text.as_ref().map_or(0, |t| t.chars().count()),
-                            resp.tool_calls.len()
-                        );
-                        log::debug!("[agent_loop] provider response: {resp:?}");
-                        // Feed the context manager (guard +
-                        // session-memory token accounting). No-op when
-                        // the provider doesn't return usage.
-                        if let Some(ref usage) = resp.usage {
-                            self.context.record_usage(usage);
-                            // Feed the dashboard tracker. This always records
-                            // (model + usage) when the process-global tracker
-                            // is available — independent of `cost.enabled`,
-                            // which gates budget enforcement only. The call
-                            // is a no-op only when `init_global` has not yet
-                            // run (before bootstrap) or failed; errors are
-                            // logged and swallowed so cost telemetry never
-                            // breaks a turn.
-                            crate::openhuman::cost::record_provider_usage(&effective_model, usage);
-                            cumulative_input_tokens += usage.input_tokens;
-                            cumulative_output_tokens += usage.output_tokens;
-                            cumulative_cached_input_tokens += usage.cached_input_tokens;
-                            cumulative_charged_usd += usage.charged_amount_usd;
-                            // Snapshot this turn's usage so the transcript
-                            // writer can attribute it to the last assistant
-                            // message.
-                            last_turn_usage = Some(transcript::TurnUsage {
-                                model: effective_model.clone(),
-                                usage: transcript::MessageUsage {
-                                    input: usage.input_tokens,
-                                    output: usage.output_tokens,
-                                    cached_input: usage.cached_input_tokens,
-                                    cost_usd: usage.charged_amount_usd,
-                                },
-                                ts: chrono::Utc::now().to_rfc3339(),
-                            });
-                        } else {
-                            // Missing usage on this iteration: clear any
-                            // snapshot carried from a prior iteration so
-                            // the transcript doesn't attribute stale
-                            // numbers to the final assistant message.
-                            last_turn_usage = None;
-                        }
-                        resp
-                    }
-                    Err(err) => {
-                        drop(delta_tx_opt);
-                        if let Some(handle) = delta_forwarder {
-                            let _ = handle.await;
-                        }
-                        return Err(err);
-                    }
-                };
-                drop(delta_tx_opt);
-                if let Some(handle) = delta_forwarder {
-                    let _ = handle.await;
-                }
-
-                let (text, calls) = self.tool_dispatcher.parse_response(&response);
-                let calls = Self::with_fallback_tool_call_ids(calls, iteration);
-                log::info!(
-                    "[agent] provider responded — parsed tool_calls={} text_chars={}",
-                    calls.len(),
-                    text.chars().count()
-                );
-                log::info!(
-                    "[agent_loop] parsed response i={} parsed_text_chars={} parsed_tool_calls={}",
-                    iteration + 1,
-                    text.chars().count(),
-                    calls.len()
-                );
-                if calls.is_empty() {
-                    // Capture reasoning_content before response.text is moved.
-                    // Thinking models (DeepSeek-R1, Qwen3, GLM-4) return
-                    // chain-of-thought in this field; the API contract requires
-                    // it to be echoed back verbatim in subsequent turns or it
-                    // returns HTTP 400. We stash it in extra_metadata so
-                    // convert_messages_for_native can include it when building
-                    // the next request's message list.
-                    let turn_reasoning_content = response.reasoning_content.clone();
-                    let final_text = if text.is_empty() {
-                        response.text.unwrap_or_default()
-                    } else {
-                        text
-                    };
-                    // Defense-in-depth (bug-report-2026-05-26 A1): a
-                    // completion with no text *and* no tool calls is never a
-                    // valid final answer — it's a degenerate/poisoned
-                    // response. Surfacing it as an error is visible; the old
-                    // behaviour returned `Ok("")`, which rendered as a blank
-                    // reply and silently wedged the thread.
-                    if final_text.trim().is_empty() {
-                        log::warn!(
-                            "[agent_loop] provider returned an empty final response (i={}, no text, no tool calls) — surfacing as error instead of a silent blank reply",
-                            iteration + 1
-                        );
-                        // Typed variant so `run_single` can route this
-                        // through `AgentError::skips_sentry()` and demote
-                        // to a `log::info!` instead of escalating to
-                        // Sentry (TAURI-RUST-4JX). The `Display` impl
-                        // still renders the canonical user-facing string
-                        // for UI surfaces, so the user behaviour is
-                        // unchanged.
-                        return Err(AgentError::EmptyProviderResponse {
-                            iteration: iteration + 1,
-                        }
-                        .into());
-                    }
-                    log::info!(
-                        "[agent] no tool calls — returning final response after {} iteration(s)",
-                        iteration + 1
-                    );
-                    log::info!(
-                        "[agent_loop] final response i={} final_chars={} has_reasoning_content={}",
-                        iteration + 1,
-                        final_text.chars().count(),
-                        turn_reasoning_content.is_some()
-                    );
-
-                    self.emit_progress(AgentProgress::TurnCompleted {
-                        iterations: (iteration + 1) as u32,
-                    })
-                    .await;
-
-                    let mut assistant_msg = ChatMessage::assistant(final_text.clone());
-                    if let Some(rc) = turn_reasoning_content {
-                        // Store reasoning_content in extra_metadata so it
-                        // survives in history and is passed back to the
-                        // provider on the next turn.
-                        assistant_msg.extra_metadata =
-                            Some(serde_json::json!({ "reasoning_content": rc }));
-                        log::debug!(
-                            "[agent_loop] stored reasoning_content in extra_metadata for next turn (chars={})",
-                            assistant_msg
-                                .extra_metadata
-                                .as_ref()
-                                .and_then(|m| m.get("reasoning_content"))
-                                .and_then(|v| v.as_str())
-                                .map_or(0, |s| s.chars().count())
-                        );
-                    }
-                    self.history.push(ConversationMessage::Chat(assistant_msg));
-                    self.trim_history();
-
-                    // Mirror the final assistant reply into the transcript
-                    // snapshot so the JSONL persisted below captures the
-                    // response (not just the prompt that was sent).
-                    if let Some(ref mut msgs) = last_provider_messages {
-                        msgs.push(ChatMessage::assistant(final_text.clone()));
-                    }
-
-                    // Persist the transcript **now** — right after the
-                    // provider response lands — so a crash during hooks
-                    // / memory-extraction / the outer epilogue can't
-                    // lose the assistant's reply.
-                    if let Some(ref messages) = last_provider_messages {
-                        self.persist_session_transcript(
-                            messages,
-                            cumulative_input_tokens,
-                            cumulative_output_tokens,
-                            cumulative_cached_input_tokens,
-                            cumulative_charged_usd,
-                            last_turn_usage.as_ref(),
-                        );
-                    }
-
-                    if self.auto_save {
-                        let summary = truncate_with_ellipsis(&final_text, 100);
-                        let _ = self
-                            .memory
-                            .store("", "assistant_resp", &summary, MemoryCategory::Daily, None)
-                            .await;
-                    }
-
-                    // Session-memory tool-call accounting. The actual
-                    // background extraction spawn happens *outside*
-                    // `turn_body` so the spawned task can take an owned
-                    // parent context without fighting the borrow
-                    // checker against `self`. We capture the decision
-                    // here and surface it via the manager's session
-                    // state — the epilogue (below) reads
-                    // `should_extract_session_memory()`.
-                    self.context.record_tool_calls(all_tool_records.len());
-
-                    // Fire post-turn hooks (non-blocking)
-                    if !self.post_turn_hooks.is_empty() {
-                        let ctx = TurnContext {
-                            user_message: user_message.to_string(),
-                            assistant_response: final_text.clone(),
-                            tool_calls: all_tool_records,
-                            turn_duration_ms: turn_started.elapsed().as_millis() as u64,
-                            session_id: Some(self.event_session_id.clone())
-                                .filter(|session_id| !session_id.trim().is_empty()),
-                            agent_id: Some(self.agent_definition_id.clone())
-                                .filter(|agent_id| !agent_id.trim().is_empty()),
-                            entrypoint: Some(self.event_channel.clone())
-                                .filter(|entrypoint| !entrypoint.trim().is_empty()),
-                            iteration_count: iteration + 1,
-                        };
-                        hooks::fire_hooks(&self.post_turn_hooks, ctx);
-                    }
-
-                    return Ok(final_text);
-                }
-
-                if !text.is_empty() {
-                    log::info!(
-                        "[agent_loop] assistant pre-tool text i={} chars={}",
-                        iteration + 1,
-                        text.chars().count()
-                    );
-                    // Push the assistant text into history; rendering is
-                    // the caller's responsibility (the CLI loop walks
-                    // `agent.history()` after each turn, sub-agents and
-                    // library consumers get whatever they need through
-                    // the returned value / history accessors).
-                    self.history
-                        .push(ConversationMessage::Chat(ChatMessage::assistant(
-                            text.clone(),
-                        )));
-                }
-                let tool_names: Vec<&str> = calls.iter().map(|call| call.name.as_str()).collect();
-                log::info!(
-                    "[agent] dispatching {} tool(s): {:?}",
-                    calls.len(),
-                    tool_names
-                );
-                log::info!(
-                    "[agent_loop] executing tools i={} names={:?}",
-                    iteration + 1,
-                    tool_names
-                );
-                let persisted_tool_calls =
-                    Self::persisted_tool_calls_for_history(&response, &calls, iteration);
-                log::info!(
-                    "[agent_loop] persisting assistant tool calls i={} persisted_tool_calls={} parsed_tool_calls={}",
-                    iteration + 1,
-                    persisted_tool_calls.len(),
-                    calls.len()
-                );
-                self.history.push(ConversationMessage::AssistantToolCalls {
-                    text: if text.is_empty() {
-                        None
-                    } else {
-                        Some(text.clone())
-                    },
-                    tool_calls: persisted_tool_calls,
-                    reasoning_content: response
-                        .reasoning_content
-                        .as_deref()
-                        .map(str::trim)
-                        .filter(|s| !s.is_empty())
-                        .map(ToString::to_string),
-                });
-
-                // Persist the transcript **right after** the provider
-                // response lands — before executing tools — so if the
-                // session crashes mid-tool-call we still have the
-                // assistant's response + tool-call intents on disk.
-                // Rebuild `last_provider_messages` from the current
-                // history so the snapshot includes whatever the
-                // assistant just emitted (plain text + tool calls).
-                last_provider_messages =
-                    Some(self.tool_dispatcher.to_provider_messages(&self.history));
-                if let Some(ref messages) = last_provider_messages {
-                    self.persist_session_transcript(
-                        messages,
-                        cumulative_input_tokens,
-                        cumulative_output_tokens,
-                        cumulative_cached_input_tokens,
-                        cumulative_charged_usd,
-                        last_turn_usage.as_ref(),
-                    );
-                }
-
-                let (results, records) = self.execute_tools(&calls, iteration).await;
-                all_tool_records.extend(records);
-                log::info!(
-                    "[agent_loop] tool results complete i={} result_count={}",
-                    iteration + 1,
-                    results.len()
-                );
-                for r in &results {
-                    log::info!(
-                        "[agent] tool response name={} success={} output_chars={}",
-                        r.name,
-                        r.success,
-                        r.output.chars().count(),
-                    );
-                    log::debug!(
-                        "[agent] tool response body name={}: {}",
-                        r.name,
-                        truncate_with_ellipsis(&r.output, 300)
-                    );
-                    // Record this call in the turn digest (output truncated to
-                    // bound size) for a possible max-iteration checkpoint.
-                    turn_tool_digest.push_str(&format!(
-                        "- {} [{}]: {}\n",
-                        r.name,
-                        if r.success { "ok" } else { "failed" },
-                        truncate_with_ellipsis(&r.output, 800)
-                    ));
-                }
-                log::info!(
-                    "[agent] all tools complete for iteration {} — looping back to provider",
-                    iteration + 1
-                );
-                let formatted = self.tool_dispatcher.format_results(&results);
-                self.history.push(formatted);
+            // Capture everything the engine seams need as locals/clones *before*
+            // the observer takes `&mut self`, so the borrow checker is happy:
+            // the tool source + parser + checkpoint hold clones disjoint from
+            // the `Agent`, and the observer alone borrows it mutably.
+            let dispatcher = self.tool_dispatcher.clone();
+            let provider = self.provider.clone();
+            let provider_name = self.event_channel().to_string();
+            let temperature = self.temperature;
+            let max_iterations = self.config.max_tool_iterations;
+            let multimodal = crate::openhuman::config::MultimodalConfig::default();
+            let mut tool_source = AgentToolSource {
+                tools: self.tools.clone(),
+                visible_tool_names: self.visible_tool_names.clone(),
+                tool_policy_session: self.tool_policy_session.clone(),
+                tool_policy: self.tool_policy.clone(),
+                payload_summarizer: self.payload_summarizer.clone(),
+                event_session_id: self.event_session_id().to_string(),
+                event_channel: self.event_channel().to_string(),
+                agent_definition_id: self.agent_definition_id.clone(),
+                prefer_markdown: self.context.prefer_markdown_tool_output(),
+                budget_bytes: self.context.tool_result_budget_bytes(),
+                should_send_specs: self.tool_dispatcher.should_send_tool_specs(),
+                advertised_specs: self.visible_tool_specs.as_ref().clone(),
+                records: Vec::new(),
+            };
+            let progress = super::super::engine::TurnProgress::new(self.on_progress.clone());
+            let parser = super::super::engine::DispatcherParser {
+                dispatcher: dispatcher.as_ref(),
+            };
+            let checkpoint = AgentCheckpoint {
+                provider: self.provider.clone(),
+                dispatcher: self.tool_dispatcher.clone(),
+                model: effective_model.clone(),
+                temperature,
+                on_progress: self.on_progress.clone(),
+                user_message: user_message.to_string(),
+                max_iterations,
+            };
+            let cached_prefix = self.cached_transcript_messages.take();
+            let mut observer = AgentObserver {
+                agent: self,
+                effective_model: effective_model.clone(),
+                cumulative_input: 0,
+                cumulative_output: 0,
+                cumulative_cached: 0,
+                cumulative_charged: 0.0,
+                last_turn_usage: None,
+                cached_prefix,
+                pending_results: Vec::new(),
+                did_push_final: false,
+            };
+            let mut buf: Vec<ChatMessage> = Vec::new();
+
+            let outcome = super::super::engine::run_turn_engine(
+                provider.as_ref(),
+                &mut buf,
+                &mut tool_source,
+                &progress,
+                &mut observer,
+                &checkpoint,
+                &parser,
+                &provider_name,
+                &effective_model,
+                temperature,
+                true, // silent — the channel/UI renders via progress + the return value
+                &multimodal,
+                max_iterations,
+                None, // the web bridge streams via on_progress deltas, not on_delta
+            )
+            .await?;
+
+            // Pull the observer's accounting out, then drop it to release the
+            // `&mut self` borrow so the epilogue can use `self`.
+            let did_push_final = observer.did_push_final;
+            let cumulative_input = observer.cumulative_input;
+            let cumulative_output = observer.cumulative_output;
+            let cumulative_cached = observer.cumulative_cached;
+            let cumulative_charged = observer.cumulative_charged;
+            let last_turn_usage = observer.last_turn_usage.take();
+            drop(observer);
+            let records = std::mem::take(&mut tool_source.records);
+
+            self.context.record_tool_calls(records.len());
+
+            // For a clean final response the observer already pushed the
+            // assistant message + persisted. For a max-iteration checkpoint or
+            // circuit-breaker halt the engine returned the text without pushing
+            // it, so finish the history + transcript here (mirrors the old
+            // final/max-iter branches).
+            if !did_push_final {
+                self.history
+                    .push(ConversationMessage::Chat(ChatMessage::assistant(
+                        outcome.text.clone(),
+                    )));
                 self.trim_history();
-                // Flush the transcript again now that tool results have
-                // been appended — the pre-tool persist above only
-                // captured the assistant's tool-call intents. A crash
-                // or early-exit between iterations would otherwise lose
-                // the tool output from the on-disk session record.
-                let post_tool_messages = self.tool_dispatcher.to_provider_messages(&self.history);
+                // Note: the engine already emits `TurnCompleted` on the
+                // checkpoint exit (and every other terminal path), so we don't
+                // re-emit it here — doing so would double-fire for the UI.
+                let messages = self.tool_dispatcher.to_provider_messages(&self.history);
                 self.persist_session_transcript(
-                    &post_tool_messages,
-                    cumulative_input_tokens,
-                    cumulative_output_tokens,
-                    cumulative_cached_input_tokens,
-                    cumulative_charged_usd,
+                    &messages,
+                    cumulative_input,
+                    cumulative_output,
+                    cumulative_cached,
+                    cumulative_charged,
                     last_turn_usage.as_ref(),
                 );
-                last_provider_messages = Some(post_tool_messages);
-                log::info!(
-                    "[agent_loop] iteration end i={} history_len={}",
-                    iteration + 1,
-                    self.history.len()
-                );
             }
 
-            // Tool-call iteration cap reached. Instead of aborting the turn
-            // — which left the persisted transcript on an unterminated tool
-            // cycle and silently wedged the thread on the next message
-            // (bug-report-2026-05-26 A1) — emit a *resumable checkpoint*:
-            // ask the model (tools disabled) to summarize what it did and
-            // what comes next, persist that as the final assistant message,
-            // and return it. The full tool-call history stays in the
-            // transcript, so the user's next message naturally resumes the
-            // task — no heuristic "continue" detection needed.
-            log::warn!(
-                "[agent_loop] reached max tool iterations max={} — emitting resumable checkpoint instead of aborting",
-                self.config.max_tool_iterations
-            );
-
-            let base_messages = last_provider_messages
-                .clone()
-                .unwrap_or_else(|| self.tool_dispatcher.to_provider_messages(&self.history));
-            // Summarize ONLY this turn's work: feed the compiled tool-call
-            // digest (no system prompt, no prior conversation), not the full
-            // conversation. `base_messages` above is still used for the
-            // transcript persist below, so the saved transcript is unchanged
-            // (bug-report-2026-05-26 A1). `user_message` below is the
-            // `turn(&mut self, message: &str)` parameter (the turn's request).
-            let turn_summary_input = vec![ChatMessage::user(format!(
-                "You were working on this user request:\n{user_message}\n\nHere are the tool calls you made this turn and their results — compile your checkpoint from these:\n{}",
-                if turn_tool_digest.is_empty() {
-                    "(no tool calls recorded)"
-                } else {
-                    turn_tool_digest.as_str()
-                }
-            ))];
-            let checkpoint_iteration = (self.config.max_tool_iterations + 1) as u32;
-            let (mut checkpoint, checkpoint_usage) = self
-                .summarize_iteration_checkpoint(
-                    &turn_summary_input,
-                    &effective_model,
-                    checkpoint_iteration,
-                )
-                .await;
-
-            // Fold the checkpoint call's usage into the turn's cumulative
-            // accounting. The provider call happens regardless of whether we
-            // keep its prose, so dropping its tokens would undercount the
-            // turn and mis-attribute the prior iteration's usage to the
-            // checkpoint message (mirrors the normal final-response path).
-            if let Some(ref usage) = checkpoint_usage {
-                self.context.record_usage(usage);
-                crate::openhuman::cost::record_provider_usage(&effective_model, usage);
-                cumulative_input_tokens += usage.input_tokens;
-                cumulative_output_tokens += usage.output_tokens;
-                cumulative_cached_input_tokens += usage.cached_input_tokens;
-                cumulative_charged_usd += usage.charged_amount_usd;
-                last_turn_usage = Some(transcript::TurnUsage {
-                    model: effective_model.clone(),
-                    usage: transcript::MessageUsage {
-                        input: usage.input_tokens,
-                        output: usage.output_tokens,
-                        cached_input: usage.cached_input_tokens,
-                        cost_usd: usage.charged_amount_usd,
-                    },
-                    ts: chrono::Utc::now().to_rfc3339(),
-                });
-            } else {
-                // No usage on the checkpoint call: don't attribute a stale
-                // prior-iteration snapshot to the checkpoint assistant message.
-                last_turn_usage = None;
-            }
-
-            if checkpoint.trim().is_empty() {
-                log::warn!("[agent_loop] checkpoint summary empty — using deterministic fallback");
-                checkpoint = build_deterministic_checkpoint(
-                    &all_tool_records,
-                    self.config.max_tool_iterations,
-                );
+            // Auto-save a short memory of the final reply (not on a capped turn,
+            // matching the prior behavior).
+            if self.auto_save && !outcome.hit_cap {
+                let summary = truncate_with_ellipsis(&outcome.text, 100);
+                let _ = self
+                    .memory
+                    .store("", "assistant_resp", &summary, MemoryCategory::Daily, None)
+                    .await;
             }
-            log::info!(
-                "[agent_loop] max-iter checkpoint emitted chars={}",
-                checkpoint.chars().count()
-            );
-
-            self.emit_progress(AgentProgress::TurnCompleted {
-                iterations: self.config.max_tool_iterations as u32,
-            })
-            .await;
-
-            self.history
-                .push(ConversationMessage::Chat(ChatMessage::assistant(
-                    checkpoint.clone(),
-                )));
-            self.trim_history();
-
-            // Persist the checkpoint so the transcript ends on a
-            // well-formed assistant message (never a dangling tool cycle).
-            // Note: `base_messages` ends before the final (capped) iteration's
-            // tool results — those landed after the last `last_provider_messages`
-            // snapshot — so the persisted transcript omits them. That's fine:
-            // the checkpoint prose covers the work done, and the transcript
-            // stays structurally correct (ends on an assistant message).
-            let mut checkpoint_messages = base_messages;
-            checkpoint_messages.push(ChatMessage::assistant(checkpoint.clone()));
-            self.persist_session_transcript(
-                &checkpoint_messages,
-                cumulative_input_tokens,
-                cumulative_output_tokens,
-                cumulative_cached_input_tokens,
-                cumulative_charged_usd,
-                last_turn_usage.as_ref(),
-            );
-
-            self.context.record_tool_calls(all_tool_records.len());
 
-            // Fire post-turn hooks with the checkpoint as the assistant
-            // response (mirrors the normal final-response path).
+            // Fire post-turn hooks (non-blocking).
             if !self.post_turn_hooks.is_empty() {
                 let ctx = TurnContext {
                     user_message: user_message.to_string(),
-                    assistant_response: checkpoint.clone(),
-                    tool_calls: all_tool_records,
+                    assistant_response: outcome.text.clone(),
+                    tool_calls: records,
                     turn_duration_ms: turn_started.elapsed().as_millis() as u64,
                     session_id: Some(self.event_session_id.clone())
                         .filter(|session_id| !session_id.trim().is_empty()),
@@ -1164,12 +578,12 @@ impl Agent {
                         .filter(|agent_id| !agent_id.trim().is_empty()),
                     entrypoint: Some(self.event_channel.clone())
                         .filter(|entrypoint| !entrypoint.trim().is_empty()),
-                    iteration_count: self.config.max_tool_iterations,
+                    iteration_count: outcome.iterations as usize,
                 };
                 hooks::fire_hooks(&self.post_turn_hooks, ctx);
             }
 
-            Ok(checkpoint)
+            Ok(outcome.text)
         }; // end of `turn_body` async block
 
         // Run the turn body inside the parent-execution-context scope so
@@ -1283,282 +697,25 @@ impl Agent {
         call: &ParsedToolCall,
         iteration: usize,
     ) -> (ToolExecutionResult, ToolCallRecord) {
-        let started = std::time::Instant::now();
-        publish_global(DomainEvent::ToolExecutionStarted {
-            tool_name: call.name.clone(),
-            session_id: self.event_session_id().to_string(),
-        });
-        // Synthesise a fallback id for prompt-guided (non-native) tool
-        // calls so downstream consumers always have a stable key to
-        // reconcile tool_call / tool_args_delta / tool_result rows by.
-        // A random uuid guarantees uniqueness even when the same tool
-        // name appears multiple times in the same iteration's parsed
-        // calls.
-        let call_id = call.tool_call_id.clone().unwrap_or_else(|| {
-            format!(
-                "turn-{iteration}-{}-{}",
-                call.name,
-                uuid::Uuid::new_v4().simple()
-            )
-        });
-        self.emit_progress(AgentProgress::ToolCallStarted {
-            call_id: call_id.clone(),
-            tool_name: call.name.clone(),
-            arguments: call.arguments.clone(),
-            iteration: (iteration + 1) as u32,
-        })
-        .await;
-        log::info!("[agent] executing tool: {}", call.name);
-        log::info!("[agent_loop] tool start name={}", call.name);
-
-        let (raw_result, success) = if !self.visible_tool_names.is_empty()
-            && !self.visible_tool_names.contains(&call.name)
-        {
-            log::warn!(
-                "[agent] blocked tool call '{}' — not in visible tool set",
-                call.name
-            );
-            (
-                format!("Tool '{}' is not available to this agent", call.name),
-                false,
-            )
-        } else if let Some(tool) = self.tools.iter().find(|t| t.name() == call.name) {
-            let session_decision = self.tool_policy_session.decision_for(&call.name);
-            if session_decision.is_denied() {
-                let required = session_decision
-                    .required_permission
-                    .map(|permission| permission.to_string())
-                    .unwrap_or_else(|| "unknown".to_string());
-                (
-                    format!(
-                        "Tool '{}' blocked by tool policy: requires {}, channel '{}' allows {}",
-                        call.name,
-                        required,
-                        self.event_channel,
-                        session_decision.allowed_permission
-                    ),
-                    false,
-                )
-            } else {
-                // Per-call args-aware permission check: tools that expose
-                // multi-level actions (e.g. schedule list vs schedule create)
-                // set a low static permission_level() so the tool is visible
-                // on read-capable channels, but declare the true per-action
-                // level via permission_level_with_args.
-                let call_required = tool.permission_level_with_args(&call.arguments);
-                if call_required > session_decision.allowed_permission {
-                    tracing::debug!(
-                        tool = call.name.as_str(),
-                        call_required = %call_required,
-                        allowed = %session_decision.allowed_permission,
-                        "[agent_loop] tool action blocked by per-call permission check"
-                    );
-                    (
-                        format!(
-                            "Tool '{}' action requires {} permission, channel '{}' allows {}",
-                            call.name,
-                            call_required,
-                            self.event_channel,
-                            session_decision.allowed_permission
-                        ),
-                        false,
-                    )
-                } else {
-                    let context = ToolCallContext::session(
-                        self.event_session_id(),
-                        self.event_channel(),
-                        self.agent_definition_id.to_string(),
-                        call_id.clone(),
-                        (iteration + 1) as u32,
-                    );
-                    let mut policy_request =
-                        ToolPolicyRequest::new(call.name.clone(), call.arguments.clone(), context);
-                    if let Some(generated_context) = tool.generated_runtime_context(&call.arguments)
-                    {
-                        policy_request =
-                            policy_request.with_generated_tool_context(generated_context);
-                    }
-                    let policy_decision = self.tool_policy.check(&policy_request).await;
-                    if let Some(reason) = policy_decision.blocking_reason() {
-                        let blocked_action = match &policy_decision {
-                            ToolPolicyDecision::RequireApproval { .. } => "requires approval",
-                            ToolPolicyDecision::Deny { .. } => "denied",
-                            ToolPolicyDecision::Allow => "allowed",
-                        };
-                        crate::openhuman::tool_registry::denials::record(
-                            call.name.as_str(),
-                            self.tool_policy.name(),
-                            blocked_action,
-                            reason,
-                        );
-                        tracing::debug!(
-                            tool = call.name.as_str(),
-                            policy = self.tool_policy.name(),
-                            action = blocked_action,
-                            reason = %reason,
-                            "[agent_loop] tool blocked by policy"
-                        );
-                        (
-                            format!(
-                                "Tool '{}' {blocked_action} by policy '{}': {reason}",
-                                call.name,
-                                self.tool_policy.name()
-                            ),
-                            false,
-                        )
-                    } else {
-                        // Per-call options: ask the tool for markdown output when the
-                        // context manager is configured to prefer it. Tools that
-                        // implement `execute_with_options` will populate
-                        // `markdown_formatted`; others fall through to the default
-                        // implementation which forwards to `execute`.
-                        let prefer_markdown = self.context.prefer_markdown_tool_output();
-                        let options = ToolCallOptions { prefer_markdown };
-                        let outcome = tool
-                            .execute_with_options(call.arguments.clone(), options)
-                            .await;
-                        match outcome {
-                            Ok(r) => {
-                                if !r.is_error {
-                                    let mut output = r.output_for_llm(prefer_markdown);
-                                    if prefer_markdown && r.markdown_formatted.is_some() {
-                                        log::debug!(
-                                        "[agent_loop] tool={} returned markdown payload bytes={}",
-                                        call.name,
-                                        output.len()
-                                    );
-                                    }
-                                    // Issue #574 — if a payload summarizer is wired
-                                    // in (orchestrator session only) and the output
-                                    // exceeds the configured threshold, hand it to
-                                    // the summarizer sub-agent before it enters
-                                    // history. On any failure or below-threshold
-                                    // payload, leave `output` untouched and let the
-                                    // existing tool_result_budget_bytes truncation
-                                    // pipeline handle it downstream.
-                                    if let Some(ps) = self.payload_summarizer.as_ref() {
-                                        log::debug!(
-                                    "[agent_loop] payload_summarizer intercepting tool={} bytes={}",
-                                    call.name,
-                                    output.len()
-                                );
-                                        match ps.maybe_summarize(&call.name, None, &output).await {
-                                            Ok(Some(payload)) => {
-                                                log::info!(
-                                            "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes",
-                                            call.name,
-                                            payload.original_bytes,
-                                            payload.summary_bytes
-                                        );
-                                                output = payload.summary;
-                                            }
-                                            Ok(None) => {
-                                                log::debug!(
-                                            "[agent_loop] payload_summarizer pass-through tool={} bytes={}",
-                                            call.name,
-                                            output.len()
-                                        );
-                                            }
-                                            Err(e) => {
-                                                log::warn!(
-                                            "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)",
-                                            call.name,
-                                            e
-                                        );
-                                            }
-                                        }
-                                    }
-                                    (output, true)
-                                } else {
-                                    (
-                                        format!("Error: {}", r.output_for_llm(prefer_markdown)),
-                                        false,
-                                    )
-                                }
-                            }
-                            Err(e) => (format!("Error executing {}: {e}", call.name), false),
-                        }
-                    }
-                } // end else { // per-call permission ok
-            }
-        } else {
-            (format!("Unknown tool: {}", call.name), false)
+        // The per-call execution path lives in the shared
+        // [`super::agent_tool_exec::run_agent_tool_call`] so `Agent::turn`
+        // (which drives the turn engine through `AgentToolSource`) and any
+        // direct caller run identical logic. Progress is emitted through a
+        // `TurnProgress` over this agent's sink.
+        let progress = super::super::engine::TurnProgress::new(self.on_progress.clone());
+        let ctx = super::agent_tool_exec::AgentToolExecCtx {
+            tools: &self.tools,
+            visible_tool_names: &self.visible_tool_names,
+            tool_policy_session: &self.tool_policy_session,
+            tool_policy: self.tool_policy.as_ref(),
+            payload_summarizer: self.payload_summarizer.as_deref(),
+            event_session_id: self.event_session_id(),
+            event_channel: self.event_channel(),
+            agent_definition_id: &self.agent_definition_id,
+            prefer_markdown: self.context.prefer_markdown_tool_output(),
+            budget_bytes: self.context.tool_result_budget_bytes(),
         };
-
-        // Context pipeline stage 1: apply the per-result byte budget
-        // *inline* before the result enters history. This is the only
-        // cache-safe reduction stage — the truncated body has never
-        // been sent to the backend so it creates no cache invalidation.
-        // Source the budget from the context manager so it tracks the
-        // resolved `context.tool_result_budget_bytes` (including any
-        // env/config overrides) rather than the deprecated
-        // `agent.tool_result_budget_bytes` field.
-        let budget_bytes = self.context.tool_result_budget_bytes();
-        let (result, budget_outcome) =
-            crate::openhuman::context::apply_tool_result_budget(raw_result, budget_bytes);
-        if budget_outcome.truncated {
-            log::info!(
-                "[agent_loop] tool_result_budget applied name={} original_bytes={} final_bytes={} dropped_bytes={}",
-                call.name,
-                budget_outcome.original_bytes,
-                budget_outcome.final_bytes,
-                budget_outcome.original_bytes - budget_outcome.final_bytes
-            );
-        }
-
-        let elapsed_ms = started.elapsed().as_millis() as u64;
-        publish_global(DomainEvent::ToolExecutionCompleted {
-            tool_name: call.name.clone(),
-            session_id: self.event_session_id().to_string(),
-            success,
-            elapsed_ms,
-        });
-        self.emit_progress(AgentProgress::ToolCallCompleted {
-            call_id: call_id.clone(),
-            tool_name: call.name.clone(),
-            success,
-            output_chars: result.chars().count(),
-            elapsed_ms,
-            iteration: (iteration + 1) as u32,
-        })
-        .await;
-        log::info!(
-            "[agent] tool completed: {} success={} elapsed_ms={}",
-            call.name,
-            success,
-            elapsed_ms
-        );
-        log::debug!(
-            "[agent] tool output for {}: {}",
-            call.name,
-            truncate_with_ellipsis(&result, 500)
-        );
-        log::info!(
-            "[agent_loop] tool finish name={} elapsed_ms={} output_chars={} success={}",
-            call.name,
-            elapsed_ms,
-            result.chars().count(),
-            success
-        );
-
-        let output_summary = hooks::sanitize_tool_output(&result, &call.name, success);
-
-        let record = ToolCallRecord {
-            name: call.name.clone(),
-            arguments: call.arguments.clone(),
-            success,
-            output_summary,
-            duration_ms: elapsed_ms,
-        };
-
-        let exec_result = ToolExecutionResult {
-            name: call.name.clone(),
-            output: result,
-            success,
-            tool_call_id: call.tool_call_id.clone(),
-        };
-
-        (exec_result, record)
+        super::agent_tool_exec::run_agent_tool_call(&ctx, &progress, call, iteration).await
     }
 
     /// Executes multiple tool calls in sequence.
diff --git a/src/openhuman/agent/harness/session/turn_engine_adapter.rs b/src/openhuman/agent/harness/session/turn_engine_adapter.rs
new file mode 100644
index 0000000000..03dd8e67d9
--- /dev/null
+++ b/src/openhuman/agent/harness/session/turn_engine_adapter.rs
@@ -0,0 +1,449 @@
+//! Engine seams for the stateful `Agent::turn`.
+//!
+//! These adapt the `Agent` to the shared [`run_turn_engine`] so web/desktop
+//! chat runs the same loop as every other entry point, while preserving the
+//! Agent's richer state: typed `ConversationMessage` history (with structured
+//! tool calls + round-tripped `reasoning_content`), the `ContextManager`
+//! reduction chain, KV-cache transcript prefixes, transcript persistence, and
+//! the pluggable `ToolDispatcher` (incl. PFormat).
+//!
+//! * [`AgentToolSource`] owns `Arc`/value clones of the Agent's tool state
+//!   (disjoint from the `&mut Agent` the observer holds) and runs each call
+//!   through the shared [`run_agent_tool_call`], collecting `ToolCallRecord`s.
+//! * [`AgentObserver`] borrows the `Agent` mutably: it runs the context
+//!   reduction + re-materializes the engine's `ChatMessage` buffer from the
+//!   typed history each iteration, rebuilds the typed history from the engine's
+//!   per-iteration callbacks, accumulates usage, and persists the transcript.
+//! * [`AgentCheckpoint`] summarizes the turn-so-far into a resumable checkpoint
+//!   when the iteration cap is hit (mirrors `summarize_iteration_checkpoint`).
+
+use std::collections::HashSet;
+use std::sync::Arc;
+
+use anyhow::Result;
+use async_trait::async_trait;
+
+use super::agent_tool_exec::{run_agent_tool_call, AgentToolExecCtx};
+use super::transcript;
+use super::turn_checkpoint::MAX_ITER_CHECKPOINT_INSTRUCTION;
+use super::types::Agent;
+use crate::openhuman::agent::dispatcher::{
+    ParsedToolCall as DispatcherParsedToolCall, ToolDispatcher, ToolExecutionResult,
+};
+use crate::openhuman::agent::harness::engine::{
+    CheckpointOutcome, CheckpointStrategy, ProgressReporter, ToolRunResult, ToolSource,
+    TurnObserver,
+};
+use crate::openhuman::agent::harness::parse::ParsedToolCall;
+use crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer;
+use crate::openhuman::agent::hooks::ToolCallRecord;
+use crate::openhuman::agent::progress::AgentProgress;
+use crate::openhuman::agent::tool_policy::ToolPolicy;
+use crate::openhuman::agent_tool_policy::ToolPolicySession;
+use crate::openhuman::context::ReductionOutcome;
+use crate::openhuman::inference::model_context::context_window_for_model;
+use crate::openhuman::inference::provider::{
+    ChatMessage, ChatRequest, ConversationMessage, Provider, ProviderDelta, ToolCall, UsageInfo,
+};
+use crate::openhuman::tools::{Tool, ToolSpec};
+
+/// Produce the `Vec<ToolCall>` stored on an assistant-with-tools history entry.
+/// Native provider calls win when present; otherwise one call is synthesised
+/// per parsed call (mirrors `Agent::persisted_tool_calls_for_history`).
+fn persisted_tool_calls(
+    native: &[ToolCall],
+    parsed: &[ParsedToolCall],
+    results: &[ToolExecutionResult],
+    iteration: usize,
+) -> Vec<ToolCall> {
+    if !native.is_empty() {
+        return native.to_vec();
+    }
+    // Id precedence: the id the matching result was recorded under (so the
+    // call and its `ToolResults` entry stay paired), then the parsed call's own
+    // id, then a 1-based positional `parsed-<iteration>-<index>` fallback.
+    let mut synthesised = Vec::with_capacity(parsed.len());
+    for (pos, call) in parsed.iter().enumerate() {
+        let recorded_id = match results.get(pos) {
+            Some(result) => result.tool_call_id.clone(),
+            None => None,
+        };
+        let id = match recorded_id.or_else(|| call.id.clone()) {
+            Some(existing) => existing,
+            None => format!("parsed-{}-{}", iteration + 1, pos + 1),
+        };
+        synthesised.push(ToolCall {
+            id,
+            name: call.name.clone(),
+            arguments: call.arguments.to_string(),
+        });
+    }
+    synthesised
+}
+
+/// Tool source for `Agent::turn`. Holds its own clones of the Agent's tool
+/// state, so it never borrows the `Agent` ([`AgentObserver`] holds it mutably).
+pub(super) struct AgentToolSource {
+    pub tools: Arc<Vec<Box<dyn Tool>>>,
+    pub visible_tool_names: HashSet<String>,
+    pub tool_policy_session: ToolPolicySession,
+    pub tool_policy: Arc<dyn ToolPolicy>,
+    pub payload_summarizer: Option<Arc<dyn PayloadSummarizer>>,
+    pub event_session_id: String,
+    pub event_channel: String,
+    pub agent_definition_id: String,
+    pub prefer_markdown: bool,
+    pub budget_bytes: usize,
+    pub should_send_specs: bool,
+    pub advertised_specs: Vec<ToolSpec>,
+    /// Per-call records pushed by `execute_call`; drained post-loop for hooks.
+    pub records: Vec<ToolCallRecord>,
+}
+
+#[async_trait]
+impl ToolSource for AgentToolSource {
+    /// Advertised specs, or an empty slice when specs are not being sent.
+    fn request_specs(&self) -> &[ToolSpec] {
+        if !self.should_send_specs {
+            return &[];
+        }
+        &self.advertised_specs
+    }
+
+    async fn execute_call(
+        &mut self,
+        call: &ParsedToolCall,
+        iteration: usize,
+        progress: &dyn ProgressReporter,
+        _progress_call_id: &str,
+    ) -> ToolRunResult {
+        let exec_ctx = AgentToolExecCtx {
+            tools: &self.tools,
+            visible_tool_names: &self.visible_tool_names,
+            tool_policy_session: &self.tool_policy_session,
+            tool_policy: self.tool_policy.as_ref(),
+            payload_summarizer: self.payload_summarizer.as_deref(),
+            event_session_id: &self.event_session_id,
+            event_channel: &self.event_channel,
+            agent_definition_id: &self.agent_definition_id,
+            prefer_markdown: self.prefer_markdown,
+            budget_bytes: self.budget_bytes,
+        };
+        // The shared executor takes the dispatcher's `ParsedToolCall`; translate
+        // the engine's shape field-for-field (`id` becomes `tool_call_id`).
+        let translated = DispatcherParsedToolCall {
+            name: call.name.clone(),
+            arguments: call.arguments.clone(),
+            tool_call_id: call.id.clone(),
+        };
+        let (executed, call_record) =
+            run_agent_tool_call(&exec_ctx, progress, &translated, iteration).await;
+        self.records.push(call_record);
+        ToolRunResult {
+            text: executed.output,
+            success: executed.success,
+        }
+    }
+}
+
+/// Turn observer for `Agent::turn`: rebuilds the typed history, runs context
+/// management, tracks usage, and persists the transcript after each iteration.
+pub(super) struct AgentObserver<'a> {
+    pub agent: &'a mut Agent,
+    pub effective_model: String,
+    pub cumulative_input: u64,
+    pub cumulative_output: u64,
+    pub cumulative_cached: u64,
+    pub cumulative_charged: f64,
+    pub last_turn_usage: Option<transcript::TurnUsage>,
+    /// Cached transcript prefix for KV-cache reuse on a resumed session; taken
+    /// (and so cleared) by the first `before_dispatch` call.
+    pub cached_prefix: Option<Vec<ChatMessage>>,
+    /// Tool results buffered by `on_tool_result`, then drained into typed
+    /// history through the dispatcher's `format_results` in `on_assistant`.
+    pub pending_results: Vec<ToolExecutionResult>,
+    /// Set once `on_assistant` has pushed the final assistant message (so the
+    /// post-loop epilogue knows not to push `outcome.text` itself).
+    pub did_push_final: bool,
+}
+
+impl AgentObserver<'_> {
+    /// Flatten the typed history through the dispatcher and hand it, together
+    /// with the running usage totals and the latest per-call usage, to the
+    /// Agent's session-transcript writer.
+    fn persist(&mut self) {
+        let flattened = self.agent.tool_dispatcher.to_provider_messages(&self.agent.history);
+        self.agent.persist_session_transcript(
+            &flattened,
+            self.cumulative_input,
+            self.cumulative_output,
+            self.cumulative_cached,
+            self.cumulative_charged,
+            self.last_turn_usage.as_ref(),
+        );
+    }
+}
+
+#[async_trait]
+impl TurnObserver for AgentObserver<'_> {
+    async fn before_dispatch(
+        &mut self,
+        buf: &mut Vec<ChatMessage>,
+        _iteration: usize,
+    ) -> Result<()> {
+        // First pass: trim the typed history to the token budget (if window known).
+        if let Some(context_window) = context_window_for_model(&self.effective_model) {
+            super::super::token_budget::trim_conversation_history_to_budget(
+                &mut self.agent.history,
+                context_window,
+            );
+        }
+        // Then run the context manager's reduction chain over the typed history.
+        let outcome = self
+            .agent
+            .context
+            .reduce_before_call(&mut self.agent.history)
+            .await?;
+        if let ReductionOutcome::Exhausted {
+            utilisation_pct,
+            reason,
+        } = &outcome
+        {
+            return Err(anyhow::anyhow!(
+                "Context window exhausted ({utilisation_pct}% full): {reason}"
+            ));
+        }
+
+        // Re-materialize the engine's ChatMessage buffer from the typed
+        // history. `cached_prefix` is taken on first use: when present, only the
+        // last history entry is converted and appended to it (KV-cache reuse on
+        // a resumed session); on later calls the whole history is converted.
+        let messages = if let Some(mut cached) = self.cached_prefix.take() {
+            let tail = self.agent.tool_dispatcher.to_provider_messages(
+                &self.agent.history[self.agent.history.len().saturating_sub(1)..],
+            );
+            cached.extend(tail);
+            cached
+        } else {
+            self.agent
+                .tool_dispatcher
+                .to_provider_messages(&self.agent.history)
+        };
+        *buf = messages;
+        // Second-pass trim, this time on the materialized provider messages
+        // (the legacy `Agent::turn` is said to have trimmed both the typed
+        // history and the built `ChatMessage` list).
+        if let Some(context_window) = context_window_for_model(&self.effective_model) {
+            super::super::token_budget::trim_chat_messages_to_budget(buf, context_window);
+        }
+        Ok(())
+    }
+
+    fn allow_empty_final(&self) -> bool {
+        false
+    }
+
+    fn record_usage(&mut self, model: &str, usage: &UsageInfo) {
+        self.agent.context.record_usage(usage);
+        crate::openhuman::cost::record_provider_usage(model, usage);
+        self.cumulative_input += usage.input_tokens;
+        self.cumulative_output += usage.output_tokens;
+        self.cumulative_cached += usage.cached_input_tokens;
+        self.cumulative_charged += usage.charged_amount_usd;
+        self.last_turn_usage = Some(transcript::TurnUsage {
+            model: model.to_string(),
+            usage: transcript::MessageUsage {
+                input: usage.input_tokens,
+                output: usage.output_tokens,
+                cached_input: usage.cached_input_tokens,
+                cost_usd: usage.charged_amount_usd,
+            },
+            ts: chrono::Utc::now().to_rfc3339(),
+        });
+    }
+
+    fn on_assistant(
+        &mut self,
+        display_text: &str,
+        _response_text: &str,
+        reasoning_content: Option<&str>,
+        native_tool_calls: &[ToolCall],
+        parsed_calls: &[ParsedToolCall],
+        iteration: usize,
+        is_final: bool,
+    ) {
+        if is_final {
+            let mut assistant_msg = ChatMessage::assistant(display_text.to_string());
+            if let Some(rc) = reasoning_content {
+                assistant_msg.extra_metadata = Some(serde_json::json!({ "reasoning_content": rc }));
+            }
+            self.agent
+                .history
+                .push(ConversationMessage::Chat(assistant_msg));
+            self.agent.trim_history();
+            self.did_push_final = true;
+            return;
+        }
+
+        // Assistant turn with tool calls (meant to mirror the legacy `Agent::turn`).
+        // Non-empty narrative text is pushed as a standalone Chat message and is
+        // also carried as `text` on the structured AssistantToolCalls entry;
+        // the dispatcher-formatted results buffered per call are pushed last.
+        if !display_text.is_empty() {
+            self.agent
+                .history
+                .push(ConversationMessage::Chat(ChatMessage::assistant(
+                    display_text.to_string(),
+                )));
+        }
+        let tool_calls = persisted_tool_calls(
+            native_tool_calls,
+            parsed_calls,
+            &self.pending_results,
+            iteration,
+        );
+        self.agent
+            .history
+            .push(ConversationMessage::AssistantToolCalls {
+                text: if display_text.is_empty() {
+                    None
+                } else {
+                    Some(display_text.to_string())
+                },
+                tool_calls,
+                reasoning_content: reasoning_content
+                    .map(str::trim)
+                    .filter(|s| !s.is_empty())
+                    .map(ToString::to_string),
+            });
+        let results = std::mem::take(&mut self.pending_results);
+        let formatted = self.agent.tool_dispatcher.format_results(&results);
+        self.agent.history.push(formatted);
+        self.agent.trim_history();
+    }
+
+    fn on_tool_result(
+        &mut self,
+        call_id: &str,
+        tool_name: &str,
+        result_text: &str,
+        success: bool,
+        _iteration: usize,
+    ) {
+        self.pending_results.push(ToolExecutionResult {
+            name: tool_name.to_string(),
+            output: result_text.to_string(),
+            success,
+            tool_call_id: Some(call_id.to_string()),
+        });
+    }
+
+    fn after_iteration(&mut self, _buf: &[ChatMessage], _iteration: usize) {
+        self.persist();
+    }
+}
+
+/// Max-iteration checkpoint for `Agent::turn`: asks the provider to summarize
+/// the turn's tool digest into a resumable checkpoint, streaming text deltas to
+/// `on_progress`, and falls back to a deterministic message.
+pub(super) struct AgentCheckpoint {
+    pub provider: Arc<dyn Provider>,
+    pub dispatcher: Arc<dyn ToolDispatcher>,
+    pub model: String,
+    pub temperature: f64,
+    pub on_progress: Option<tokio::sync::mpsc::Sender<AgentProgress>>,
+    pub user_message: String,
+    pub max_iterations: usize,
+}
+
+#[async_trait]
+impl CheckpointStrategy for AgentCheckpoint {
+    /// Ask the provider (tools disabled) to turn `digest` into checkpoint prose,
+    /// forwarding text deltas to `on_progress`; any failure or empty reply
+    /// yields the canned pause message instead, so every path returns `Ok`.
+    async fn on_max_iter(&self, digest: &str, max_iterations: usize) -> Result<CheckpointOutcome> {
+        // Canned fallback, used when the summary call fails or comes back empty.
+        let deterministic = format!(
+            "I reached the tool-call limit for this turn ({max_iterations} steps), so I paused here.\n\n\
+             **Done so far:**\n{digest}\n\
+             **Next steps:** I'll continue from here — just reply (e.g. \"continue\") and I'll pick up \
+             where I left off."
+        );
+        let task_prompt = format!(
+            "You were working on this user request:\n{}\n\nHere are the tool calls you made this turn \
+             and their results — compile your checkpoint from these:\n{}",
+            self.user_message, digest
+        );
+        let messages = vec![
+            ChatMessage::user(task_prompt),
+            ChatMessage::user(MAX_ITER_CHECKPOINT_INSTRUCTION),
+        ];
+
+        let checkpoint_iteration = (self.max_iterations + 1) as u32;
+        // Relay provider text deltas to the progress sink while the call runs.
+        let (delta_tx_opt, delta_forwarder) = match self.on_progress.clone() {
+            Some(sink) => {
+                let (tx, mut rx) = tokio::sync::mpsc::channel::<ProviderDelta>(128);
+                let forwarder = tokio::spawn(async move {
+                    while let Some(event) = rx.recv().await {
+                        let ProviderDelta::TextDelta { delta } = event else {
+                            continue;
+                        };
+                        let update = AgentProgress::TextDelta {
+                            delta,
+                            iteration: checkpoint_iteration,
+                        };
+                        if sink.send(update).await.is_err() {
+                            break;
+                        }
+                    }
+                });
+                (Some(tx), Some(forwarder))
+            }
+            None => (None, None),
+        };
+
+        let request = ChatRequest {
+            messages: &messages,
+            tools: None,
+            stream: delta_tx_opt.as_ref(),
+        };
+        let result = self
+            .provider
+            .chat(request, &self.model, self.temperature)
+            .await;
+        // Dropping the sender closes the channel so the forwarder can finish.
+        drop(delta_tx_opt);
+        if let Some(handle) = delta_forwarder {
+            let _ = handle.await;
+        }
+
+        // A failed summary call is logged and answered with the canned text.
+        let resp = match result {
+            Ok(resp) => resp,
+            Err(e) => {
+                log::warn!("[agent_loop] checkpoint summary call failed: {e:#}");
+                return Ok(CheckpointOutcome {
+                    text: deterministic,
+                    usage: None,
+                });
+            }
+        };
+        let usage = resp.usage.clone();
+        // Prefer parsed prose; use the raw text only when no calls were parsed.
+        let (parsed_text, calls) = self.dispatcher.parse_response(&resp);
+        let checkpoint = if !parsed_text.trim().is_empty() {
+            parsed_text
+        } else if calls.is_empty() {
+            resp.text.unwrap_or_default()
+        } else {
+            String::new()
+        };
+        let text = if checkpoint.trim().is_empty() {
+            deterministic
+        } else {
+            checkpoint
+        };
+        Ok(CheckpointOutcome { text, usage })
+    }
+}
diff --git a/src/openhuman/agent/harness/session/types.rs b/src/openhuman/agent/harness/session/types.rs
index 651cee9ff0..da821f5a6f 100644
--- a/src/openhuman/agent/harness/session/types.rs
+++ b/src/openhuman/agent/harness/session/types.rs
@@ -45,7 +45,10 @@ pub struct Agent {
     pub(super) visible_tool_names: std::collections::HashSet<String>,
     pub(super) tool_policy_session: ToolPolicySession,
     pub(super) memory: Arc<dyn Memory>,
-    pub(super) tool_dispatcher: Box<dyn ToolDispatcher>,
+    // `Arc` (not `Box`) so the turn engine's parser seam can hold a cheap clone
+    // of the dispatcher without borrowing the `Agent` (which the turn observer
+    // borrows mutably) — see `engine::DispatcherParser`.
+    pub(super) tool_dispatcher: Arc<dyn ToolDispatcher>,
     pub(super) memory_loader: Box<dyn MemoryLoader>,
     pub(super) config: crate::openhuman::config::AgentConfig,
     pub(super) model_name: String,
diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs
index 166b8dba38..97dd618534 100644
--- a/src/openhuman/agent/harness/subagent_runner/ops.rs
+++ b/src/openhuman/agent/harness/subagent_runner/ops.rs
@@ -27,16 +27,12 @@ use crate::openhuman::agent::harness::definition::{AgentDefinition, PromptSource
 use crate::openhuman::agent::harness::{
     current_spawn_depth, with_current_sandbox_mode, with_spawn_depth, MAX_SPAWN_DEPTH,
 };
-use crate::openhuman::agent::progress::AgentProgress;
 use crate::openhuman::context::prompt::{
     render_subagent_system_prompt, PromptContext, PromptTool, SubagentRenderOptions,
 };
-use crate::openhuman::inference::provider::{
-    ChatMessage, ChatRequest, Provider, ProviderDelta, ToolCall,
-};
+use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, Provider};
 use crate::openhuman::memory_conversations::ConversationMessage;
 use crate::openhuman::tools::{Tool, ToolCategory, ToolSpec};
-use crate::openhuman::util::truncate_with_ellipsis;
 
 /// Prompt suffix injected into every typed sub-agent run.
 ///
@@ -1223,9 +1219,9 @@ async fn run_inner_loop(
     provider: &dyn Provider,
     history: &mut Vec<ChatMessage>,
     parent_tools: &[Box<dyn Tool>],
-    mut extra_tools: Vec<Box<dyn Tool>>,
+    extra_tools: Vec<Box<dyn Tool>>,
     tool_specs: &[ToolSpec],
-    mut allowed_names: HashSet<String>,
+    allowed_names: HashSet<String>,
     lazy_resolver: Option<LazyToolkitResolver>,
     model: &str,
     temperature: f64,
@@ -1238,32 +1234,18 @@ async fn run_inner_loop(
 ) -> Result<(String, usize, AggregatedUsage), SubagentRunError> {
     // An autonomous skill run (set via `with_autonomous_iter_cap`) lifts the
     // per-agent cap so sub-agents run until done / the circuit breaker trips.
-    // Take the larger of the two so a sub-agent that already wants more keeps it.
     let max_iterations = super::autonomous::autonomous_iter_cap()
         .map(|cap| cap.max(max_iterations))
         .unwrap_or(max_iterations)
         .max(1);
 
-    // Compiled digest of this sub-agent run's tool calls + results, for a
-    // graceful checkpoint if it hits the iteration cap (mirrors the main
-    // agent — bug-report-2026-05-26 A1). Accumulated as the loop runs so it's
-    // robust to history trimming.
-    let mut run_tool_digest = String::new();
-
-    // Sub-agent transcript stem — mirrors what
-    // `persist_subagent_transcript` used to compute on one-shot
-    // post-loop writes. We compute it once up front so **every
-    // iteration's** persist call resolves to the same file on disk:
-    //   `{parent_chain}__{unix_ts}_{agent_id}.jsonl`.
+    // Sub-agent transcript stem — computed once up front so every iteration's
+    // persist resolves to the same file: `{parent_chain}__{unix_ts}_{agent_id}`.
     let child_session_key = {
         let now = std::time::SystemTime::now()
             .duration_since(std::time::UNIX_EPOCH)
             .unwrap_or_default();
         let unix_ts = now.as_secs();
-        // Nanos component + task_id suffix disambiguate sibling sub-agents
-        // spawned within the same wall-clock second (tests and fan-out
-        // flows routinely do this, and a shared stem would overwrite the
-        // earlier sibling's transcript file).
         let nanos = now.subsec_nanos();
         let sanitized: String = agent_id
             .chars()
@@ -1294,47 +1276,15 @@ async fn run_inner_loop(
         format!("{parent_chain}__{child_session_key}")
     };
 
-    // ── Text-mode override for integrations_agent ────────────────────────────
-    //
-    // Large Composio toolkits (Notion, Salesforce, HubSpot, GitHub) ship
-    // per-action JSON schemas that are extraordinarily dense — deeply
-    // nested object/block types, recursive refs, huge discriminated
-    // unions. Fireworks-style providers (which the backend forwards to)
-    // auto-compile every entry in `tools: [...]` into a grammar and
-    // index rules with a `uint16_t` — max 65 535 rules. Even with the
-    // upstream fuzzy filter narrowing Notion 48 → 16, a single request
-    // generates 100 000+ rules and the provider rejects it with 400
-    // before generation starts.
-    //
-    // The fuzzy filter can't fix this because the bound is per-action,
-    // not per-toolkit: one Notion schema alone can produce thousands of
-    // rules. The only client-side lever is to **not send `tools: [...]`
-    // at all** — the backend has nothing to compile, so no grammar, so
-    // no ceiling. We then describe the tools in the system prompt as
-    // prose (XmlToolDispatcher format) and parse `<tool_call>` tags out
-    // of the model's free-form response text.
-    //
-    // Scoped to `integrations_agent` because that's the only path where we
-    // pass Composio toolkit schemas. Every other typed sub-agent
-    // (welcome, researcher, summarizer, …) uses small built-in tool
-    // sets that stay well under the grammar ceiling and benefit from
-    // native mode's stricter formatting guarantees.
+    // ── Text-mode override for integrations_agent ──
+    // Large Composio toolkits compile into provider grammars that blow the
+    // 65 535-rule ceiling, so for `integrations_agent` we omit `tools: [...]`
+    // and describe them in the system prompt as prose, parsing `<tool_call>`
+    // tags out of the model's response. Forcing `request_specs() == &[]` makes
+    // the engine skip native tools and fall back to its XML parse + batched
+    // `[Tool results]` path — exactly what text mode needs.
     let force_text_mode = agent_id == "integrations_agent" && !tool_specs.is_empty();
-
-    let supports_native =
-        !force_text_mode && provider.supports_native_tools() && !tool_specs.is_empty();
-    let request_tools = if supports_native {
-        Some(tool_specs)
-    } else {
-        None
-    };
-
     if force_text_mode {
-        // Append the XML tool protocol + available-tool list to the
-        // existing system prompt. `history[0]` is the system message
-        // built by `run_typed_mode` upstream; we
-        // augment it in-place so the model learns the call format for
-        // this session without an extra message round-trip.
         if let Some(sys) = history.iter_mut().find(|m| m.role == "system") {
             sys.content.push_str("\n\n");
             sys.content
@@ -1348,24 +1298,282 @@ async fn run_inner_loop(
         );
     }
 
-    let mut usage = AggregatedUsage::default();
+    let advertised_specs: Vec<ToolSpec> = if force_text_mode {
+        Vec::new()
+    } else {
+        tool_specs.to_vec()
+    };
 
-    // Per-iteration transcript persistence. Mirrors the main-agent
-    // turn loop: right after each provider response lands (and again
-    // after the final response is pushed) we flush the full history
-    // to disk. A crash during tool execution no longer erases the
-    // sub-agent's response — the bytes are on disk before any tool
-    // runs. Best-effort: write failures are logged at `debug` and the
-    // loop continues.
-    let persist_transcript = |history: &[ChatMessage], usage: &AggregatedUsage| {
+    let mut tool_source = SubagentToolSource {
+        parent_tools,
+        extra_tools,
+        allowed_names,
+        lazy_resolver,
+        advertised_specs,
+        handoff_cache,
+        policy: crate::openhuman::tools::policy::DefaultToolPolicy,
+        agent_id: agent_id.to_string(),
+    };
+    let mut observer = SubagentObserver {
+        worker_thread_id,
+        workspace_dir: parent.workspace_dir.clone(),
+        transcript_stem,
+        agent_id: agent_id.to_string(),
+        task_id: task_id.to_string(),
+        force_text_mode,
+        usage: AggregatedUsage::default(),
+    };
+    let checkpoint = SubagentCheckpoint {
+        provider,
+        model: model.to_string(),
+        temperature,
+        agent_id: agent_id.to_string(),
+    };
+    let progress = super::super::engine::SubagentProgress {
+        sink: parent.on_progress.clone(),
+        agent_id: agent_id.to_string(),
+        task_id: task_id.to_string(),
+    };
+
+    let parser = super::super::engine::DefaultParser;
+    let outcome = super::super::engine::run_turn_engine(
+        provider,
+        history,
+        &mut tool_source,
+        &progress,
+        &mut observer,
+        &checkpoint,
+        &parser,
+        "subagent",
+        model,
+        temperature,
+        true, // silent — sub-agents never echo to stdout
+        &crate::openhuman::config::MultimodalConfig::default(),
+        max_iterations,
+        None, // sub-agents don't stream a draft
+    )
+    .await?;
+
+    Ok((outcome.text, outcome.iterations as usize, observer.usage))
+}
+
+/// Progressive-disclosure handoff for one tool result. Output from
+/// `extract_from_result`, or text starting with "Error", is returned untouched.
+/// Anything else is run through `clean_tool_output`; if the cleaned text still
+/// estimates above `HANDOFF_OVERSIZE_THRESHOLD_TOKENS` (bytes / 4, rounded up)
+/// it is stored in `cache` and a placeholder is returned in its place.
+fn apply_handoff(
+    cache: &ResultHandoffCache,
+    tool_name: &str,
+    task_id: &str,
+    agent_id: &str,
+    result_text: String,
+) -> String {
+    // Extractor output and error text bypass cleaning and are never stashed.
+    if tool_name == "extract_from_result" || result_text.starts_with("Error") {
+        return result_text;
+    }
+    let raw_len = result_text.len();
+    let cleaned = clean_tool_output(&result_text);
+    let cleaned_len = cleaned.len();
+    if cleaned_len < raw_len {
+        tracing::debug!(
+            tool = %tool_name,
+            before_bytes = raw_len,
+            after_bytes = cleaned_len,
+            saved_pct = ((raw_len - cleaned_len) * 100) / raw_len.max(1),
+            "[subagent_runner:handoff] cleaned tool output (stripped markup/data-uris/whitespace)"
+        );
+    }
+    // Rough size estimate: four bytes per token, rounded up.
+    let tokens = cleaned_len.div_ceil(4);
+    if tokens <= HANDOFF_OVERSIZE_THRESHOLD_TOKENS {
+        return cleaned;
+    }
+    // Oversized: keep the full cleaned payload in the cache, hand back a stub.
+    let id = cache.store(tool_name.to_string(), cleaned.clone());
+    let placeholder = build_handoff_placeholder(tool_name, &id, &cleaned);
+    tracing::info!(
+        task_id = %task_id,
+        agent_id = %agent_id,
+        tool = %tool_name,
+        raw_tokens = tokens,
+        raw_bytes = cleaned_len,
+        threshold_tokens = HANDOFF_OVERSIZE_THRESHOLD_TOKENS,
+        result_id = %id,
+        "[subagent_runner:handoff] stashed oversized tool output; substituted placeholder into history"
+    );
+    placeholder
+}
+
+/// Sub-agent [`ToolSource`]. Per call it lazily registers a toolkit action the
+/// resolver knows but the allowlist lacks, rejects names still outside the
+/// allowlist, looks the tool up in `extra_tools` then `parent_tools`, and runs
+/// it through the shared [`run_one_tool`] (bringing the channel loop's approval
+/// gate, audit, credential scrub, tokenjuice and timeout to sub-agents), then
+/// applies the progressive-disclosure handoff when a cache is attached.
+struct SubagentToolSource<'a> {
+    parent_tools: &'a [Box<dyn Tool>],
+    extra_tools: Vec<Box<dyn Tool>>,
+    allowed_names: HashSet<String>,
+    lazy_resolver: Option<LazyToolkitResolver>,
+    advertised_specs: Vec<ToolSpec>,
+    handoff_cache: Option<&'a ResultHandoffCache>,
+    policy: crate::openhuman::tools::policy::DefaultToolPolicy,
+    agent_id: String,
+}
+
+#[async_trait::async_trait]
+impl super::super::engine::ToolSource for SubagentToolSource<'_> {
+    fn request_specs(&self) -> &[ToolSpec] {
+        &self.advertised_specs
+    }
+
+    async fn execute_call(
+        &mut self,
+        call: &super::super::parse::ParsedToolCall,
+        iteration: usize,
+        progress: &dyn super::super::engine::ProgressReporter,
+        progress_call_id: &str,
+    ) -> super::super::engine::ToolRunResult {
+        // Lazy registration: an unlisted name the resolver recognises is built
+        // on the spot and admitted to the allowlist (the fuzzy top-K filter trims
+        // the prompt, not execution). NOTE(review): `apply_handoff` below is
+        // passed "" for task_id, so its stash log has no task id — confirm.
+        if !self.allowed_names.contains(&call.name) {
+            if let Some(resolver) = self.lazy_resolver.as_ref() {
+                if let Some(tool) = resolver.resolve(&call.name) {
+                    tracing::info!(
+                        agent_id = %self.agent_id,
+                        tool = %call.name,
+                        "[subagent_runner] lazily registered toolkit action outside fuzzy top-K"
+                    );
+                    self.allowed_names.insert(tool.name().to_string());
+                    self.extra_tools.push(tool);
+                }
+            }
+        }
+
+        if !self.allowed_names.contains(&call.name) {
+            tracing::warn!(
+                agent_id = %self.agent_id,
+                tool = %call.name,
+                "[subagent_runner] tool not in allowlist for this sub-agent"
+            );
+            let iteration_u32 = (iteration + 1) as u32;
+            progress
+                .tool_started(progress_call_id, &call.name, &call.arguments, iteration_u32)
+                .await;
+            let mut available: Vec<&str> = self.allowed_names.iter().map(|s| s.as_str()).collect();
+            if let Some(resolver) = self.lazy_resolver.as_ref() {
+                available.extend(resolver.known_slugs());
+            }
+            available.sort_unstable();
+            available.dedup();
+            let text = format!(
+                "Error: tool '{}' is not available to the {} sub-agent. Available tools: {}",
+                call.name,
+                self.agent_id,
+                available.join(", ")
+            );
+            progress
+                .tool_completed(
+                    progress_call_id,
+                    &call.name,
+                    false,
+                    text.chars().count(),
+                    0,
+                    iteration_u32,
+                )
+                .await;
+            return super::super::engine::ToolRunResult {
+                text,
+                success: false,
+            };
+        }
+
+        let tool_opt: Option<&dyn Tool> = self
+            .extra_tools
+            .iter()
+            .find(|t| t.name() == call.name)
+            .or_else(|| self.parent_tools.iter().find(|t| t.name() == call.name))
+            .map(|b| b.as_ref());
+        let outcome = super::super::engine::run_one_tool(
+            tool_opt,
+            call,
+            iteration,
+            progress,
+            &self.policy,
+            None,
+            progress_call_id,
+        )
+        .await;
+
+        let text = match self.handoff_cache {
+            Some(cache) => apply_handoff(cache, &call.name, "", &self.agent_id, outcome.text),
+            None => outcome.text,
+        };
+        super::super::engine::ToolRunResult {
+            text,
+            success: outcome.success,
+        }
+    }
+}
+
+/// Sub-agent [`TurnObserver`] state: the running usage totals, the transcript
+/// stem each persist writes to, and the worker thread (when one is attached)
+/// that assistant intents / tool results / final responses are mirrored to.
+struct SubagentObserver {
+    worker_thread_id: Option<String>,
+    workspace_dir: std::path::PathBuf,
+    transcript_stem: String,
+    agent_id: String,
+    task_id: String,
+    force_text_mode: bool,
+    usage: AggregatedUsage,
+}
+
+impl SubagentObserver {
+    fn append_worker_message(
+        &self,
+        content: String,
+        sender: String,
+        extra_metadata: serde_json::Value,
+    ) {
+        let Some(ref thread_id) = self.worker_thread_id else {
+            return;
+        };
+        let message = ConversationMessage {
+            id: format!("{}:{}", sender, uuid::Uuid::new_v4()),
+            content,
+            message_type: "text".to_string(),
+            extra_metadata,
+            sender,
+            created_at: chrono::Utc::now().to_rfc3339(),
+        };
+        if let Err(err) = crate::openhuman::memory_conversations::append_message(
+            self.workspace_dir.clone(),
+            thread_id,
+            message,
+        ) {
+            tracing::debug!(
+                agent_id = %self.agent_id,
+                thread_id = %thread_id,
+                error = %err,
+                "[subagent_runner] failed to append message to worker thread"
+            );
+        }
+    }
+
+    fn persist_transcript(&self, history: &[ChatMessage]) {
         let path = match transcript::resolve_keyed_transcript_path(
-            &parent.workspace_dir,
-            &transcript_stem,
+            &self.workspace_dir,
+            &self.transcript_stem,
         ) {
             Ok(p) => p,
             Err(err) => {
                 tracing::debug!(
-                    agent_id = %agent_id,
+                    agent_id = %self.agent_id,
                     error = %err,
                     "[subagent_runner] failed to resolve transcript path"
                 );
@@ -1374,642 +1582,184 @@ async fn run_inner_loop(
         };
         let now = chrono::Utc::now().to_rfc3339();
         let meta = transcript::TranscriptMeta {
-            agent_name: agent_id.to_string(),
+            agent_name: self.agent_id.clone(),
             dispatcher: "native".into(),
             created: now.clone(),
             updated: now,
             turn_count: 1,
-            input_tokens: usage.input_tokens,
-            output_tokens: usage.output_tokens,
-            cached_input_tokens: usage.cached_input_tokens,
-            charged_amount_usd: usage.charged_amount_usd,
+            input_tokens: self.usage.input_tokens,
+            output_tokens: self.usage.output_tokens,
+            cached_input_tokens: self.usage.cached_input_tokens,
+            charged_amount_usd: self.usage.charged_amount_usd,
             thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(),
         };
         if let Err(err) = transcript::write_transcript(&path, history, &meta, None) {
             tracing::debug!(
-                agent_id = %agent_id,
+                agent_id = %self.agent_id,
                 error = %err,
                 "[subagent_runner] failed to write transcript"
             );
         }
-    };
-
-    let append_worker_message =
-        |content: String, sender: String, extra_metadata: serde_json::Value| {
-            if let Some(ref thread_id) = worker_thread_id {
-                let message = ConversationMessage {
-                    id: format!("{}:{}", sender, uuid::Uuid::new_v4()),
-                    content,
-                    message_type: "text".to_string(),
-                    extra_metadata,
-                    sender,
-                    created_at: chrono::Utc::now().to_rfc3339(),
-                };
-                if let Err(err) = crate::openhuman::memory_conversations::append_message(
-                    parent.workspace_dir.clone(),
-                    thread_id,
-                    message,
-                ) {
-                    tracing::debug!(
-                        agent_id = %agent_id,
-                        thread_id = %thread_id,
-                        error = %err,
-                        "[subagent_runner] failed to append message to worker thread"
-                    );
-                }
-            }
-        };
-
-    // Per-turn progress sink shared with the parent — `None` for runs
-    // that don't have a subscriber (CLI / triage / tests). Cloned upfront
-    // so the inner loop body doesn't repeatedly re-resolve `parent.on_progress`.
-    let progress_sink = parent.on_progress.clone();
-
-    // Repeated-failure circuit breaker (shared guard with run_tool_call_loop):
-    // halt the subagent with a root cause instead of grinding to
-    // MaxIterationsExceeded when it re-issues a doomed action or makes no
-    // progress (e.g. re-running `pip install` that keeps failing PEP 668).
-    let mut failure_guard = crate::openhuman::agent::harness::tool_loop::RepeatFailureGuard::new();
-    let mut halt_reason: Option<String> = None;
-    for iteration in 0..max_iterations {
-        tracing::debug!(
-            task_id = %task_id,
-            agent_id = %agent_id,
-            iteration,
-            history_len = history.len(),
-            "[subagent_runner] iteration start"
-        );
-
-        if let Some(ref tx) = progress_sink {
-            let _ = tx
-                .send(AgentProgress::SubagentIterationStarted {
-                    agent_id: agent_id.to_string(),
-                    task_id: task_id.to_string(),
-                    iteration: (iteration + 1) as u32,
-                    max_iterations: max_iterations as u32,
-                })
-                .await;
-        }
-
-        // Stream the child's tokens to the parent's progress sink so the
-        // UI can render the sub-agent's thinking/output live, attributed
-        // to this row via `task_id`. Mirrors the main turn loop
-        // (`session/turn.rs`): only set up the SSE sink when a listener
-        // exists, otherwise the channel buffer would back-pressure the
-        // provider and we'd lose the non-streaming HTTP fast path for
-        // providers that don't implement streaming.
-        let child_iteration_for_stream = (iteration + 1) as u32;
-        let (delta_tx_opt, delta_forwarder) = if let Some(ref sink) = progress_sink {
-            let (tx, mut rx) = tokio::sync::mpsc::channel::<ProviderDelta>(128);
-            let sink = sink.clone();
-            let agent_id_for_stream = agent_id.to_string();
-            let task_id_for_stream = task_id.to_string();
-            let forwarder = tokio::spawn(async move {
-                while let Some(event) = rx.recv().await {
-                    // Only visible text and reasoning deltas attribute to
-                    // the subagent transcript; tool-call arg fragments are
-                    // already surfaced via SubagentToolCall* lifecycle
-                    // events, so they're dropped here to avoid double-render.
-                    let mapped = match event {
-                        ProviderDelta::TextDelta { delta } => AgentProgress::SubagentTextDelta {
-                            agent_id: agent_id_for_stream.clone(),
-                            task_id: task_id_for_stream.clone(),
-                            delta,
-                            iteration: child_iteration_for_stream,
-                        },
-                        ProviderDelta::ThinkingDelta { delta } => {
-                            AgentProgress::SubagentThinkingDelta {
-                                agent_id: agent_id_for_stream.clone(),
-                                task_id: task_id_for_stream.clone(),
-                                delta,
-                                iteration: child_iteration_for_stream,
-                            }
-                        }
-                        ProviderDelta::ToolCallStart { .. }
-                        | ProviderDelta::ToolCallArgsDelta { .. } => continue,
-                    };
-                    // Await backpressure so streamed deltas arrive in order
-                    // and aren't silently dropped when the downstream
-                    // progress bridge is slow.
-                    if sink.send(mapped).await.is_err() {
-                        break;
-                    }
-                }
-            });
-            (Some(tx), Some(forwarder))
-        } else {
-            (None, None)
-        };
-
-        let chat_result = provider
-            .chat(
-                ChatRequest {
-                    messages: history.as_slice(),
-                    tools: request_tools,
-                    stream: delta_tx_opt.as_ref(),
-                },
-                model,
-                temperature,
-            )
-            .await;
-
-        // Drop the sender so the forwarder task observes channel close and
-        // terminates instead of leaking. This must run on BOTH the success
-        // and error paths — propagating the provider error with `?` before
-        // joining the forwarder would orphan the task and leak the sender.
-        drop(delta_tx_opt);
-        if let Some(forwarder) = delta_forwarder {
-            let _ = forwarder.await;
-        }
-        let resp = chat_result?;
+    }
+}
 
-        if let Some(ref u) = resp.usage {
-            usage.input_tokens += u.input_tokens;
-            usage.output_tokens += u.output_tokens;
-            usage.cached_input_tokens += u.cached_input_tokens;
-            usage.charged_amount_usd += u.charged_amount_usd;
-        }
+#[async_trait::async_trait]
+impl super::super::engine::TurnObserver for SubagentObserver {
+    fn record_usage(
+        &mut self,
+        _model: &str, // unused: totals are kept in one bucket, not split per model
+        usage: &crate::openhuman::inference::provider::UsageInfo,
+    ) {
+        self.usage.input_tokens += usage.input_tokens; // each call adds onto the running totals
+        self.usage.output_tokens += usage.output_tokens;
+        self.usage.cached_input_tokens += usage.cached_input_tokens;
+        self.usage.charged_amount_usd += usage.charged_amount_usd;
+    }
 
-        let response_text = resp.text.clone().unwrap_or_default();
-
-        // In text mode the model emits `<tool_call>{…}</tool_call>` tags
-        // inline inside `resp.text` (and `resp.tool_calls` is empty
-        // because we told the provider not to structure them). Parse
-        // them ourselves via the shared harness helper and synthesise a
-        // `ToolCall` per parsed block so the rest of the loop can stay
-        // uniform.
-        let native_calls: Vec<ToolCall> = if force_text_mode {
-            let (_cleaned, parsed) = super::super::parse::parse_tool_calls(&response_text);
-            parsed
-                .into_iter()
-                .enumerate()
-                .map(|(i, call)| {
-                    let args_str = if call.arguments.is_null() {
-                        "{}".to_string()
-                    } else {
-                        call.arguments.to_string()
-                    };
-                    ToolCall {
-                        id: call
-                            .id
-                            .clone()
-                            .unwrap_or_else(|| format!("call_text_{iteration}_{i}")),
-                        name: call.name,
-                        arguments: args_str,
-                    }
-                })
-                .collect()
+    fn on_assistant(
+        &mut self,
+        _display_text: &str,
+        response_text: &str, // mirrored verbatim as the worker-thread message content
+        _reasoning_content: Option<&str>,
+        _native_tool_calls: &[crate::openhuman::inference::provider::ToolCall],
+        parsed_calls: &[super::super::parse::ParsedToolCall],
+        iteration: usize,
+        is_final: bool, // selects which metadata shape is attached below
+    ) {
+        let tool_calls = parsed_calls.len(); // only the count is mirrored, not the calls
+        let extra = if is_final {
+            serde_json::json!({
+                "scope": "worker_thread",
+                "agent_id": self.agent_id,
+                "task_id": self.task_id,
+                "iteration": iteration + 1,
+                "final": true,
+            })
         } else {
-            resp.tool_calls.clone()
+            serde_json::json!({
+                "scope": "worker_thread",
+                "agent_id": self.agent_id,
+                "task_id": self.task_id,
+                "iteration": iteration + 1,
+                "tool_calls": tool_calls,
+            })
         };
+        self.append_worker_message(response_text.to_string(), "agent".to_string(), extra);
+    }
 
-        if native_calls.is_empty() {
-            tracing::debug!(
-                task_id = %task_id,
-                agent_id = %agent_id,
-                iteration,
-                final_chars = response_text.chars().count(),
-                "[subagent_runner] no tool calls — returning final response"
-            );
-            history.push(ChatMessage::assistant(response_text.clone()));
-            append_worker_message(
-                response_text.clone(),
-                "agent".to_string(),
-                serde_json::json!({
-                    "scope": "worker_thread",
-                    "agent_id": agent_id,
-                    "task_id": task_id,
-                    "iteration": iteration + 1,
-                    "final": true,
-                }),
-            );
-            // Persist the final response before returning so the
-            // transcript always captures the last provider reply.
-            persist_transcript(history, &usage);
-            return Ok((response_text, iteration + 1, usage));
-        }
-
-        // Persist the assistant turn. In native mode use the canonical
-        // serialiser (wraps text + structured tool_calls for the
-        // backend's jinja template). In text mode the raw response
-        // already contains the `<tool_call>` tags inline, so persist it
-        // verbatim — on the next turn the model sees its own prior
-        // emissions exactly as it wrote them.
-        if force_text_mode {
-            history.push(ChatMessage::assistant(response_text.clone()));
-        } else {
-            let assistant_history_content = super::super::parse::build_native_assistant_history(
-                &response_text,
-                resp.reasoning_content.as_deref(),
-                &native_calls,
-            );
-            history.push(ChatMessage::assistant(assistant_history_content));
+    fn on_tool_result(
+        &mut self,
+        call_id: &str,
+        tool_name: &str,
+        result_text: &str,
+        _success: bool, // unused: the mirrored message carries no success flag
+        iteration: usize,
+    ) {
+        // Only native mode mirrors results one message per call. With text mode
+        // forced, skip here: the batch is mirrored by `on_results_batch` instead.
+        if self.force_text_mode {
+            return;
         }
-
-        append_worker_message(
-            response_text.clone(),
-            "agent".to_string(),
+        self.append_worker_message(
+            result_text.to_string(),
+            "user".to_string(),
             serde_json::json!({
                 "scope": "worker_thread",
-                "agent_id": agent_id,
-                "task_id": task_id,
+                "agent_id": self.agent_id,
+                "task_id": self.task_id,
                 "iteration": iteration + 1,
-                "tool_calls": native_calls.len(),
+                "tool_call_id": call_id,
+                "tool_name": tool_name,
             }),
         );
+    }
 
-        // Persist the assistant response + tool-call intents **before**
-        // executing tools. If the session crashes mid-tool-call we
-        // still have what the model emitted on disk.
-        persist_transcript(history, &usage);
-
-        // Execute each call, collect outputs. Native mode pushes one
-        // `role=tool` message per call with the structured `tool_call_id`
-        // reference. Text mode has no such reference (the model just
-        // emitted tags in prose), so we batch all results into a single
-        // user message formatted with `<tool_result>` tags — mirroring
-        // XmlToolDispatcher's `format_results`.
-        let mut text_mode_result_block = String::new();
-        for call in &native_calls {
-            let call_started = Instant::now();
-            if let Some(ref tx) = progress_sink {
-                let _ = tx
-                    .send(AgentProgress::SubagentToolCallStarted {
-                        agent_id: agent_id.to_string(),
-                        task_id: task_id.to_string(),
-                        call_id: call.id.clone(),
-                        tool_name: call.name.clone(),
-                        iteration: (iteration + 1) as u32,
-                    })
-                    .await;
-            }
-
-            // Lazy registration: if the call is for an unknown tool but
-            // matches a real action slug in the bound toolkit's full
-            // catalogue, build the [`ComposioActionTool`] on the spot and
-            // admit it to the allowlist for this and subsequent turns.
-            // The fuzzy top-K filter exists to keep schemas out of the
-            // system prompt, not to gate execution — when the model
-            // names the slug correctly we should just dispatch.
-            if !allowed_names.contains(&call.name) {
-                if let Some(resolver) = lazy_resolver.as_ref() {
-                    if let Some(tool) = resolver.resolve(&call.name) {
-                        tracing::info!(
-                            task_id = %task_id,
-                            agent_id = %agent_id,
-                            tool = %call.name,
-                            "[subagent_runner] lazily registered toolkit action outside fuzzy top-K"
-                        );
-                        allowed_names.insert(tool.name().to_string());
-                        extra_tools.push(tool);
-                    }
-                }
-            }
+    /// Mirrors a batched tool-results message onto the worker thread as one
+    /// `"user"` entry whose metadata is tagged `"mode": "text"` (`on_tool_result`
+    /// skips its per-call mirroring when text mode is forced).
+    fn on_results_batch(&mut self, content: &str, iteration: usize) {
+        let metadata = serde_json::json!({
+            "scope": "worker_thread",
+            "agent_id": self.agent_id,
+            "task_id": self.task_id,
+            "iteration": iteration + 1,
+            "mode": "text",
+        });
+        self.append_worker_message(content.to_string(), "user".to_string(), metadata);
+    }
 
-            let result_text = if !allowed_names.contains(&call.name) {
-                tracing::warn!(
-                    task_id = %task_id,
-                    agent_id = %agent_id,
-                    tool = %call.name,
-                    "[subagent_runner] tool not in allowlist for this sub-agent"
-                );
-                let mut available: Vec<&str> = allowed_names.iter().map(|s| s.as_str()).collect();
-                if let Some(resolver) = lazy_resolver.as_ref() {
-                    available.extend(resolver.known_slugs());
-                }
-                available.sort_unstable();
-                available.dedup();
-                format!(
-                    "Error: tool '{}' is not available to the {} sub-agent. Available tools: {}",
-                    call.name,
-                    agent_id,
-                    available.join(", ")
-                )
-            } else if let Some(tool) = extra_tools
-                .iter()
-                .find(|t| t.name() == call.name)
-                .or_else(|| parent_tools.iter().find(|t| t.name() == call.name))
-            {
-                let args = parse_tool_arguments(&call.arguments);
-                let timeout = crate::openhuman::tool_timeout::tool_execution_timeout_duration();
-                // ── External-effect approval gate (#1339, #2135) ─
-                // Subagents share the same gate as the parent loop;
-                // see `tool_loop.rs` for the rationale.
-                //
-                // When the call is allowed and persisted, we keep
-                // hold of the `request_id` so we can stamp the
-                // terminal execution outcome onto the same audit
-                // row (issue #2135).
-                let mut approval_request_id: Option<String> = None;
-                let mut approval_gate_for_audit: Option<
-                    std::sync::Arc<crate::openhuman::approval::ApprovalGate>,
-                > = None;
-                let gate_denial: Option<String> = if tool.external_effect_with_args(&args) {
-                    if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() {
-                        let summary =
-                            crate::openhuman::approval::summarize_action(&call.name, &args);
-                        let redacted = crate::openhuman::approval::redact_args(&args);
-                        let (outcome, request_id) =
-                            gate.intercept_audited(&call.name, &summary, redacted).await;
-                        match outcome {
-                            crate::openhuman::approval::GateOutcome::Allow => {
-                                approval_request_id = request_id;
-                                if approval_request_id.is_some() {
-                                    approval_gate_for_audit = Some(gate);
-                                }
-                                None
-                            }
-                            crate::openhuman::approval::GateOutcome::Deny { reason } => {
-                                tracing::warn!(
-                                    tool = call.name.as_str(),
-                                    reason = %reason,
-                                    "[subagent_runner] approval gate denied tool call"
-                                );
-                                Some(reason)
-                            }
-                        }
-                    } else {
-                        None
-                    }
-                } else {
-                    None
-                };
+    fn after_iteration(&mut self, history: &[ChatMessage], _iteration: usize) {
+        self.persist_transcript(history); // best-effort: write failures are only logged
+    }
+}
 
-                if let Some(reason) = gate_denial {
-                    // Prefix as Error so the downstream `call_success`
-                    // computation (`!result_text.starts_with("Error")`)
-                    // marks the denial as a failed tool call in
-                    // progress events and tool_result blocks.
-                    // (CodeRabbit review on PR #2149.)
-                    format!("Error: {reason}")
-                } else {
-                    let (raw, exec_success) =
-                        match tokio::time::timeout(timeout, tool.execute(args)).await {
-                            Ok(Ok(result)) => {
-                                let raw = result.output();
-                                if result.is_error {
-                                    (format!("Error: {raw}"), false)
-                                } else {
-                                    (raw, true)
-                                }
-                            }
-                            Ok(Err(err)) => {
-                                (format!("Error executing {}: {err}", call.name), false)
-                            }
-                            Err(_) => (format!("Error: tool '{}' timed out", call.name), false),
-                        };
-                    // Stamp the terminal status onto the
-                    // pending_approvals audit row — best-effort,
-                    // failures don't propagate to the agent (#2135).
-                    // Success comes from the structured execute result,
-                    // not from parsing `raw.starts_with("Error")` — a
-                    // legitimate success payload can start with "Error"
-                    // (search hits, copied logs), which would otherwise
-                    // persist a false Failure (CodeRabbit review on #2367).
-                    if let (Some(gate), Some(req_id)) = (
-                        approval_gate_for_audit.as_ref(),
-                        approval_request_id.as_ref(),
-                    ) {
-                        let success = exec_success;
-                        let exec_outcome = if success {
-                            crate::openhuman::approval::ExecutionOutcome::Success
-                        } else {
-                            crate::openhuman::approval::ExecutionOutcome::Failure
-                        };
-                        let err_text = if success { None } else { Some(raw.as_str()) };
-                        gate.record_execution(req_id, exec_outcome, err_text);
-                    }
-                    raw
-                }
-            } else {
-                format!("Unknown tool: {}", call.name)
-            };
+/// Sub-agent [`CheckpointStrategy`]: when the iteration cap is hit, summarize
+/// the run-so-far into a resumable checkpoint (so the delegating agent can
+/// continue from partial progress) instead of erroring. Falls back to a
+/// deterministic digest summary if the summarization call fails or returns no
+/// prose.
+struct SubagentCheckpoint<'a> {
+    provider: &'a dyn Provider, // issues the tool-less checkpoint summarization chat call
+    model: String, // model passed to that summarization call
+    temperature: f64, // temperature passed to that summarization call
+    agent_id: String, // named in the summary prompt and in the failure log
+}
 
-            // Progressive-disclosure handoff: if this spawn has a cache
-            // (integrations_agent-with-toolkit path) and the result is large
-            // and not itself an error / not from the extractor tool,
-            // stash the raw payload and replace it in history with a
-            // short placeholder. The sub-agent can drill in with
-            // `extract_from_result(result_id=..., query=...)` on the
-            // next turn. Errors and already-extracted output go through
-            // unchanged — no point handing off a 200-byte error or an
-            // already-compressed summary.
-            //
-            // Cleaning happens before the size check so HTML-heavy tool
-            // outputs (Gmail bodies, HTML-embedded Notion blocks) that
-            // drop below threshold after stripping markup skip the
-            // extract pipeline entirely. For anything still over
-            // threshold, the cache stores the cleaned text — chunks see
-            // real content, not `<div>` soup.
-            let result_text = if let Some(cache) = handoff_cache {
-                let skip_cleaning =
-                    call.name == "extract_from_result" || result_text.starts_with("Error");
-                let cleaned = if skip_cleaning {
-                    result_text
+#[async_trait::async_trait]
+impl super::super::engine::CheckpointStrategy for SubagentCheckpoint<'_> {
+    async fn on_max_iter(
+        &self,
+        digest: &str,
+        max_iterations: usize,
+    ) -> anyhow::Result<super::super::engine::CheckpointOutcome> {
+        let agent_id = &self.agent_id;
+        let deterministic = format!(
+            "I reached my tool-call limit ({max_iterations} steps) before finishing this task. \
+             Progress so far (tool calls + results):\n{digest}\n\nThe task is incomplete — the above is \
+             what I accomplished; continue from here."
+        );
+        let summary_input = vec![ChatMessage::user(format!(
+            "You are sub-agent `{agent_id}` and reached your tool-call limit before finishing. Here are \
+             the tool calls you made and their results — compile a brief progress checkpoint (what you \
+             accomplished, what still remains) for the agent that delegated to you. Do not call tools.\n\n{digest}"
+        ))];
+        match self
+            .provider
+            .chat(
+                ChatRequest {
+                    messages: &summary_input,
+                    tools: None,
+                    stream: None,
+                },
+                &self.model,
+                self.temperature,
+            )
+            .await
+        {
+            Ok(resp) => {
+                let usage = resp.usage.clone();
+                let raw = resp.text.unwrap_or_default();
+                let (prose, _) = super::super::parse::parse_tool_calls(&raw);
+                let text = if prose.trim().is_empty() {
+                    deterministic
                 } else {
-                    let pre_len = result_text.len();
-                    let cleaned = clean_tool_output(&result_text);
-                    if cleaned.len() < pre_len {
-                        tracing::debug!(
-                            tool = %call.name,
-                            before_bytes = pre_len,
-                            after_bytes = cleaned.len(),
-                            saved_pct = ((pre_len - cleaned.len()) * 100) / pre_len.max(1),
-                            "[subagent_runner:handoff] cleaned tool output (stripped markup/data-uris/whitespace)"
-                        );
-                    }
-                    cleaned
+                    prose
                 };
-                let tokens = cleaned.len().div_ceil(4);
-                if !skip_cleaning && tokens > HANDOFF_OVERSIZE_THRESHOLD_TOKENS {
-                    let id = cache.store(call.name.clone(), cleaned.clone());
-                    let placeholder = build_handoff_placeholder(&call.name, &id, &cleaned);
-                    tracing::info!(
-                        task_id = %task_id,
-                        agent_id = %agent_id,
-                        tool = %call.name,
-                        raw_tokens = tokens,
-                        raw_bytes = cleaned.len(),
-                        threshold_tokens = HANDOFF_OVERSIZE_THRESHOLD_TOKENS,
-                        result_id = %id,
-                        "[subagent_runner:handoff] stashed oversized tool output; substituted placeholder into history"
-                    );
-                    placeholder
-                } else {
-                    cleaned
-                }
-            } else {
-                result_text
-            };
-
-            let call_success = !result_text.starts_with("Error");
-            let call_output_chars = result_text.chars().count();
-            let call_elapsed_ms = call_started.elapsed().as_millis() as u64;
-
-            // Record this call in the run digest (output truncated to bound
-            // size) for a possible max-iteration checkpoint.
-            run_tool_digest.push_str(&format!(
-                "- {} [{}]: {}\n",
-                call.name,
-                if call_success { "ok" } else { "failed" },
-                truncate_with_ellipsis(&result_text, 800)
-            ));
-
-            // Repeated-failure circuit breaker (shared guard). `call.arguments`
-            // is the stable signature; on a trip we stash the root-cause summary
-            // and bail after this iteration's tool results are recorded.
-            if let Some(reason) =
-                failure_guard.record(&call.name, &call.arguments, call_success, &result_text)
-            {
-                tracing::warn!(
-                    agent_id = %agent_id,
-                    tool = call.name.as_str(),
-                    "[subagent_runner] circuit breaker tripped — halting with root cause"
-                );
-                halt_reason = Some(reason);
+                Ok(super::super::engine::CheckpointOutcome { text, usage })
             }
-
-            if force_text_mode {
-                let status = if call_success { "ok" } else { "error" };
-                let _ = std::fmt::Write::write_fmt(
-                    &mut text_mode_result_block,
-                    format_args!(
-                        "<tool_result name=\"{}\" status=\"{}\">\n{}\n</tool_result>\n",
-                        call.name, status, result_text
-                    ),
-                );
-            } else {
-                let tool_msg = serde_json::json!({
-                    "tool_call_id": call.id,
-                    "content": result_text.clone(),
-                });
-                history.push(ChatMessage::tool(tool_msg.to_string()));
-                append_worker_message(
-                    result_text.clone(),
-                    "user".to_string(),
-                    serde_json::json!({
-                        "scope": "worker_thread",
-                        "agent_id": agent_id,
-                        "task_id": task_id,
-                        "iteration": iteration + 1,
-                        "tool_call_id": call.id,
-                        "tool_name": call.name,
-                    }),
+            Err(e) => {
+                tracing::warn!(
+                    agent_id = %self.agent_id,
+                    error = %e,
+                    "[subagent_runner] checkpoint summary call failed — using deterministic fallback"
                 );
+                Ok(super::super::engine::CheckpointOutcome {
+                    text: deterministic,
+                    usage: None,
+                })
             }
-
-            if let Some(ref tx) = progress_sink {
-                let _ = tx
-                    .send(AgentProgress::SubagentToolCallCompleted {
-                        agent_id: agent_id.to_string(),
-                        task_id: task_id.to_string(),
-                        call_id: call.id.clone(),
-                        tool_name: call.name.clone(),
-                        success: call_success,
-                        output_chars: call_output_chars,
-                        elapsed_ms: call_elapsed_ms,
-                        iteration: (iteration + 1) as u32,
-                    })
-                    .await;
-            }
-        }
-
-        if force_text_mode && !text_mode_result_block.is_empty() {
-            let content = format!("[Tool results]\n{text_mode_result_block}");
-            history.push(ChatMessage::user(content.clone()));
-            append_worker_message(
-                content,
-                "user".to_string(),
-                serde_json::json!({
-                    "scope": "worker_thread",
-                    "agent_id": agent_id,
-                    "task_id": task_id,
-                    "iteration": iteration + 1,
-                    "mode": "text",
-                }),
-            );
-        }
-
-        // Persist again after tool results have been appended so the
-        // on-disk transcript reflects each round's complete
-        // assistant-intent + tool-result pair. Without this, a crash
-        // between `persist_transcript` at line ~1044 and the next
-        // iteration's provider call would leave the transcript without
-        // the tool outputs the next turn will be reasoning from.
-        persist_transcript(history, &usage);
-
-        // Circuit breaker tripped this iteration: return the root-cause summary
-        // as the subagent's result (tool results are already in `history`),
-        // instead of looping to MaxIterationsExceeded and being re-delegated.
-        if let Some(reason) = halt_reason.take() {
-            return Ok((reason, iteration + 1, usage));
         }
     }
-
-    // Iteration cap reached. Instead of erroring — which discards all of the
-    // sub-agent's partial work (the parent just sees "delegate failed") —
-    // compile a graceful checkpoint of what it accomplished and return it as
-    // the result, so the calling agent can continue from the partial progress
-    // (mirrors the main-agent checkpoint — bug-report-2026-05-26 A1).
-    let digest = if run_tool_digest.is_empty() {
-        "(no tool calls completed)"
-    } else {
-        run_tool_digest.as_str()
-    };
-    let deterministic = format!(
-        "I reached my tool-call limit ({max_iterations} steps) before finishing this task. \
-         Progress so far (tool calls + results):\n{digest}\n\nThe task is incomplete — the above is \
-         what I accomplished; continue from here."
-    );
-    let summary_input = vec![ChatMessage::user(format!(
-        "You are sub-agent `{agent_id}` and reached your tool-call limit before finishing. Here are \
-         the tool calls you made and their results — compile a brief progress checkpoint (what you \
-         accomplished, what still remains) for the agent that delegated to you. Do not call tools.\n\n{digest}"
-    ))];
-    let checkpoint = match provider
-        .chat(
-            ChatRequest {
-                messages: &summary_input,
-                tools: None,
-                stream: None,
-            },
-            model,
-            temperature,
-        )
-        .await
-    {
-        Ok(resp) => {
-            if let Some(ref u) = resp.usage {
-                usage.input_tokens += u.input_tokens;
-                usage.output_tokens += u.output_tokens;
-                usage.cached_input_tokens += u.cached_input_tokens;
-                usage.charged_amount_usd += u.charged_amount_usd;
-            }
-            // Strip any stray tool-call markup a text-mode model emits; if no
-            // prose survives, fall back to the deterministic digest.
-            let raw = resp.text.unwrap_or_default();
-            let (prose, _) = super::super::parse::parse_tool_calls(&raw);
-            if prose.trim().is_empty() {
-                deterministic
-            } else {
-                prose
-            }
-        }
-        Err(e) => {
-            tracing::warn!(
-                agent_id = %agent_id,
-                task_id = %task_id,
-                error = %e,
-                "[subagent_runner] checkpoint summary call failed — using deterministic fallback"
-            );
-            deterministic
-        }
-    };
-    // NB: unlike the main-agent path, this checkpoint is intentionally NOT
-    // written to a sub-agent transcript — the calling agent's transcript
-    // captures the delegated result, so there's no data loss. Don't "fix"
-    // this by adding a `persist_subagent_transcript` call.
-    Ok((checkpoint, max_iterations, usage))
 }
 
 fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
diff --git a/src/openhuman/agent/harness/tool_loop.rs b/src/openhuman/agent/harness/tool_loop.rs
index 26e243bc34..4b9cbfbfbc 100644
--- a/src/openhuman/agent/harness/tool_loop.rs
+++ b/src/openhuman/agent/harness/tool_loop.rs
@@ -1,28 +1,14 @@
-use crate::openhuman::agent::cost::TurnCost;
-use crate::openhuman::agent::multimodal;
 use crate::openhuman::agent::progress::AgentProgress;
-use crate::openhuman::agent::stop_hooks::{current_stop_hooks, StopDecision, TurnState};
-use crate::openhuman::inference::provider::{
-    ChatMessage, ChatRequest, Provider, ProviderCapabilityError, ProviderDelta,
-};
-use crate::openhuman::tools::policy::{DefaultToolPolicy, PolicyDecision, ToolPolicy};
-use crate::openhuman::tools::traits::ToolScope;
+use crate::openhuman::inference::provider::{ChatMessage, Provider};
+use crate::openhuman::tools::policy::{DefaultToolPolicy, ToolPolicy};
 use crate::openhuman::tools::Tool;
 use anyhow::Result;
 use std::collections::HashSet;
-use std::fmt::Write as _;
-use std::io::Write as _;
 
-use super::credentials::scrub_credentials;
-use super::parse::{build_native_assistant_history, parse_structured_tool_calls, parse_tool_calls};
 use super::payload_summarizer::PayloadSummarizer;
-use crate::openhuman::context::guard::{ContextCheckResult, ContextGuard};
-use crate::openhuman::inference::model_context::context_window_for_model;
-
-use super::token_budget::trim_chat_messages_to_budget;
 
 /// Minimum characters per chunk when relaying LLM text to a streaming draft.
-const STREAM_CHUNK_MIN_CHARS: usize = 80;
+pub(crate) const STREAM_CHUNK_MIN_CHARS: usize = 80;
 
 /// Default maximum agentic tool-use iterations per user message to prevent runaway loops.
 /// Used as a safe fallback when `max_tool_iterations` is unset or configured as zero.
@@ -272,932 +258,49 @@ pub(crate) async fn run_tool_call_loop(
         max_tool_iterations
     };
 
-    // Is a given tool name visible to the model this turn? `None`
-    // means no filter (legacy behaviour = everything visible).
-    let is_visible = |name: &str| -> bool {
-        match visible_tool_names {
-            Some(set) => set.contains(name),
-            None => true,
-        }
-    };
-
-    // Filter to visible tools, then dedup by name before sending to the
-    // provider. Registry tools may collide with per-turn synthesised
-    // extra_tools (e.g. an `ArchetypeDelegationTool` whose
-    // `delegate_name = "research"` shadowing a same-named skill). Some
-    // providers (Anthropic, OpenHuman cloud after the uniqueness-enforcement
-    // rollout) 400 on duplicate tool names — see TAURI-RUST-4.
-    let filtered_specs: Vec<crate::openhuman::tools::ToolSpec> = tools_registry
-        .iter()
-        .chain(extra_tools.iter())
-        .filter(|tool| is_visible(tool.name()))
-        .map(|tool| tool.spec())
-        .collect();
-    let tool_specs =
-        crate::openhuman::agent::harness::session::dedup_visible_tool_specs(filtered_specs);
-    let use_native_tools = provider.supports_native_tools() && !tool_specs.is_empty();
-
+    // The agentic loop itself now lives in the shared turn engine; this
+    // function is a thin adapter that builds the channel/CLI tool source
+    // (registry + per-turn extras, visibility whitelist, pluggable policy)
+    // and hands off. The signature is retained verbatim so existing callers
+    // (the `agent.run_turn` bus handler, triage, the payload summarizer, and
+    // the harness test suite) are unaffected.
     log::debug!(
-        "[tool-loop] Registry has {} tool(s), extra {} tool(s), filter={} — {} visible in schema: [{}]",
+        "[tool-loop] Registry has {} tool(s), extra {} tool(s), filter={}",
         tools_registry.len(),
         extra_tools.len(),
         visible_tool_names
             .map(|s| format!("whitelist({})", s.len()))
             .unwrap_or_else(|| "none".to_string()),
-        tool_specs.len(),
-        tool_specs
-            .iter()
-            .map(|s| s.name.as_str())
-            .collect::<Vec<_>>()
-            .join(", ")
     );
-
-    let mut context_guard = context_window_for_model(model)
-        .map(ContextGuard::with_context_window)
-        .unwrap_or_else(ContextGuard::new);
-    let mut turn_cost = TurnCost::new();
-
-    // Announce turn start to progress subscribers (if any). We use
-    // `send().await` for lifecycle (turn/iteration) events so they
-    // survive downstream backpressure — dropping one of these would
-    // desync the web-channel progress bridge. High-volume delta events
-    // use the same backpressure discipline (see below).
-    if let Some(ref sink) = on_progress {
-        if let Err(e) = sink.send(AgentProgress::TurnStarted).await {
-            log::warn!("[agent_loop] progress sink closed at TurnStarted: {e}");
-        }
-    }
-
-    let stop_hooks = current_stop_hooks();
-    // Repeated-failure circuit breaker — halts with a root cause rather than
-    // grinding to `max_iterations` (shared with the subagent loop).
-    let mut failure_guard = RepeatFailureGuard::new();
-    let mut halt_reason: Option<String> = None;
-    for iteration in 0..max_iterations {
-        if let Some(ref sink) = on_progress {
-            if let Err(e) = sink
-                .send(AgentProgress::IterationStarted {
-                    iteration: (iteration + 1) as u32,
-                    max_iterations: max_iterations as u32,
-                })
-                .await
-            {
-                log::warn!("[agent_loop] progress sink closed at IterationStarted: {e}");
-            }
-        }
-
-        // ── Stop hooks: policy check before the next LLM call ──
-        if !stop_hooks.is_empty() {
-            let state = TurnState {
-                iteration: (iteration + 1) as u32,
-                max_iterations: max_iterations as u32,
-                cost: &turn_cost,
-                model,
-            };
-            for hook in &stop_hooks {
-                match hook.check(&state).await {
-                    StopDecision::Continue => {}
-                    StopDecision::Stop { reason } => {
-                        tracing::warn!(
-                            iteration = (iteration + 1),
-                            hook = hook.name(),
-                            reason = %reason,
-                            "[agent_loop] stop hook triggered — aborting turn"
-                        );
-                        anyhow::bail!("Agent turn stopped by hook '{}': {reason}", hook.name());
-                    }
-                }
-            }
-        }
-
-        // ── Context guard: check utilization before each LLM call ──
-        match context_guard.check() {
-            ContextCheckResult::Ok => {}
-            ContextCheckResult::CompactionNeeded => {
-                tracing::warn!(
-                    iteration,
-                    "[agent_loop] context guard: compaction needed (>{:.0}% full)",
-                    crate::openhuman::context::guard::COMPACTION_TRIGGER_THRESHOLD * 100.0
-                );
-                // Compaction is handled by history management upstream;
-                // log and continue so the caller can act on it.
-            }
-            ContextCheckResult::ContextExhausted {
-                utilization_pct,
-                reason,
-            } => {
-                let msg = format!("Context window exhausted ({utilization_pct}% full): {reason}");
-                crate::core::observability::report_error(
-                    msg.as_str(),
-                    "agent",
-                    "context_exhausted",
-                    &[
-                        ("provider", provider_name),
-                        ("model", model),
-                        ("utilization_pct", &utilization_pct.to_string()),
-                    ],
-                );
-                anyhow::bail!(msg);
-            }
-        }
-
-        if let Some(context_window) = context_window_for_model(model) {
-            let budget_outcome = trim_chat_messages_to_budget(history, context_window);
-            if budget_outcome.trimmed {
-                log::warn!(
-                    "[agent_loop] pre-dispatch history trimmed model={} context_window={} original_tokens={} final_tokens={} messages_removed={}",
-                    model,
-                    context_window,
-                    budget_outcome.original_tokens,
-                    budget_outcome.final_tokens,
-                    budget_outcome.messages_removed
-                );
-            } else {
-                tracing::debug!(
-                    iteration,
-                    model,
-                    context_window,
-                    estimated_tokens = budget_outcome.final_tokens,
-                    "[agent_loop] pre-dispatch token budget ok"
-                );
-            }
-        }
-
-        tracing::debug!(iteration, "[agent_loop] sending LLM request");
-        let image_marker_count = multimodal::count_image_markers(history);
-        if image_marker_count > 0 && !provider.supports_vision() {
-            let cap_err = ProviderCapabilityError {
-                provider: provider_name.to_string(),
-                capability: "vision".to_string(),
-                message: format!(
-                    "received {image_marker_count} image marker(s), but this provider does not support vision input"
-                ),
-            };
-            crate::core::observability::report_error(
-                &cap_err,
-                "agent",
-                "provider_capability",
-                &[
-                    ("provider", provider_name),
-                    ("capability", "vision"),
-                    ("model", model),
-                ],
-            );
-            return Err(cap_err.into());
-        }
-
-        let prepared_messages =
-            multimodal::prepare_messages_for_provider(history, multimodal_config).await?;
-
-        // Unified path via Provider::chat so provider-specific native tool logic
-        // (OpenAI/Anthropic/OpenRouter/compatible adapters) is honored.
-        let request_tools = if use_native_tools {
-            Some(tool_specs.as_slice())
-        } else {
-            None
-        };
-
-        // Wire up a ProviderDelta → AgentProgress forwarder for this
-        // iteration when a progress sink exists. Senders dropped after
-        // the chat call so the forwarder task exits cleanly.
-        let iteration_for_stream = (iteration + 1) as u32;
-        let (delta_tx_opt, delta_forwarder) = if let Some(progress_sink) = on_progress.clone() {
-            let (tx, mut rx) = tokio::sync::mpsc::channel::<ProviderDelta>(128);
-            let forwarder = tokio::spawn(async move {
-                while let Some(event) = rx.recv().await {
-                    let mapped = match event {
-                        ProviderDelta::TextDelta { delta } => AgentProgress::TextDelta {
-                            delta,
-                            iteration: iteration_for_stream,
-                        },
-                        ProviderDelta::ThinkingDelta { delta } => AgentProgress::ThinkingDelta {
-                            delta,
-                            iteration: iteration_for_stream,
-                        },
-                        ProviderDelta::ToolCallStart { call_id, tool_name } => {
-                            AgentProgress::ToolCallArgsDelta {
-                                call_id,
-                                tool_name,
-                                delta: String::new(),
-                                iteration: iteration_for_stream,
-                            }
-                        }
-                        ProviderDelta::ToolCallArgsDelta { call_id, delta } => {
-                            AgentProgress::ToolCallArgsDelta {
-                                call_id,
-                                tool_name: String::new(),
-                                delta,
-                                iteration: iteration_for_stream,
-                            }
-                        }
-                    };
-                    // Await backpressure rather than dropping deltas so
-                    // partial streamed text/args stays consistent with the
-                    // eventual ToolCallStarted / ToolCallCompleted events.
-                    if progress_sink.send(mapped).await.is_err() {
-                        // Downstream closed — abandon the forwarder.
-                        break;
-                    }
-                }
-            });
-            (Some(tx), Some(forwarder))
-        } else {
-            (None, None)
-        };
-
-        let chat_result = provider
-            .chat(
-                ChatRequest {
-                    messages: &prepared_messages.messages,
-                    tools: request_tools,
-                    stream: delta_tx_opt.as_ref(),
-                },
-                model,
-                temperature,
-            )
-            .await;
-
-        drop(delta_tx_opt);
-        if let Some(handle) = delta_forwarder {
-            let _ = handle.await;
-        }
-
-        let (response_text, parsed_text, tool_calls, assistant_history_content, native_tool_calls) =
-            match chat_result {
-                Ok(resp) => {
-                    // Update context guard with token usage from this response.
-                    if let Some(ref usage) = resp.usage {
-                        context_guard.update_usage(usage);
-                        turn_cost.add_call(model, usage);
-                        tracing::debug!(
-                            iteration,
-                            input_tokens = usage.input_tokens,
-                            output_tokens = usage.output_tokens,
-                            context_window = usage.context_window,
-                            cumulative_usd = turn_cost.total_usd(),
-                            "[agent_loop] LLM response received"
-                        );
-                        if let Some(ref sink) = on_progress {
-                            let event = AgentProgress::TurnCostUpdated {
-                                model: model.to_string(),
-                                iteration: (iteration + 1) as u32,
-                                input_tokens: turn_cost.input_tokens,
-                                output_tokens: turn_cost.output_tokens,
-                                cached_input_tokens: turn_cost.cached_input_tokens,
-                                total_usd: turn_cost.total_usd(),
-                            };
-                            if let Err(e) = sink.send(event).await {
-                                log::warn!(
-                                    "[agent_loop] progress sink closed at TurnCostUpdated: {e}"
-                                );
-                            }
-                        }
-                    } else {
-                        tracing::debug!(
-                            iteration,
-                            "[agent_loop] LLM response received (no usage info)"
-                        );
-                    }
-
-                    let response_text = resp.text_or_empty().to_string();
-                    let mut calls = parse_structured_tool_calls(&resp.tool_calls);
-                    let mut parsed_text = String::new();
-
-                    if calls.is_empty() {
-                        let (fallback_text, fallback_calls) = parse_tool_calls(&response_text);
-                        if !fallback_text.is_empty() {
-                            parsed_text = fallback_text;
-                        }
-                        calls = fallback_calls;
-                    }
-
-                    tracing::debug!(
-                        iteration,
-                        native_tool_calls = resp.tool_calls.len(),
-                        parsed_tool_calls = calls.len(),
-                        "[agent_loop] tool calls parsed"
-                    );
-
-                    // Preserve native tool call IDs in assistant history so role=tool
-                    // follow-up messages can reference the exact call id.
-                    let assistant_history_content = if resp.tool_calls.is_empty() {
-                        response_text.clone()
-                    } else {
-                        build_native_assistant_history(
-                            &response_text,
-                            resp.reasoning_content.as_deref(),
-                            &resp.tool_calls,
-                        )
-                    };
-
-                    let native_calls = resp.tool_calls;
-                    (
-                        response_text,
-                        parsed_text,
-                        calls,
-                        assistant_history_content,
-                        native_calls,
-                    )
-                }
-                Err(e) => {
-                    // Transient upstream failures (rate-limit, gateway 5xx, "no
-                    // healthy upstream", etc.) are already classified + retried
-                    // by reliable.rs and produce an aggregate Sentry event only
-                    // when every provider/model is exhausted. Reporting each
-                    // per-iteration provider_chat error here duplicates the
-                    // signal and floods Sentry — see OPENHUMAN-TAURI-3Y/3Z
-                    // (~46 events combined) and the underlying TAURI-2E/84/T
-                    // (~3300 events from raw per-attempt 429/503/504 reports).
-                    let transient = crate::openhuman::inference::provider::reliable::is_rate_limited(
-                        &e,
-                    )
-                        || crate::openhuman::inference::provider::reliable::is_upstream_unhealthy(
-                            &e,
-                        );
-                    if transient {
-                        tracing::warn!(
-                            domain = "agent",
-                            operation = "provider_chat",
-                            provider = provider_name,
-                            model = model,
-                            iteration = iteration + 1,
-                            error = %format!("{e:#}"),
-                            "[agent] transient provider_chat failure — retried upstream; \
-                             aggregated all-providers-exhausted will report if applicable"
-                        );
-                    } else {
-                        crate::core::observability::report_error_or_expected(
-                            &e,
-                            "agent",
-                            "provider_chat",
-                            &[
-                                ("provider", provider_name),
-                                ("model", model),
-                                ("iteration", &(iteration + 1).to_string()),
-                            ],
-                        );
-                    }
-                    return Err(e);
-                }
-            };
-
-        let display_text = if parsed_text.is_empty() {
-            response_text.clone()
-        } else {
-            parsed_text
-        };
-
-        if tool_calls.is_empty() {
-            tracing::debug!(
-                iteration,
-                "[agent_loop] no tool calls — returning final response"
-            );
-            // No tool calls — this is the final response.
-            // If a streaming sender is provided, relay the text in small chunks
-            // so the channel can progressively update the draft message.
-            if let Some(ref tx) = on_delta {
-                // Split on whitespace boundaries, accumulating chunks of at least
-                // STREAM_CHUNK_MIN_CHARS characters for progressive draft updates.
-                let mut chunk = String::new();
-                for word in display_text.split_inclusive(char::is_whitespace) {
-                    chunk.push_str(word);
-                    if chunk.len() >= STREAM_CHUNK_MIN_CHARS
-                        && tx.send(std::mem::take(&mut chunk)).await.is_err()
-                    {
-                        break; // receiver dropped
-                    }
-                }
-                if !chunk.is_empty() {
-                    let _ = tx.send(chunk).await;
-                }
-            }
-            history.push(ChatMessage::assistant(response_text.clone()));
-            log::info!(
-                "[agent_loop] turn complete: iters={} provider_calls={} tokens_in={} tokens_out={} cached_in={} usd={:.4}",
-                (iteration + 1),
-                turn_cost.call_count,
-                turn_cost.input_tokens,
-                turn_cost.output_tokens,
-                turn_cost.cached_input_tokens,
-                turn_cost.total_usd(),
-            );
-            if let Some(ref sink) = on_progress {
-                if let Err(e) = sink
-                    .send(AgentProgress::TurnCompleted {
-                        iterations: (iteration + 1) as u32,
-                    })
-                    .await
-                {
-                    log::warn!("[agent_loop] progress sink closed at TurnCompleted: {e}");
-                }
-            }
-            return Ok(display_text);
-        }
-
-        // Print any text the LLM produced alongside tool calls (unless silent)
-        if !silent && !display_text.is_empty() {
-            print!("{display_text}");
-            let _ = std::io::stdout().flush();
-        }
-
-        // Execute each tool call and build results.
-        // `individual_results` tracks per-call output so that native-mode history
-        // can emit one `role: tool` message per tool call with the correct ID.
-        let mut tool_results = String::new();
-        let mut individual_results: Vec<String> = Vec::new();
-        for (call_idx, call) in tool_calls.iter().enumerate() {
-            // Stable id threaded through the start/complete pair (and
-            // any preceding args-delta events) so consumers can
-            // reconcile tool rows by id. The fallback includes
-            // `call_idx` to stay unique when the same tool name
-            // appears multiple times in one iteration.
-            let progress_call_id = call
-                .id
-                .clone()
-                .unwrap_or_else(|| format!("loop-{iteration}-{call_idx}-{}", call.name));
-            // Emit `ToolCallStarted` for every parsed call, even ones
-            // that will be rejected below (approval denied, CliRpcOnly,
-            // unknown) — the client-side row was created from the
-            // streamed args and needs a terminal event to resolve.
-            if let Some(ref sink) = on_progress {
-                if let Err(e) = sink
-                    .send(AgentProgress::ToolCallStarted {
-                        call_id: progress_call_id.clone(),
-                        tool_name: call.name.clone(),
-                        arguments: call.arguments.clone(),
-                        iteration: (iteration + 1) as u32,
-                    })
-                    .await
-                {
-                    log::warn!(
-                        "[agent_loop] progress sink closed while emitting ToolCallStarted: {e}"
-                    );
-                }
-            }
-
-            // Helper: emit a failed `ToolCallCompleted` for an
-            // early-exit path (denied / CliRpcOnly / unknown) so the
-            // client row flips to `error` instead of staying running.
-            let emit_failed_completion = |message: &str| {
-                let call_id = progress_call_id.clone();
-                let tool_name = call.name.clone();
-                let output_chars = message.chars().count();
-                let iteration_u32 = (iteration + 1) as u32;
-                let sink_opt = on_progress.clone();
-                async move {
-                    if let Some(sink) = sink_opt {
-                        if let Err(e) = sink
-                            .send(AgentProgress::ToolCallCompleted {
-                                call_id,
-                                tool_name,
-                                success: false,
-                                output_chars,
-                                elapsed_ms: 0,
-                                iteration: iteration_u32,
-                            })
-                            .await
-                        {
-                            log::warn!(
-                                "[agent_loop] progress sink closed while emitting early-exit ToolCallCompleted: {e}"
-                            );
-                        }
-                    }
-                }
-            };
-
-            // ── Tool policy check (#2131) ─────────────────
-            // Evaluate the pluggable ToolPolicy before any approval or
-            // execution. If the policy denies the call, skip everything
-            // (including approval side-effects) and return the denial
-            // reason as a tool error to the model.
-            if let PolicyDecision::Deny(reason) = tool_policy.evaluate(&call.name, &call.arguments)
-            {
-                tracing::debug!(
-                    iteration,
-                    tool = call.name.as_str(),
-                    reason = %reason,
-                    "[agent_loop] tool policy denied tool call"
-                );
-                let denied = format!("Tool '{}' denied by policy: {reason}", call.name);
-                emit_failed_completion(&denied).await;
-                individual_results.push(denied.clone());
-                let _ = writeln!(
-                    tool_results,
-                    "<tool_result name=\"{}\">\n{denied}\n</tool_result>",
-                    call.name
-                );
-                // Record so a re-issued identical call halts the turn rather than
-                // repeating a deterministic policy denial to max_iterations.
-                if let Some(halt) =
-                    failure_guard.record(&call.name, &call.arguments.to_string(), false, &denied)
-                {
-                    halt_reason = Some(halt);
-                }
-                continue;
-            }
-
-            // Look up the tool by name in the combined registry + extras,
-            // subject to the visibility whitelist. If the model hallucinated
-            // a filtered-out tool name we treat it as unknown — the error
-            // path below produces a structured error message the LLM can
-            // correct in the next iteration.
-            let tool_opt: Option<&dyn Tool> = tools_registry
-                .iter()
-                .chain(extra_tools.iter())
-                .find(|t| t.name() == call.name && is_visible(t.name()))
-                .map(|b| b.as_ref());
-            tracing::debug!(
-                iteration,
-                tool = call.name.as_str(),
-                found = tool_opt.is_some(),
-                "[agent_loop] executing tool"
-            );
-
-            // Scope check: CliRpcOnly tools cannot run in the autonomous agent loop.
-            if let Some(tool) = tool_opt {
-                if tool.scope() == ToolScope::CliRpcOnly {
-                    tracing::warn!(
-                        iteration,
-                        tool = call.name.as_str(),
-                        "[agent_loop] tool scope is CliRpcOnly — denied in agent loop"
-                    );
-                    let denied = format!(
-                        "Tool '{}' is only available via explicit CLI/RPC invocation, not in the autonomous agent loop.",
-                        call.name
-                    );
-                    emit_failed_completion(&denied).await;
-                    individual_results.push(denied.clone());
-                    let _ = writeln!(
-                        tool_results,
-                        "<tool_result name=\"{}\">\n{denied}\n</tool_result>",
-                        call.name
-                    );
-                    if let Some(halt) = failure_guard.record(
-                        &call.name,
-                        &call.arguments.to_string(),
-                        false,
-                        &denied,
-                    ) {
-                        halt_reason = Some(halt);
-                    }
-                    continue;
-                }
-            }
-
-            // ── External-effect approval gate (#1339, #2135) ──
-            // Tools whose `external_effect()` returns true route
-            // through the process-global `ApprovalGate` so the UI
-            // can prompt the user before `execute()` runs. The gate
-            // is `None` when supervised mode is disabled or in test
-            // envs — behavior matches the pre-#1339 path.
-            //
-            // `approval_request_id` carries the persisted row id
-            // forward so we can stamp the terminal execution
-            // outcome onto the same `pending_approvals` row after
-            // the tool finishes (issue #2135). `None` means the
-            // tool was either not gated (no supervised gate, not
-            // external-effect), was session-allowlist-shortcutted,
-            // or was denied — none of which produce an audit row
-            // that needs an "after" entry.
-            let mut approval_request_id: Option<String> = None;
-            let mut approval_gate_for_audit: Option<
-                std::sync::Arc<crate::openhuman::approval::ApprovalGate>,
-            > = None;
-            if let Some(tool) = tool_opt {
-                if tool.external_effect_with_args(&call.arguments) {
-                    if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() {
-                        let summary = crate::openhuman::approval::summarize_action(
-                            &call.name,
-                            &call.arguments,
-                        );
-                        let redacted = crate::openhuman::approval::redact_args(&call.arguments);
-                        let (outcome, request_id) =
-                            gate.intercept_audited(&call.name, &summary, redacted).await;
-                        match outcome {
-                            crate::openhuman::approval::GateOutcome::Allow => {
-                                approval_request_id = request_id;
-                                if approval_request_id.is_some() {
-                                    approval_gate_for_audit = Some(gate);
-                                }
-                            }
-                            crate::openhuman::approval::GateOutcome::Deny { reason } => {
-                                tracing::warn!(
-                                    iteration,
-                                    tool = call.name.as_str(),
-                                    reason = %reason,
-                                    "[agent_loop] approval gate denied tool call"
-                                );
-                                emit_failed_completion(&reason).await;
-                                individual_results.push(reason.clone());
-                                let _ = writeln!(
-                                    tool_results,
-                                    "<tool_result name=\"{}\">\n{reason}\n</tool_result>",
-                                    call.name
-                                );
-                                // Record the denial in the shared breaker (the
-                                // gate's `[policy-denied]` marker makes it a
-                                // hard reject) so a re-issued identical call
-                                // halts the turn instead of re-prompting
-                                // forever — the normal record path below is
-                                // skipped by this `continue`.
-                                if let Some(halt) = failure_guard.record(
-                                    &call.name,
-                                    &call.arguments.to_string(),
-                                    false,
-                                    &reason,
-                                ) {
-                                    halt_reason = Some(halt);
-                                }
-                                continue;
-                            }
-                        }
-                    }
-                }
-            }
-
-            let (result, call_succeeded) = if let Some(tool) = tool_opt {
-                let tool_deadline =
-                    crate::openhuman::tool_timeout::tool_execution_timeout_duration();
-                let timeout_secs = crate::openhuman::tool_timeout::tool_execution_timeout_secs();
-                let tool_started = std::time::Instant::now();
-                let outcome =
-                    tokio::time::timeout(tool_deadline, tool.execute(call.arguments.clone())).await;
-                let elapsed_ms = tool_started.elapsed().as_millis() as u64;
-                let (result_text, success) = match outcome {
-                    Ok(Ok(r)) => {
-                        let output = r.output();
-                        let success = !r.is_error;
-                        if success {
-                            tracing::debug!(
-                                iteration,
-                                tool = call.name.as_str(),
-                                output_len = output.len(),
-                                "[agent_loop] tool succeeded"
-                            );
-                            let mut scrubbed = scrub_credentials(&output);
-                            let (compacted, tj_stats) =
-                                crate::openhuman::tokenjuice::compact_tool_output(
-                                    &call.name,
-                                    Some(&call.arguments),
-                                    &scrubbed,
-                                    Some(0),
-                                );
-                            if tj_stats.applied {
-                                log::debug!(
-                                    "[agent_loop] tokenjuice applied tool={} rule={} {}->{} bytes",
-                                    call.name,
-                                    tj_stats.rule_id,
-                                    tj_stats.original_bytes,
-                                    tj_stats.compacted_bytes
-                                );
-                                scrubbed = compacted;
-                            }
-
-                            // Per-tool max_result_size_chars cap. When
-                            // a tool sets it and the (post-tokenjuice)
-                            // body still exceeds the cap, truncate
-                            // here and skip the global payload
-                            // summarizer for this call — the cap is
-                            // fast and deterministic, the summarizer
-                            // is the fallback for tools that don't
-                            // know their own size budget.
-                            let mut hit_per_tool_cap = false;
-                            if let Some(cap) = tool.max_result_size_chars() {
-                                let char_count = scrubbed.chars().count();
-                                if char_count > cap {
-                                    let truncated: String = scrubbed.chars().take(cap).collect();
-                                    let dropped = char_count - cap;
-                                    log::info!(
-                                        "[agent_loop] per-tool cap applied tool={} cap_chars={} original_chars={} dropped_chars={}",
-                                        call.name,
-                                        cap,
-                                        char_count,
-                                        dropped,
-                                    );
-                                    scrubbed = format!(
-                                        "{truncated}\n\n[truncated by tool cap: {dropped} more chars not shown]"
-                                    );
-                                    hit_per_tool_cap = true;
-                                }
-                            }
-
-                            if !hit_per_tool_cap {
-                                if let Some(summarizer) = payload_summarizer {
-                                    log::debug!(
-                                        "[agent_loop] payload_summarizer intercepting tool={} bytes={}",
-                                        call.name,
-                                        scrubbed.len()
-                                    );
-                                    match summarizer
-                                        .maybe_summarize(&call.name, None, &scrubbed)
-                                        .await
-                                    {
-                                        Ok(Some(payload)) => {
-                                            log::info!(
-                                                "[agent_loop] payload_summarizer compressed tool={} {}->{} bytes",
-                                                call.name,
-                                                payload.original_bytes,
-                                                payload.summary_bytes
-                                            );
-                                            scrubbed = payload.summary;
-                                        }
-                                        Ok(None) => {
-                                            log::debug!(
-                                                "[agent_loop] payload_summarizer pass-through tool={} bytes={}",
-                                                call.name,
-                                                scrubbed.len()
-                                            );
-                                        }
-                                        Err(e) => {
-                                            log::warn!(
-                                                "[agent_loop] payload_summarizer error tool={} err={} (passing raw payload through)",
-                                                call.name,
-                                                e
-                                            );
-                                        }
-                                    }
-                                }
-                            }
-                            (scrubbed, true)
-                        } else {
-                            tracing::warn!(
-                                iteration,
-                                tool = call.name.as_str(),
-                                "[agent_loop] tool returned error: {output}"
-                            );
-                            let scrubbed = scrub_credentials(&output);
-                            let (compacted, _) = crate::openhuman::tokenjuice::compact_tool_output(
-                                &call.name,
-                                Some(&call.arguments),
-                                &scrubbed,
-                                Some(1),
-                            );
-                            (format!("Error: {compacted}"), false)
-                        }
-                    }
-                    Ok(Err(e)) => {
-                        crate::core::observability::report_error(
-                            &e,
-                            "tool",
-                            "execute",
-                            &[
-                                ("tool", call.name.as_str()),
-                                ("outcome", "failed"),
-                                ("iteration", &(iteration + 1).to_string()),
-                            ],
-                        );
-                        (format!("Error executing {}: {e}", call.name), false)
-                    }
-                    Err(_) => {
-                        let msg = format!(
-                            "tool '{}' timed out after {} seconds",
-                            call.name, timeout_secs
-                        );
-                        crate::core::observability::report_error(
-                            msg.as_str(),
-                            "tool",
-                            "execute",
-                            &[
-                                ("tool", call.name.as_str()),
-                                ("outcome", "timeout"),
-                                ("timeout_secs", &timeout_secs.to_string()),
-                                ("iteration", &(iteration + 1).to_string()),
-                            ],
-                        );
-                        (
-                            format!(
-                                "Error: tool '{}' timed out after {} seconds",
-                                call.name, timeout_secs
-                            ),
-                            false,
-                        )
-                    }
-                };
-                if let Some(ref sink) = on_progress {
-                    if let Err(e) = sink
-                        .send(AgentProgress::ToolCallCompleted {
-                            call_id: progress_call_id.clone(),
-                            tool_name: call.name.clone(),
-                            success,
-                            output_chars: result_text.chars().count(),
-                            elapsed_ms,
-                            iteration: (iteration + 1) as u32,
-                        })
-                        .await
-                    {
-                        log::warn!("[agent_loop] progress sink closed while emitting ToolCallCompleted: {e}");
-                    }
-                }
-                // ── Approval audit after-action row (#2135) ────
-                // Stamp the terminal status onto the same
-                // `pending_approvals` row the gate created before
-                // execution, so the audit trail carries both the
-                // before (approval) and after (executed_at +
-                // outcome). Best-effort: a write failure here is
-                // logged but not propagated to the agent.
-                if let (Some(gate), Some(req_id)) = (
-                    approval_gate_for_audit.as_ref(),
-                    approval_request_id.as_ref(),
-                ) {
-                    let exec_outcome = if success {
-                        crate::openhuman::approval::ExecutionOutcome::Success
-                    } else {
-                        crate::openhuman::approval::ExecutionOutcome::Failure
-                    };
-                    let err_text = if success {
-                        None
-                    } else {
-                        Some(result_text.as_str())
-                    };
-                    gate.record_execution(req_id, exec_outcome, err_text);
-                }
-                (result_text, success)
-            } else {
-                tracing::warn!(
-                    iteration,
-                    tool = call.name.as_str(),
-                    "[agent_loop] unknown tool requested"
-                );
-                let msg = format!("Unknown tool: {}", call.name);
-                emit_failed_completion(&msg).await;
-                (msg, false)
-            };
-
-            individual_results.push(result.clone());
-            let _ = writeln!(
-                tool_results,
-                "<tool_result name=\"{}\">\n{}\n</tool_result>",
-                call.name, result
-            );
-
-            // Repeated-failure circuit breaker (shared guard) — halt with a root
-            // cause instead of grinding to `max_iterations` on a doomed action.
-            if let Some(reason) = failure_guard.record(
-                &call.name,
-                &call.arguments.to_string(),
-                call_succeeded,
-                &result,
-            ) {
-                tracing::warn!(
-                    iteration,
-                    tool = call.name.as_str(),
-                    "[agent_loop] circuit breaker tripped — halting with root cause"
-                );
-                halt_reason = Some(reason);
-            }
-        }
-
-        // Add assistant message with tool calls + tool results to history.
-        // Native mode: use JSON-structured messages so convert_messages() can
-        // reconstruct proper OpenAI-format tool_calls and tool result messages.
-        // Prompt mode: use XML-based text format as before.
-        history.push(ChatMessage::assistant(assistant_history_content));
-        if native_tool_calls.is_empty() {
-            history.push(ChatMessage::user(format!("[Tool results]\n{tool_results}")));
-        } else {
-            for (native_call, result) in native_tool_calls.iter().zip(individual_results.iter()) {
-                let tool_msg = serde_json::json!({
-                    "tool_call_id": native_call.id,
-                    "content": result,
-                });
-                history.push(ChatMessage::tool(tool_msg.to_string()));
-            }
-        }
-
-        // Circuit breaker tripped this iteration: return the root-cause summary
-        // as the agent's result instead of looping to `max_iterations`. The
-        // tool results are already in `history` above, so the caller still has
-        // full context if it wants it.
-        if let Some(reason) = halt_reason.take() {
-            // Mirror the normal-completion path: emit TurnCompleted before the
-            // early return, otherwise progress consumers stay "in-flight"
-            // indefinitely when the circuit breaker trips.
-            if let Some(ref sink) = on_progress {
-                if let Err(e) = sink
-                    .send(AgentProgress::TurnCompleted {
-                        iterations: (iteration + 1) as u32,
-                    })
-                    .await
-                {
-                    log::warn!("[agent_loop] progress sink closed at TurnCompleted: {e}");
-                }
-            }
-            return Ok(reason);
-        }
-    }
-
-    // Return the typed `AgentError::MaxIterationsExceeded` variant (boxed
-    // through `anyhow::Error`) so downstream wrappers — notably
-    // `Agent::run_single` in `harness/session/runtime.rs` — can downcast and
-    // suppress Sentry emission for this deterministic agent-state outcome
-    // (OPENHUMAN-TAURI-99 / -98). The `Display` text is preserved verbatim so
-    // any caller that already inspects the string (UI chat surface, tests)
-    // continues to work.
-    Err(anyhow::Error::new(
-        crate::openhuman::agent::error::AgentError::MaxIterationsExceeded {
-            max: max_iterations,
-        },
-    ))
+    let mut tool_source = super::engine::RegistryToolSource::new(
+        tools_registry,
+        extra_tools,
+        visible_tool_names,
+        tool_policy,
+        payload_summarizer,
+    );
+    let progress = super::engine::TurnProgress::new(on_progress);
+    let mut observer = super::engine::NullObserver;
+    let checkpoint = super::engine::ErrorCheckpoint;
+    let parser = super::engine::DefaultParser;
+    super::engine::run_turn_engine(
+        provider,
+        history,
+        &mut tool_source,
+        &progress,
+        &mut observer,
+        &checkpoint,
+        &parser,
+        provider_name,
+        model,
+        temperature,
+        silent,
+        multimodal_config,
+        max_iterations,
+        on_delta,
+    )
+    .await
+    .map(|outcome| outcome.text)
 }
 
 #[cfg(test)]
diff --git a/src/openhuman/agent/harness/tool_loop_tests.rs b/src/openhuman/agent/harness/tool_loop_tests.rs
index fd4a63ab83..56f2804758 100644
--- a/src/openhuman/agent/harness/tool_loop_tests.rs
+++ b/src/openhuman/agent/harness/tool_loop_tests.rs
@@ -1,6 +1,6 @@
 use super::*;
 use crate::openhuman::inference::provider::traits::ProviderCapabilities;
-use crate::openhuman::inference::provider::ChatResponse;
+use crate::openhuman::inference::provider::{ChatRequest, ChatResponse};
 use crate::openhuman::tools::{ToolResult, ToolScope};
 use async_trait::async_trait;
 use parking_lot::Mutex;