diff --git a/src/openhuman/agent/harness/session/builder.rs b/src/openhuman/agent/harness/session/builder.rs index d7859163b7..2f811bec35 100644 --- a/src/openhuman/agent/harness/session/builder.rs +++ b/src/openhuman/agent/harness/session/builder.rs @@ -565,7 +565,6 @@ impl AgentBuilder { last_memory_context: None, last_turn_citations: Vec::new(), history: Vec::new(), - last_tree_prefetch_at: None, post_turn_hooks: self.post_turn_hooks, learning_enabled: self.learning_enabled, explicit_preferences_enabled: self.explicit_preferences_enabled, diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs index e3abe0e38d..785e91dbae 100644 --- a/src/openhuman/agent/harness/session/turn.rs +++ b/src/openhuman/agent/harness/session/turn.rs @@ -327,59 +327,6 @@ impl Agent { .await .unwrap_or_default(); - // ── Memory-tree eager prefetch (#710 wiring) ────────────────── - // The orchestrator session injects a cross-source digest on the - // first turn AND every `tree_loader::REFRESH_INTERVAL` (30 min by - // default) thereafter, so long-running conversations stay current - // with newly-ingested memory. Each injection still rides on the - // user message (NOT the system prompt) to keep the KV-cache prefix - // stable. Failure is non-fatal — bare `context` is returned on any - // error. The timestamp is bumped on every successful `load` (even - // when the digest is empty) so an empty workspace doesn't get - // re-queried every turn. 
- // - let now = std::time::Instant::now(); - let context = if crate::openhuman::agent::tree_loader::should_prefetch( - self.last_tree_prefetch_at, - now, - crate::openhuman::agent::tree_loader::REFRESH_INTERVAL, - ) { - match crate::openhuman::config::rpc::load_config_with_timeout().await { - Ok(cfg) => { - match crate::openhuman::agent::tree_loader::TreeContextLoader::load(&cfg).await - { - Ok(tree_ctx) => { - let was_first = self.last_tree_prefetch_at.is_none(); - self.last_tree_prefetch_at = Some(now); - if !tree_ctx.is_empty() { - log::info!( - "[memory_tree] tree context injected first_turn={} chars={}", - was_first, - tree_ctx.chars().count() - ); - format!("{context}{tree_ctx}") - } else { - context - } - } - Err(e) => { - log::warn!("[memory_tree] tree_loader.load failed (non-fatal): {e}"); - context - } - } - } - Err(e) => { - log::warn!( - "[memory_tree] tree_loader skipped — config load failed (non-fatal): {e}" - ); - context - } - } - } else { - log::trace!("[memory_tree] tree_loader skipped — within refresh interval"); - context - }; - // ── Phase 3 STM preemptive recall ──────────────────────────── // On the very first turn only, assemble a bounded cross-thread // context block from the FTS5 episodic arm (keyword match) and the diff --git a/src/openhuman/agent/harness/session/turn_tests.rs b/src/openhuman/agent/harness/session/turn_tests.rs index fd0f702af3..d5b46a6ca4 100644 --- a/src/openhuman/agent/harness/session/turn_tests.rs +++ b/src/openhuman/agent/harness/session/turn_tests.rs @@ -889,12 +889,6 @@ async fn turn_runs_full_tool_cycle_with_context_and_hooks() { }, crate::openhuman::config::ContextConfig::default(), ); - // Suppress the memory-tree eager prefetch — it reads the real workspace - // via `load_config_with_timeout`, not the injected loader, so leaving it - // on would make this test depend on whatever is in `~/.openhuman`. 
This - // test exercises the injected memory context + tool cycle, not the - // prefetch; marking it already-prefetched skips that path deterministically. - agent.last_tree_prefetch_at = Some(std::time::Instant::now()); let response = agent .turn("hello world") @@ -972,10 +966,6 @@ async fn turn_uses_cached_transcript_prefix_on_first_iteration() { ChatMessage::system("cached-system"), ChatMessage::assistant("cached-assistant"), ]); - // Skip the memory-tree eager prefetch (reads the real workspace, not the - // injected loader) so the user message stays exactly "fresh" regardless - // of local `~/.openhuman` content. - agent.last_tree_prefetch_at = Some(std::time::Instant::now()); let response = agent.turn("fresh").await.expect("turn should succeed"); assert_eq!(response, "cached-final"); diff --git a/src/openhuman/agent/harness/session/types.rs b/src/openhuman/agent/harness/session/types.rs index 43d7875ca2..c6bae3ea17 100644 --- a/src/openhuman/agent/harness/session/types.rs +++ b/src/openhuman/agent/harness/session/types.rs @@ -65,14 +65,6 @@ pub struct Agent { /// Consumed by web-channel delivery to render source chips in the UI. pub(super) last_turn_citations: Vec, pub(super) history: Vec, - /// Wall-clock timestamp of the last successful memory-tree prefetch - /// for this session. Drives the 30-minute refresh cadence in the turn - /// loop — `None` means "never fetched, fetch now"; otherwise we only - /// re-run `TreeContextLoader::load` when the elapsed time exceeds - /// `tree_loader::REFRESH_INTERVAL`. Updated on every successful call - /// (even when the digest came back empty) so an empty workspace - /// doesn't get hammered every turn. 
- pub(super) last_tree_prefetch_at: Option, pub(super) post_turn_hooks: Vec>, pub(super) learning_enabled: bool, /// When `true`, pinned preferences stored via `remember_preference` are diff --git a/src/openhuman/agent/tree_loader.rs b/src/openhuman/agent/tree_loader.rs index e775f1ec85..ab26333969 100644 --- a/src/openhuman/agent/tree_loader.rs +++ b/src/openhuman/agent/tree_loader.rs @@ -1,40 +1,54 @@ //! Eager prefetch of recent memory-tree activity into the orchestrator's //! session context (Phase 4 follow-on, #710 wiring). //! -//! The orchestrator answers "what happened this week?" / "what's been going -//! on with X?" style questions out of the user's own ingested memory. We -//! pre-load a 7-day recap on the session's first turn AND periodically -//! thereafter (every [`REFRESH_INTERVAL`]) so long-running conversations -//! stay current with newly-ingested memory without needing the LLM to -//! round-trip a tool call. The injection rides on the user message (NOT the -//! system prompt) to keep the KV-cache prefix stable. +//! NOTE (#3170): this loader is **not currently wired into the agent turn +//! loop**. The unconditional 7-day digest injection was removed because it +//! duplicated the on-demand memory-tree retrieval tools (the smart +//! multi-strategy walk, #3077) and — unlike those tools — ignored the +//! memory-tree on/off toggle (which only gates the ingestion scheduler, not +//! this read path). The module is retained so an opt-in eager digest can be +//! re-wired behind a proper read-side gate later. Its public surface is +//! still exercised by `tests/inference_agent_raw_coverage_e2e.rs`. The +//! historical behavior is described below (in past tense) for that future +//! re-wiring. //! -//! The recap is assembled by walking the **per-source** trees across the +//! Historically, the orchestrator answered "what happened this week?" / +//! "what's been going on with X?" style questions out of the user's own +//! ingested memory. 
It pre-loaded a 7-day recap on the session's first turn +//! AND periodically thereafter (every [`REFRESH_INTERVAL`]) so long-running +//! conversations stayed current with newly-ingested memory without needing +//! the LLM to round-trip a tool call. The injection rode on the user message +//! (NOT the system prompt) to keep the KV-cache prefix stable. +//! +//! The recap was assembled by walking the **per-source** trees across the //! window (the global digest tree was removed — source trees plus the -//! entity index are the substrate). When the workspace has no source +//! entity index are the substrate). When the workspace had no source //! summaries yet (early-life workspaces or no ingest configured), -//! [`TreeContextLoader::load`] returns an empty string and the caller -//! silently no-ops. The session-side timestamp is still bumped on those -//! empty results so an empty workspace doesn't get re-queried every turn. +//! [`TreeContextLoader::load`] returned an empty string and the caller +//! silently no-op'd. The session-side timestamp was bumped on those empty +//! results too so an empty workspace didn't get re-queried every turn. //! -//! Failure is non-fatal by design — the orchestrator must still be able to -//! reply when the memory tree is unavailable, mis-configured, or empty. We -//! log the failure mode and return `Ok(String::new())` so the caller can -//! concatenate without branching. +//! Failure was non-fatal by design — the orchestrator had to stay able to +//! reply when the memory tree was unavailable, mis-configured, or empty. The +//! loader logs the failure mode and returns `Ok(String::new())` so a caller +//! can concatenate without branching. use crate::openhuman::config::Config; use crate::openhuman::memory_tree::retrieval::query_source; -/// Default lookback window for the eager digest. Mirrors the language in -/// the orchestrator prompt ("7-day digest pre-loaded into session context"). 
+/// Default lookback window for the eager digest. Retained for a future +/// re-wiring (see the module-level NOTE) — not actively consumed now that +/// the prefetch is unwired. The value mirrored the language in the orchestrator +/// prompt ("7-day digest pre-loaded into session context"). pub const DEFAULT_WINDOW_DAYS: u32 = 7; /// Minimum wall-clock interval between successive prefetches in the same -/// session. The first turn always fetches (timestamp is `None`); subsequent -/// turns re-prefetch only after this interval has elapsed since the last -/// successful call. Picked to balance freshness in long-running chats -/// against repeating the same digest content when no new ingest has -/// happened — the typical case for short bursts of conversation. +/// session. Retained for a future re-wiring (see the module-level NOTE) — +/// no caller drives this cadence today. Historically: the first turn always +/// fetched (timestamp `None`); later turns re-prefetched only after this +/// interval elapsed since the last successful call — picked to balance +/// freshness in long-running chats against repeating the same digest when no +/// new ingest had happened. pub const REFRESH_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30 * 60); /// Per-hit content cap to keep the injection bounded; long summary bodies @@ -68,6 +82,9 @@ pub struct TreeContextLoader; impl TreeContextLoader { /// Build the eager-prefetch context block for the current workspace. /// + /// NOTE (#3170): not called from the agent turn loop anymore — see the + /// module-level NOTE. Retained for tests and a possible future re-wiring. + /// /// Returns: /// - `Ok("")` when the workspace has no source summaries yet, or when /// `query_source` returns an error (logged at warn level).