Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/openhuman/agent/harness/session/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,6 @@ impl AgentBuilder {
last_memory_context: None,
last_turn_citations: Vec::new(),
history: Vec::new(),
last_tree_prefetch_at: None,
post_turn_hooks: self.post_turn_hooks,
learning_enabled: self.learning_enabled,
explicit_preferences_enabled: self.explicit_preferences_enabled,
Expand Down
53 changes: 0 additions & 53 deletions src/openhuman/agent/harness/session/turn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,59 +327,6 @@ impl Agent {
.await
.unwrap_or_default();

// ── Memory-tree eager prefetch (#710 wiring) ──────────────────
// The orchestrator session injects a cross-source digest on the
// first turn AND every `tree_loader::REFRESH_INTERVAL` (30 min by
// default) thereafter, so long-running conversations stay current
// with newly-ingested memory. Each injection still rides on the
// user message (NOT the system prompt) to keep the KV-cache prefix
// stable. Failure is non-fatal — bare `context` is returned on any
// error. The timestamp is bumped on every successful `load` (even
// when the digest is empty) so an empty workspace doesn't get
// re-queried every turn.
//
let now = std::time::Instant::now();
let context = if crate::openhuman::agent::tree_loader::should_prefetch(
self.last_tree_prefetch_at,
now,
crate::openhuman::agent::tree_loader::REFRESH_INTERVAL,
) {
match crate::openhuman::config::rpc::load_config_with_timeout().await {
Ok(cfg) => {
match crate::openhuman::agent::tree_loader::TreeContextLoader::load(&cfg).await
{
Ok(tree_ctx) => {
let was_first = self.last_tree_prefetch_at.is_none();
self.last_tree_prefetch_at = Some(now);
if !tree_ctx.is_empty() {
log::info!(
"[memory_tree] tree context injected first_turn={} chars={}",
was_first,
tree_ctx.chars().count()
);
format!("{context}{tree_ctx}")
} else {
context
}
}
Err(e) => {
log::warn!("[memory_tree] tree_loader.load failed (non-fatal): {e}");
context
}
}
}
Err(e) => {
log::warn!(
"[memory_tree] tree_loader skipped — config load failed (non-fatal): {e}"
);
context
}
}
} else {
log::trace!("[memory_tree] tree_loader skipped — within refresh interval");
context
};

// ── Phase 3 STM preemptive recall ────────────────────────────
// On the very first turn only, assemble a bounded cross-thread
// context block from the FTS5 episodic arm (keyword match) and the
Expand Down
10 changes: 0 additions & 10 deletions src/openhuman/agent/harness/session/turn_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -889,12 +889,6 @@ async fn turn_runs_full_tool_cycle_with_context_and_hooks() {
},
crate::openhuman::config::ContextConfig::default(),
);
// Suppress the memory-tree eager prefetch — it reads the real workspace
// via `load_config_with_timeout`, not the injected loader, so leaving it
// on would make this test depend on whatever is in `~/.openhuman`. This
// test exercises the injected memory context + tool cycle, not the
// prefetch; marking it already-prefetched skips that path deterministically.
agent.last_tree_prefetch_at = Some(std::time::Instant::now());

let response = agent
.turn("hello world")
Expand Down Expand Up @@ -972,10 +966,6 @@ async fn turn_uses_cached_transcript_prefix_on_first_iteration() {
ChatMessage::system("cached-system"),
ChatMessage::assistant("cached-assistant"),
]);
// Skip the memory-tree eager prefetch (reads the real workspace, not the
// injected loader) so the user message stays exactly "fresh" regardless
// of local `~/.openhuman` content.
agent.last_tree_prefetch_at = Some(std::time::Instant::now());

let response = agent.turn("fresh").await.expect("turn should succeed");
assert_eq!(response, "cached-final");
Expand Down
8 changes: 0 additions & 8 deletions src/openhuman/agent/harness/session/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,6 @@ pub struct Agent {
/// Consumed by web-channel delivery to render source chips in the UI.
pub(super) last_turn_citations: Vec<crate::openhuman::agent::memory_loader::MemoryCitation>,
pub(super) history: Vec<ConversationMessage>,
/// Monotonic-clock timestamp (`Instant`, not wall-clock time) of the last
/// successful memory-tree prefetch for this session. Drives the 30-minute
/// refresh cadence in the turn
/// loop — `None` means "never fetched, fetch now"; otherwise we only
/// re-run `TreeContextLoader::load` when the elapsed time exceeds
/// `tree_loader::REFRESH_INTERVAL`. Updated on every successful call
/// (even when the digest came back empty) so an empty workspace
/// doesn't get hammered every turn.
pub(super) last_tree_prefetch_at: Option<std::time::Instant>,
pub(super) post_turn_hooks: Vec<Arc<dyn PostTurnHook>>,
pub(super) learning_enabled: bool,
/// When `true`, pinned preferences stored via `remember_preference` are
Expand Down
63 changes: 40 additions & 23 deletions src/openhuman/agent/tree_loader.rs
Original file line number Diff line number Diff line change
@@ -1,40 +1,54 @@
//! Eager prefetch of recent memory-tree activity into the orchestrator's
//! session context (Phase 4 follow-on, #710 wiring).
//!
//! The orchestrator answers "what happened this week?" / "what's been going
//! on with X?" style questions out of the user's own ingested memory. We
//! pre-load a 7-day recap on the session's first turn AND periodically
//! thereafter (every [`REFRESH_INTERVAL`]) so long-running conversations
//! stay current with newly-ingested memory without needing the LLM to
//! round-trip a tool call. The injection rides on the user message (NOT the
//! system prompt) to keep the KV-cache prefix stable.
//! NOTE (#3170): this loader is **not currently wired into the agent turn
//! loop**. The unconditional 7-day digest injection was removed because it
//! duplicated the on-demand memory-tree retrieval tools (the smart
//! multi-strategy walk, #3077) and — unlike those tools — ignored the
//! memory-tree on/off toggle (which only gates the ingestion scheduler, not
//! this read path). The module is retained so an opt-in eager digest can be
//! re-wired behind a proper read-side gate later. Its public surface is
//! still exercised by `tests/inference_agent_raw_coverage_e2e.rs`. The
//! historical behavior is described below (in past tense) for that future
//! re-wiring.
//!
//! The recap is assembled by walking the **per-source** trees across the
//! Historically, the orchestrator answered "what happened this week?" /
//! "what's been going on with X?" style questions out of the user's own
//! ingested memory. It pre-loaded a 7-day recap on the session's first turn
//! AND periodically thereafter (every [`REFRESH_INTERVAL`]) so long-running
//! conversations stayed current with newly-ingested memory without needing
//! the LLM to round-trip a tool call. The injection rode on the user message
//! (NOT the system prompt) to keep the KV-cache prefix stable.
//!
//! The recap was assembled by walking the **per-source** trees across the
//! window (the global digest tree was removed — source trees plus the
//! entity index are the substrate). When the workspace has no source
//! entity index are the substrate). When the workspace had no source
//! summaries yet (early-life workspaces or no ingest configured),
//! [`TreeContextLoader::load`] returns an empty string and the caller
//! silently no-ops. The session-side timestamp is still bumped on those
//! empty results so an empty workspace doesn't get re-queried every turn.
//! [`TreeContextLoader::load`] returned an empty string and the caller
//! silently no-op'd. The session-side timestamp was bumped on those empty
//! results too so an empty workspace didn't get re-queried every turn.
//!
//! Failure is non-fatal by design — the orchestrator must still be able to
//! reply when the memory tree is unavailable, mis-configured, or empty. We
//! log the failure mode and return `Ok(String::new())` so the caller can
//! concatenate without branching.
//! Failure was non-fatal by design — the orchestrator had to remain able to
//! reply when the memory tree was unavailable, mis-configured, or empty. The
//! loader logs the failure mode and returns `Ok(String::new())` so a caller
//! can concatenate without branching.

use crate::openhuman::config::Config;
use crate::openhuman::memory_tree::retrieval::query_source;

/// Default lookback window for the eager digest. Mirrors the language in
/// the orchestrator prompt ("7-day digest pre-loaded into session context").
/// Default lookback window for the eager digest. Retained for a future
/// re-wiring (see the module-level NOTE) — not actively consumed now that
/// the prefetch is unwired. The value mirrored the language in the
/// orchestrator prompt ("7-day digest pre-loaded into session context").
pub const DEFAULT_WINDOW_DAYS: u32 = 7;

/// Minimum wall-clock interval between successive prefetches in the same
/// session. The first turn always fetches (timestamp is `None`); subsequent
/// turns re-prefetch only after this interval has elapsed since the last
/// successful call. Picked to balance freshness in long-running chats
/// against repeating the same digest content when no new ingest has
/// happened — the typical case for short bursts of conversation.
/// session. Retained for a future re-wiring (see the module-level NOTE) —
/// no caller drives this cadence today. Historically: the first turn always
/// fetched (timestamp `None`); later turns re-prefetched only after this
/// interval elapsed since the last successful call — picked to balance
/// freshness in long-running chats against repeating the same digest when no
/// new ingest had happened.
pub const REFRESH_INTERVAL: std::time::Duration = std::time::Duration::from_secs(30 * 60);

/// Per-hit content cap to keep the injection bounded; long summary bodies
Expand Down Expand Up @@ -68,6 +82,9 @@ pub struct TreeContextLoader;
impl TreeContextLoader {
/// Build the eager-prefetch context block for the current workspace.
///
/// NOTE (#3170): not called from the agent turn loop anymore — see the
/// module-level NOTE. Retained for tests and a possible future re-wiring.
///
/// Returns:
/// - `Ok("")` when the workspace has no source summaries yet, or when
/// `query_source` returns an error (logged at warn level).
Expand Down
Loading