diff --git a/app/src/services/__tests__/rpcMethods.test.ts b/app/src/services/__tests__/rpcMethods.test.ts index 3ea39ea61d..7512ffb143 100644 --- a/app/src/services/__tests__/rpcMethods.test.ts +++ b/app/src/services/__tests__/rpcMethods.test.ts @@ -116,7 +116,7 @@ describe('rpcMethods catalog', () => { test('catalog canonical methods exist in core schema registry (drift guard)', () => { const schemaSources = [ fs.readFileSync( - path.resolve(__dirname, '../../../../src/openhuman/config/schemas.rs'), + path.resolve(__dirname, '../../../../src/openhuman/config/schemas/schema_defs.rs'), 'utf8' ), fs.readFileSync( diff --git a/src/openhuman/agent/harness/archivist.rs b/src/openhuman/agent/harness/archivist.rs deleted file mode 100644 index bdc0fb1738..0000000000 --- a/src/openhuman/agent/harness/archivist.rs +++ /dev/null @@ -1,1257 +0,0 @@ -//! Archivist — background PostTurnHook that extracts lessons, indexes -//! episodic records, and manages conversation segments with event extraction. -//! -//! After each turn, the Archivist: -//! 1. Inserts the turn into the FTS5 episodic table. -//! 2. Manages conversation segments (boundary detection + lifecycle). -//! 3. On segment close: produces an LLM recap (soft-fallback to heuristic), -//! embeds the recap, extracts events, and updates user profile. -//! 4. Extracts simple lessons from tool failures. -//! 5. (Phase 2 / #566) At segment close/flush, ingests the segment's raw prose -//! turns (user + assistant; tool-call JSON stripped) into the memory tree as -//! `source_id = "conversations:agent"` when -//! `config.learning.chat_to_tree_enabled` is true. The leaf is RAW PROSE — -//! the LLM recap is NEVER fed into the tree (evidence-vs-interpretation -//! policy). Each leaf carries episodic provenance stamped in `source_ref`. -//! 6. `flush_open_segment` force-closes the trailing open segment at session -//! end so the last segment always gets a recap + embedding + tree ingest. 
- -use crate::openhuman::agent::hooks::{PostTurnHook, TurnContext}; -use crate::openhuman::config::Config; -use crate::openhuman::memory::chat::ChatProvider; -use crate::openhuman::memory::ingest_pipeline; -use crate::openhuman::memory_store::events::{self, EventRecord, EventType}; -use crate::openhuman::memory_store::fts5::{self, EpisodicEntry}; -use crate::openhuman::memory_store::profile::{self, FacetType}; -use crate::openhuman::memory_store::segments::{ - self, BoundaryConfig, BoundaryDecision, ConversationSegment, -}; -use crate::openhuman::memory_store::trees::types::TreeKind; -use crate::openhuman::memory_sync::canonicalize::chat::{ChatBatch, ChatMessage}; -use crate::openhuman::memory_tree::score::embed::{build_embedder_from_config, Embedder}; -use crate::openhuman::memory_tree::summarise::{summarise, SummaryContext, SummaryInput}; -use async_trait::async_trait; -use parking_lot::Mutex; -use rusqlite::Connection; -use std::collections::hash_map::RandomState; -use std::hash::{BuildHasher, Hasher}; -use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; - -/// Background Archivist that indexes turns into FTS5 episodic memory -/// and manages conversation segmentation. -/// -/// Produces an LLM recap + embedding for each closed segment and flushes -/// the trailing open segment at session end. -pub struct ArchivistHook { - /// SQLite connection shared with UnifiedMemory. - conn: Option>>, - /// Whether the archivist is enabled. - enabled: bool, - /// Boundary detection configuration. - boundary_config: BoundaryConfig, - /// Optional runtime config — used to gate the tree-ingest path and to - /// build the LLM chat provider + embedder. - /// - /// When `None`, the tree-ingest path is skipped. Set via - /// [`ArchivistHook::with_config`] on the production path. - config: Option, - /// Optional LLM provider for segment recap. When `None`, the - /// fallback heuristic summary is used instead. 
- chat_provider: Option>, - /// Optional embedder for segment recap vectors. When `None`, embedding - /// is skipped (segment is still summarised). - embedder: Option>, -} - -impl ArchivistHook { - /// Create an Archivist hook with a shared SQLite connection. - /// - /// LLM recap and embedding are disabled by default; call - /// [`Self::with_config`] on the production path to wire them in. - pub fn new(conn: Arc>, enabled: bool) -> Self { - Self { - conn: Some(conn), - enabled, - boundary_config: BoundaryConfig::default(), - config: None, - chat_provider: None, - embedder: None, - } - } - - /// Attach runtime config so the archivist can gate the tree-ingest path - /// and build its LLM chat provider + embedder from config. - /// - /// When `config.learning.chat_to_tree_enabled` is `true`, each closed - /// segment's raw prose turns are ingested into the memory tree as - /// `source_id="conversations:agent"` (one batch per segment, not per turn). - /// The chat provider is built via `build_chat_provider(config, Summarise)`; - /// the embedder via `build_embedder_from_config(config)`. Both are - /// soft-fallback: if construction fails, the fields stay `None` and the - /// archivist falls back to heuristic summary / no embedding. - pub fn with_config(mut self, config: Config) -> Self { - // Build the LLM chat provider for segment recap. - let chat_provider: Option> = - match crate::openhuman::memory::chat::build_chat_provider(&config) { - Ok(p) => { - tracing::debug!("[archivist] segment recap provider={} registered", p.name()); - Some(p) - } - Err(e) => { - tracing::warn!( - "[archivist] failed to build chat provider for recap (will use fallback): {e}" - ); - None - } - }; - - // Build the embedder for segment recap vectors. 
- let embedder: Option> = match build_embedder_from_config(&config) { - Ok(e) => { - tracing::debug!("[archivist] segment embed provider={} registered", e.name()); - Some(Arc::from(e)) - } - Err(e) => { - tracing::warn!( - "[archivist] failed to build embedder for segment recap (embedding skipped): {e}" - ); - None - } - }; - - self.chat_provider = chat_provider; - self.embedder = embedder; - self.config = Some(config); - self - } - - /// Create a disabled/no-op Archivist (when FTS5 is not available). - pub fn disabled() -> Self { - Self { - conn: None, - enabled: false, - boundary_config: BoundaryConfig::default(), - config: None, - chat_provider: None, - embedder: None, - } - } - - /// Flush the currently-open segment for `session_id`, if any, by - /// force-closing it and running the same close path (recap + embed + - /// event extraction). This guarantees the trailing segment of a session - /// is always finalized even when no boundary-triggering turn arrives. - /// - /// Called at session end (see `Agent::spawn_session_memory_extraction` - /// in `session/turn.rs`). Safe to call multiple times — segment_close - /// is idempotent (only transitions `open → closed`). 
- pub async fn flush_open_segment(&self, session_id: &str) { - if !self.enabled { - return; - } - let Some(conn) = &self.conn else { - return; - }; - let now = Self::now_timestamp(); - tracing::debug!("[archivist] flush_open_segment: checking session={session_id}"); - let open_segment = match segments::open_segment_for_session(conn, session_id) { - Ok(seg) => seg, - Err(e) => { - tracing::warn!("[archivist] flush: failed to query open segment: {e}"); - return; - } - }; - let Some(segment) = open_segment else { - tracing::debug!("[archivist] flush: no open segment for session={session_id}"); - return; - }; - tracing::debug!( - "[archivist] flush: force-closing segment={} turn_count={}", - segment.segment_id, - segment.turn_count - ); - if let Err(e) = segments::segment_close(conn, &segment.segment_id, now) { - tracing::warn!("[archivist] flush: failed to close segment: {e}"); - return; - } - self.on_segment_closed(conn, &segment, session_id, now) - .await; - } - - fn now_timestamp() -> f64 { - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs_f64() - } - - /// Handle segment lifecycle for a new turn. - /// - /// Returns the closed segment (if any) so the caller can run - /// `on_segment_closed` asynchronously after this function returns. - /// Event extraction and recap run outside this function because they - /// are async and may re-acquire the connection lock. - fn manage_segment_sync( - &self, - conn: &Arc>, - session_id: &str, - timestamp: f64, - user_message: &str, - current_episodic_id: i64, - current_seq: Option, - ) -> Option { - let now = Self::now_timestamp(); - - // Check for an open segment for this session. - let open_segment = match segments::open_segment_for_session(conn, session_id) { - Ok(seg) => seg, - Err(e) => { - tracing::warn!("[archivist] failed to query open segment: {e}"); - return None; - } - }; - - match open_segment { - Some(segment) => { - // Run boundary detection. 
- let decision = segments::detect_boundary( - &self.boundary_config, - &segment, - timestamp, - user_message, - None, // No embedding for now — cosine drift skipped without embedder access. - ); - - match decision { - BoundaryDecision::Continue => { - tracing::debug!( - "[archivist] segment={} continues (turn_count={})", - segment.segment_id, - segment.turn_count - ); - if let Err(e) = segments::segment_append_turn( - conn, - &segment.segment_id, - current_episodic_id, - current_seq, - timestamp, - now, - ) { - tracing::warn!("[archivist] failed to append turn to segment: {e}"); - } - None - } - BoundaryDecision::Boundary(reason) => { - tracing::debug!( - "[archivist] segment boundary detected: {reason} — closing {}", - segment.segment_id - ); - - // Close the current segment. - if let Err(e) = segments::segment_close(conn, &segment.segment_id, now) { - tracing::warn!("[archivist] failed to close segment: {e}"); - return None; - } - - // Create a new segment for the new topic. - // The new segment starts at the current turn's episodic ID. - let new_id = format!("seg-{}", uuid_v4()); - if let Err(e) = segments::segment_create( - conn, - &new_id, - session_id, - "global", - current_episodic_id, - current_seq, - timestamp, - now, - ) { - tracing::warn!("[archivist] failed to create new segment: {e}"); - } - - // Return the closed segment so the caller can run - // on_segment_closed asynchronously. - Some(segment) - } - } - } - None => { - // No open segment — create the first one using the current episodic ID. - let segment_id = format!("seg-{}", uuid_v4()); - tracing::debug!( - "[archivist] creating first segment={segment_id} for session={session_id}" - ); - if let Err(e) = segments::segment_create( - conn, - &segment_id, - session_id, - "global", - current_episodic_id, - current_seq, - timestamp, - now, - ) { - tracing::warn!("[archivist] failed to create initial segment: {e}"); - } - None - } - } - } - - /// Called when a segment is closed. 
- /// - /// Produces a segment recap (LLM if a chat provider is configured, - /// otherwise the heuristic fallback), embeds the recap, extracts - /// heuristic events, and updates the user profile. - /// - /// Soft-fallback contract (mirrors `LlmSummariser`): this function - /// never returns `Err`; all failures are logged and ignored. - async fn on_segment_closed( - &self, - conn: &Arc>, - segment: &ConversationSegment, - session_id: &str, - now: f64, - ) { - // Gather the conversation text for this segment. Prefer the - // md-backed memory_archivist read when config is available; fall - // back to FTS5 in test paths or when config isn't wired. - let entries = self.read_session_entries(conn, session_id); - - // Filter entries that fall within the segment's time window. - // Use <= for end_timestamp (entries at the boundary are part of this - // segment). The boundary-triggering turn has a timestamp AFTER - // end_timestamp, so it won't be included. - let segment_entries: Vec<&EpisodicEntry> = entries - .iter() - .filter(|e| { - e.timestamp >= segment.start_timestamp - && segment - .end_timestamp - .map(|end| e.timestamp <= end) - .unwrap_or(true) - }) - .collect(); - - if segment_entries.is_empty() { - tracing::debug!( - "[archivist] segment={} has no entries — skipping recap", - segment.segment_id - ); - return; - } - - // Build segment text from user messages (for event extraction). - let segment_text: String = segment_entries - .iter() - .filter(|e| e.role == "user") - .map(|e| e.content.as_str()) - .collect::>() - .join(". "); - - // ── Segment recap (LLM or heuristic fallback) ──────────────────── - let (summary, _from_llm) = self - .summarize_entries(&segment_entries, &segment.segment_id, segment.turn_count) - .await; - - // Persist the recap. 
- if let Err(e) = segments::segment_set_summary(conn, &segment.segment_id, &summary, now) { - tracing::warn!("[archivist] failed to set segment summary: {e}"); - } else { - tracing::debug!( - "[archivist] recap persisted segment={} summary_chars={}", - segment.segment_id, - summary.len() - ); - } - - // ── Finalize-time embedding ─────────────────────────────────────── - // Embed the recap only when the segment is being finalized (closed). - // Never embed per-turn or on an open segment — this is the single - // write point for segment_embeddings rows. - if let Some(ref embedder) = self.embedder { - let model_signature = embedder.name().to_string(); - tracing::debug!( - "[archivist] embedding recap segment={} model={}", - segment.segment_id, - model_signature - ); - match embedder.embed(&summary).await { - Ok(vec) => { - match segments::segment_embedding_upsert( - conn, - &segment.segment_id, - &model_signature, - &vec, - now, - ) { - Ok(()) => { - tracing::debug!( - "[archivist] embedding stored segment={} model={} dim={}", - segment.segment_id, - model_signature, - vec.len() - ); - } - Err(e) => { - tracing::warn!( - "[archivist] failed to persist segment embedding (non-fatal) segment={}: {e}", - segment.segment_id - ); - } - } - } - Err(e) => { - tracing::warn!( - "[archivist] embed call failed (non-fatal) segment={} model={}: {e}", - segment.segment_id, - model_signature - ); - } - } - } else { - tracing::debug!( - "[archivist] no embedder — skipping segment embedding segment={}", - segment.segment_id - ); - } - - // ── Heuristic event extraction ──────────────────────────────────── - if !segment_text.is_empty() { - let extracted = events::extract_events_heuristic(&segment_text); - tracing::debug!( - "[archivist] extracted {} events from segment {}", - extracted.len(), - segment.segment_id - ); - - for (event_type, content) in &extracted { - let event_id = format!("evt-{}", uuid_v4()); - let event = EventRecord { - event_id, - segment_id: 
segment.segment_id.clone(), - session_id: session_id.to_string(), - namespace: segment.namespace.clone(), - event_type: event_type.clone(), - content: content.clone(), - subject: None, - timestamp_ref: None, - confidence: 0.6, - embedding: None, - source_turn_ids: None, - created_at: now, - }; - if let Err(e) = events::event_insert(conn, &event) { - tracing::warn!("[archivist] failed to insert event: {e}"); - } - - // Update user profile from preference and fact events. - match event_type { - EventType::Preference => { - let key = extract_profile_key(content, "preference"); - let facet_id = format!("prf-{}", uuid_v4()); - if let Err(e) = profile::profile_upsert( - conn, - &facet_id, - &FacetType::Preference, - &key, - content, - 0.6, - Some(&segment.segment_id), - now, - ) { - tracing::warn!("[archivist] failed to upsert profile facet: {e}"); - } - } - EventType::Fact => { - let key = extract_profile_key(content, "fact"); - let facet_id = format!("prf-{}", uuid_v4()); - if let Err(e) = profile::profile_upsert( - conn, - &facet_id, - &FacetType::Context, - &key, - content, - 0.6, - Some(&segment.segment_id), - now, - ) { - tracing::warn!("[archivist] failed to upsert profile fact: {e}"); - } - } - _ => {} - } - } - } - - // ── Phase 2: tree ingest at segment granularity ─────────────────── - // Gate: only when config is attached and chat_to_tree_enabled is true. - // Ingest the segment's raw prose turns (NOT the LLM recap) as one - // ChatBatch into the memory tree under `source_id="conversations:agent"`. - // Evidence-vs-interpretation: the tree must ingest raw prose and build - // its own summaries; feeding the recap would make the tree summarise - // a summary. Non-fatal: failures are logged and swallowed. 
- if let Some(ref cfg) = self.config { - if cfg.learning.chat_to_tree_enabled { - tracing::debug!( - "[archivist] piping segment into tree as conversations:agent \ - session={session_id} segment={} entries={}", - segment.segment_id, - segment_entries.len() - ); - self.pipe_segment_to_tree(cfg, segment, session_id, &segment_entries) - .await; - } - } - } -} - -#[async_trait] -impl PostTurnHook for ArchivistHook { - fn name(&self) -> &str { - "archivist" - } - - async fn on_turn_complete(&self, ctx: &TurnContext) -> anyhow::Result<()> { - if !self.enabled { - return Ok(()); - } - - let Some(conn) = &self.conn else { - return Ok(()); - }; - - let session_id = ctx.session_id.as_deref().unwrap_or("unknown"); - let timestamp = Self::now_timestamp(); - - tracing::debug!( - "[archivist] indexing turn: session={session_id}, tools={}, duration={}ms", - ctx.tool_calls.len(), - ctx.turn_duration_ms - ); - - // Index user message. - fts5::episodic_insert( - conn, - &EpisodicEntry { - id: None, - session_id: session_id.to_string(), - timestamp, - role: "user".to_string(), - content: ctx.user_message.clone(), - lesson: None, - tool_calls_json: None, - cost_microdollars: 0, - }, - )?; - - // Retrieve the inserted episodic ID for segment tracking. - let current_episodic_id = { - let db = conn.lock(); - db.query_row("SELECT last_insert_rowid()", [], |row| row.get::<_, i64>(0)) - .unwrap_or(1) - }; - - // Index assistant response with tool call summary. - let tool_calls_json = if ctx.tool_calls.is_empty() { - None - } else { - Some(serde_json::to_string(&ctx.tool_calls).unwrap_or_default()) - }; - - // Extract a simple lesson from tool failures (lightweight, no LLM needed). - let lesson = extract_lesson_from_tools(&ctx.tool_calls); - - fts5::episodic_insert( - conn, - &EpisodicEntry { - id: None, - session_id: session_id.to_string(), - // Offset by 1ms so assistant entries sort after user entries within - // the same turn. Relies on turn timestamps having >=1ms resolution. 
- timestamp: timestamp + 0.001, - role: "assistant".to_string(), - content: ctx.assistant_response.clone(), - lesson, - tool_calls_json, - cost_microdollars: 0, - }, - )?; - - tracing::debug!("[archivist] episodic rows written: session={session_id}"); - - // Dual-write into memory_archivist::store (md-backed) so we can - // validate the FTS5 → md migration before flipping the read side. - // Best-effort: a write failure here must not break the turn. The - // user turn's assigned seq is captured into `current_seq` so the - // segment ops can store it alongside the FTS5 episodic id. - let mut current_seq: Option = None; - if let Some(cfg) = self.config.as_ref() { - let ts_ms = (timestamp * 1000.0) as i64; - let user_turn = crate::openhuman::memory_archivist::ArchivedTurn { - session_id: session_id.to_string(), - seq: 0, // assigned by record_turn - timestamp_ms: ts_ms, - role: "user".to_string(), - content: ctx.user_message.clone(), - lesson: None, - tool_calls_json: None, - cost_microdollars: 0, - }; - match crate::openhuman::memory_archivist::store::record_turn(cfg, user_turn) { - Ok(stored) => current_seq = Some(stored.seq), - Err(e) => { - tracing::warn!("[archivist] memory_archivist user dual-write failed: {e}"); - } - } - // Assistant turn carries the tool_calls_json + lesson the FTS5 - // insert just wrote. Re-derive locally so we don't depend on - // FTS5 having returned. 
- let assistant_lesson = extract_lesson_from_tools(&ctx.tool_calls); - let assistant_tool_calls = if ctx.tool_calls.is_empty() { - None - } else { - Some(serde_json::to_string(&ctx.tool_calls).unwrap_or_default()) - }; - let assistant_turn = crate::openhuman::memory_archivist::ArchivedTurn { - session_id: session_id.to_string(), - seq: 0, - timestamp_ms: ts_ms + 1, - role: "assistant".to_string(), - content: ctx.assistant_response.clone(), - lesson: assistant_lesson, - tool_calls_json: assistant_tool_calls, - cost_microdollars: 0, - }; - if let Err(e) = - crate::openhuman::memory_archivist::store::record_turn(cfg, assistant_turn) - { - tracing::warn!("[archivist] memory_archivist assistant dual-write failed: {e}"); - } - } - - // Manage conversation segmentation (sync boundary detection + SQLite - // operations). Returns the just-closed segment when a boundary fired. - let closed_segment = self.manage_segment_sync( - conn, - session_id, - timestamp, - &ctx.user_message, - current_episodic_id, - current_seq, - ); - - // Run async recap + embed + segment-tree ingest on the closed segment - // (if any). Per-turn tree ingest is intentionally absent — Phase 2 - // moves the tree write to segment granularity inside on_segment_closed. - if let Some(ref segment) = closed_segment { - let now = Self::now_timestamp(); - self.on_segment_closed(conn, segment, session_id, now).await; - } - - tracing::debug!("[archivist] turn indexed successfully: session={session_id}"); - Ok(()) - } -} - -impl ArchivistHook { - /// Read every entry recorded for `session_id`, preferring the - /// md-backed `memory_archivist::store` when `self.config` is set and - /// falling back to the legacy FTS5 episodic table otherwise. - /// - /// Returns `EpisodicEntry` so the existing call sites (segment - /// gathering, recap rendering, tree push) keep their shape unchanged - /// during the FTS5 retirement migration. 
- fn read_session_entries( - &self, - conn: &Arc>, - session_id: &str, - ) -> Vec { - if let Some(cfg) = self.config.as_ref() { - match crate::openhuman::memory_archivist::store::session_entries(cfg, session_id) { - Ok(turns) => { - return turns - .into_iter() - .map(|t| EpisodicEntry { - id: None, - session_id: t.session_id, - // ArchivedTurn stores epoch-ms; EpisodicEntry - // takes epoch-seconds as f64. - timestamp: (t.timestamp_ms as f64) / 1000.0, - role: t.role, - content: t.content, - lesson: t.lesson, - tool_calls_json: t.tool_calls_json, - cost_microdollars: t.cost_microdollars, - }) - .collect(); - } - Err(e) => { - tracing::warn!( - "[archivist] memory_archivist read failed (falling back to FTS5): {e}" - ); - } - } - } - fts5::episodic_session_entries(conn, session_id).unwrap_or_default() - } - - /// Shared summarize helper — the **single LLM summarizer** used by both - /// the finalize path (`on_segment_closed`) and the rolling-recap path - /// (`rolling_segment_recap`). - /// - /// Builds a prose corpus from `entries`, calls the `LlmSummariser` when a - /// `chat_provider` is configured, and falls back to the heuristic - /// `segments::fallback_summary` on any failure or when no provider is - /// wired in. Always returns a non-empty string. - /// - /// Invariants: - /// - NEVER mutates DB state (no `segment_set_summary`, no embedding). - /// - NEVER closes a segment. - /// - Safe to call on both open and closed segments. - /// Summarize a set of episodic entries into a recap string. - /// - /// Returns `(text, produced_by_llm)`. `produced_by_llm == false` means the - /// LLM was unavailable / failed / returned empty and `text` is the shallow - /// heuristic `fallback_summary` bookend stub. 
That stub is an acceptable - /// durable last-resort on the *finalize* path, but callers driving the - /// **live prompt** (rolling recap → compaction) must treat - /// `produced_by_llm == false` as "no real recap" and fall back to their - /// own strategy — the stub must never become live compaction text. - async fn summarize_entries( - &self, - entries: &[&EpisodicEntry], - segment_id: &str, - turn_count: i32, - ) -> (String, bool) { - if entries.is_empty() { - tracing::debug!( - "[archivist] summarize_entries: no entries for segment={segment_id} — \ - returning empty fallback" - ); - return (segments::fallback_summary("", "", turn_count), false); - } - - // Build a full prose corpus from ALL entries (user + assistant prose; - // tool-call JSON is already excluded because the archivist stores - // stripped prose in the `content` column). - let corpus_inputs: Vec = entries - .iter() - .filter(|e| !e.content.trim().is_empty()) - .map(|e| { - use crate::openhuman::memory_store::chunks::types::approx_token_count; - let content = e.content.clone(); - let token_count = approx_token_count(&content); - let ts = chrono::DateTime::from_timestamp(e.timestamp as i64, 0) - .unwrap_or_else(chrono::Utc::now); - SummaryInput { - id: format!("{}-{}", e.role, e.timestamp as u64), - content, - token_count, - entities: Vec::new(), - topics: Vec::new(), - time_range_start: ts, - time_range_end: ts, - score: 0.5, - } - }) - .collect(); - - let summary_ctx = SummaryContext { - tree_id: segment_id, - tree_kind: TreeKind::Source, - target_level: 0, - token_budget: 2_000, - }; - - let first = entries.first().map(|e| e.content.as_str()).unwrap_or(""); - let last = entries.last().map(|e| e.content.as_str()).unwrap_or(first); - - if self.chat_provider.is_some() { - if let Some(ref config) = self.config { - tracing::debug!( - "[archivist] summarize_entries: LLM recap segment={segment_id} entries={}", - entries.len() - ); - #[cfg(test)] - let summary_result = if let Some(provider) = 
self.chat_provider.as_ref() { - crate::openhuman::memory::chat::test_override::with_provider( - Arc::clone(provider), - summarise(config, &corpus_inputs, &summary_ctx), - ) - .await - } else { - summarise(config, &corpus_inputs, &summary_ctx).await - }; - #[cfg(not(test))] - let summary_result = summarise(config, &corpus_inputs, &summary_ctx).await; - - match summary_result { - Ok(output) if !output.content.is_empty() => { - tracing::debug!( - "[archivist] summarize_entries: LLM recap ok segment={segment_id} \ - chars={}", - output.content.len() - ); - return (output.content, true); - } - Ok(_) => { - tracing::debug!( - "[archivist] summarize_entries: LLM returned empty — \ - heuristic fallback segment={segment_id}" - ); - } - Err(e) => { - tracing::warn!( - "[archivist] summarize_entries: LLM recap failed (non-fatal) \ - segment={segment_id}: {e} — heuristic fallback" - ); - } - } - } else { - tracing::debug!( - "[archivist] summarize_entries: no config — \ - heuristic fallback segment={segment_id}" - ); - } - } else { - tracing::debug!( - "[archivist] summarize_entries: no chat provider — \ - heuristic fallback segment={segment_id}" - ); - } - (segments::fallback_summary(first, last, turn_count), false) - } - - /// Produce a rolling recap of the **currently-open** segment for - /// `session_id` WITHOUT closing it, writing `segment_set_summary`, or - /// embedding. - /// - /// This is the Phase 1.5 "one summarizer" entry point. Both - /// `on_segment_closed` (finalize) and this function delegate to the same - /// [`Self::summarize_entries`] helper so the same LLM path is used in both - /// cases. The distinction is purely in what happens *after* the summary - /// string is produced: - /// - /// - **Finalize** (`on_segment_closed`): persists the summary via - /// `segment_set_summary`, embeds it, extracts events, pipes tree ingest. - /// - **Rolling** (this function): returns the summary string and does - /// nothing else — segment stays open, DB is untouched. 
- /// - /// Returns `None` when: - /// - The archivist is disabled or has no connection. - /// - There is no open segment for `session_id`. - /// - The open segment has no episodic entries. - /// - No real LLM recap was produced (LLM unavailable / failed / empty, so - /// only the heuristic bookend stub is available). The shallow stub is - /// deliberately NOT used as live compaction text. - /// - /// Callers must treat `None` as "recap unavailable" and fall back to - /// their own compaction strategy (e.g. `ProviderSummarizer`). - pub async fn rolling_segment_recap(&self, session_id: &str) -> Option { - if !self.enabled { - tracing::debug!( - "[archivist] rolling_segment_recap: archivist disabled \ - session={session_id} — returning None" - ); - return None; - } - let conn = self.conn.as_ref()?; - - // Find the currently-open segment for this session. - let open_segment = match segments::open_segment_for_session(conn, session_id) { - Ok(Some(seg)) => seg, - Ok(None) => { - tracing::debug!( - "[archivist] rolling_segment_recap: no open segment for \ - session={session_id} — returning None" - ); - return None; - } - Err(e) => { - tracing::warn!( - "[archivist] rolling_segment_recap: failed to query open segment \ - session={session_id}: {e} — returning None" - ); - return None; - } - }; - - // Gather the episodic entries for this session so far. - let all_entries = self.read_session_entries(conn, session_id); - - // Keep only entries within the open segment's time window (start → - // now, inclusive). An open segment has `end_timestamp = None`. 
- let segment_entries: Vec<&EpisodicEntry> = all_entries - .iter() - .filter(|e| e.timestamp >= open_segment.start_timestamp) - .collect(); - - if segment_entries.is_empty() { - tracing::debug!( - "[archivist] rolling_segment_recap: no entries in open segment={} \ - session={session_id} — returning None", - open_segment.segment_id - ); - return None; - } - - tracing::debug!( - "[archivist] rolling_segment_recap: summarizing open segment={} \ - entries={} session={session_id}", - open_segment.segment_id, - segment_entries.len() - ); - - let (recap, from_llm) = self - .summarize_entries( - &segment_entries, - &open_segment.segment_id, - open_segment.turn_count, - ) - .await; - - if !from_llm { - tracing::debug!( - "[archivist] rolling_segment_recap: only heuristic bookend stub \ - available (no real LLM recap) session={session_id} segment={} — \ - returning None so compaction falls back to ProviderSummarizer", - open_segment.segment_id - ); - return None; - } - - if recap.is_empty() { - tracing::debug!( - "[archivist] rolling_segment_recap: summarize_entries returned empty \ - session={session_id} segment={} — returning None", - open_segment.segment_id - ); - return None; - } - - tracing::debug!( - "[archivist] rolling_segment_recap: produced LLM recap chars={} \ - session={session_id} segment={}", - recap.len(), - open_segment.segment_id - ); - Some(recap) - } - - /// Pipe a closed segment's raw prose turns into the memory tree as - /// `source_id="conversations:agent"`. - /// - /// **Design contract (Phase 2):** - /// - ONE ingest per segment (not per turn) — the batch boundary is the - /// segment, so all turns land as a single ChatBatch. - /// - RAW PROSE only — the LLM recap (summary) is explicitly NOT ingested. - /// The tree must build its own summaries from evidence (raw turns); - /// feeding a summary-of-a-summary violates the evidence-vs-interpretation - /// policy. 
- /// - `source_id = "conversations:agent"` is a CONSTANT — a single shared - /// tree source for all agent chat sessions (never per-session or per-segment). - /// - Tool-call JSON is stripped from assistant entries so structured - /// payloads do not reach the tree (memory ingestion policy). - /// - Provenance is stamped on each `ChatMessage.source_ref` as - /// `agent://session/{session_id}/segment/{segment_id}#ep{start}-{end}` - /// so tree leaves can be traced back to episodic rows for drill-down and - /// deduplication. - /// - /// Failures are logged and swallowed; the episodic write is the source of - /// truth. - async fn pipe_segment_to_tree( - &self, - config: &Config, - segment: &crate::openhuman::memory_store::segments::ConversationSegment, - session_id: &str, - entries: &[&fts5::EpisodicEntry], - ) { - use chrono::{TimeZone, Utc}; - - // Collect the episodic id span for provenance stamping. - // start_episodic_id comes from the segment record (set at creation); - // end_episodic_id is the latest turn id (may be None if only one turn). - let start_ep = segment.start_episodic_id; - let end_ep = segment.end_episodic_id.unwrap_or(start_ep); - let segment_id = &segment.segment_id; - - // The provenance URI embeds session + segment + episodic id span so - // tree leaves can be traced back to episodic_log rows without a - // full-text scan. - let provenance = - format!("agent://session/{session_id}/segment/{segment_id}#ep{start_ep}-{end_ep}"); - - // Build one ChatMessage per episodic entry (user + assistant; skip - // empties). Tool-call JSON is stripped from assistant content so only - // prose flows into the tree. 
- let messages: Vec = entries - .iter() - .filter_map(|e| { - let raw_text = if e.role == "assistant" { - strip_tool_calls_from_response(&e.content) - } else { - e.content.clone() - }; - // Strip `[IMAGE:]` attachment markers so images never - // enter episodic memory ingestion — otherwise the base64 is - // chunked, embedded (garbage + Voyage size errors), and fed to - // the extract LLM (#3205). `parse_image_markers` returns the - // marker-free prose, already trimmed; the image itself isn't - // useful memory text. An image-only turn collapses to empty and - // is skipped by the guard below. - let (text, _image_refs) = - crate::openhuman::agent::multimodal::parse_image_markers(&raw_text); - if text.is_empty() { - return None; - } - - // Convert the f64 Unix timestamp to DateTime. - let secs = e.timestamp as i64; - let nanos = ((e.timestamp.fract()) * 1e9) as u32; - let ts = Utc - .timestamp_opt(secs, nanos.min(999_999_999)) - .single() - .unwrap_or_else(Utc::now); - - Some(ChatMessage { - author: e.role.clone(), - timestamp: ts, - text, - source_ref: Some(provenance.clone()), - }) - }) - .collect(); - - if messages.is_empty() { - tracing::debug!( - "[archivist] pipe_segment_to_tree: no prose messages in segment={segment_id} — skipping" - ); - return; - } - - let batch = ChatBatch { - platform: "agent".into(), - // channel_label carries session_id for human-readable context. - channel_label: session_id.to_string(), - messages, - }; - - // `source_id` is intentionally a CONSTANT — all agent sessions share - // one tree source so cross-session summarisation sees the full history. - let source_id = "conversations:agent"; - // `owner` scopes the memory to the session; `tags` enable filtering. 
- let owner = session_id; - let tags = vec!["agent_chat".to_string()]; - - tracing::debug!( - "[archivist] tree ingest start: source_id={source_id} session={session_id} \ - segment={segment_id} ep_span={start_ep}-{end_ep} provenance={provenance}" - ); - - match ingest_pipeline::ingest_chat(config, source_id, owner, tags, batch).await { - Ok(result) => { - tracing::debug!( - "[archivist] tree ingest ok: source_id={source_id} \ - session={session_id} segment={segment_id} \ - chunks_written={} provenance={provenance}", - result.chunks_written - ); - } - Err(e) => { - tracing::warn!( - "[archivist] tree ingest failed (non-fatal): source_id={source_id} \ - session={session_id} segment={segment_id} error={e}" - ); - } - } - } -} - -/// Strip tool-call JSON blocks from an assistant response, leaving only the -/// prose text. -/// -/// The archivist stores the full response (including `tool_calls_json`) in -/// the episodic log for diagnostic purposes. However, per the memory -/// ingestion policy, structured tool-call payloads must not reach the memory -/// tree — only the assistant's natural-language prose is ingested. -/// -/// This function applies a lightweight heuristic: it removes any contiguous -/// spans of text that look like `` XML/JSON blocks or -/// raw JSON objects that begin with `{"tool_calls":`. The output may be empty -/// if the entire response was tool-call markup — callers should handle that -/// case (empty text → no-op ingest). -fn strip_tool_calls_from_response(response: &str) -> String { - // Fast path: if the response contains no obvious tool-call markers, return - // it unchanged to avoid unnecessary allocation. - if !response.contains("") - && !response.contains("{\"tool_calls\"") - && !response.contains("\"tool_use\"") - { - return response.to_string(); - } - - // Remove XML-style tool-call blocks. - let mut cleaned = response.to_string(); - - // Strip spans (may span multiple lines). 
- while let Some(start) = cleaned.find("") { - if let Some(end) = cleaned[start..].find("") { - cleaned.drain(start..start + end + "".len()); - } else { - // Unclosed tag — remove from the tag to end of string. - cleaned.truncate(start); - break; - } - } - - // Drop JSON / tool-use payload lines the XML strip above cannot catch - // (evidence-vs-interpretation policy: tool-call payloads must never reach - // tree ingest). - cleaned = cleaned - .lines() - .filter(|line| { - let l = line.trim(); - !(l.contains("\"tool_use\"") - || l.starts_with("{\"tool_calls\"") - || l.starts_with("\"tool_calls\"")) - }) - .collect::>() - .join("\n"); - - // Trim and collapse runs of blank lines left by block removal. - let trimmed = cleaned - .lines() - .map(str::trim_end) - .collect::>() - .join("\n"); - - // Collapse more than two consecutive newlines to two. - let mut result = String::with_capacity(trimmed.len()); - let mut blank_run = 0usize; - for line in trimmed.lines() { - if line.is_empty() { - blank_run += 1; - if blank_run <= 2 { - result.push('\n'); - } - } else { - blank_run = 0; - result.push_str(line); - result.push('\n'); - } - } - - result.trim().to_string() -} - -/// Extract simple lessons from tool call outcomes (no LLM needed). -fn extract_lesson_from_tools( - tool_calls: &[crate::openhuman::agent::hooks::ToolCallRecord], -) -> Option { - let failures: Vec<&str> = tool_calls - .iter() - .filter(|tc| !tc.success) - .map(|tc| tc.name.as_str()) - .collect(); - - if failures.is_empty() { - return None; - } - - Some(format!( - "Tools that failed in this turn: {}", - failures.join(", ") - )) -} - -/// Extract a short profile key from event content (first few meaningful words). 
-fn extract_profile_key(content: &str, prefix: &str) -> String { - let words: Vec<&str> = content - .split_whitespace() - .filter(|w| w.len() > 2) - .take(4) - .collect(); - let key = words.join("_").to_lowercase(); - let key = key - .chars() - .filter(|c| c.is_ascii_alphanumeric() || *c == '_') - .collect::(); - if key.is_empty() { - format!("{prefix}_unknown") - } else { - format!("{prefix}_{key}") - } -} - -/// Generate a simple UUID v4 (random). -fn uuid_v4() -> String { - let nanos = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos(); - format!("{:x}{:08x}", nanos, rand_u32()) -} - -/// Simple random u32 from system entropy. -fn rand_u32() -> u32 { - let state = RandomState::new(); - let mut hasher = state.build_hasher(); - hasher.write_u64( - SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_nanos() as u64, - ); - hasher.finish() as u32 -} - -#[cfg(test)] -impl ArchivistHook { - /// Test-only constructor that injects a stub `ChatProvider` and `Embedder` - /// directly, bypassing `with_config`'s provider-build logic. Used by - /// Phase 1 tests to verify LLM recap and embedding paths without hitting - /// a real LLM or Ollama daemon. Exposed as `pub(crate)` so Phase 3 - /// STM recall integration tests can drive the full archivist path. - pub(crate) fn new_with_stubs( - conn: Arc>, - chat_provider: Arc, - embedder: Arc, - ) -> Self { - Self { - conn: Some(conn), - enabled: true, - boundary_config: BoundaryConfig::default(), - config: Some(Config::default()), - chat_provider: Some(chat_provider), - embedder: Some(embedder), - } - } - - /// Test-only constructor that injects stub providers AND a `Config`, so the - /// Phase 2 segment-tree ingest path (gated by - /// `config.learning.chat_to_tree_enabled`) can be exercised hermetically. - /// - /// `config.learning.chat_to_tree_enabled` must be set to `true` by the caller - /// for the tree ingest to fire; the hook does NOT force it on. 
- pub(crate) fn new_with_stubs_and_config( - conn: Arc>, - chat_provider: Arc, - embedder: Arc, - config: Config, - ) -> Self { - Self { - conn: Some(conn), - enabled: true, - boundary_config: BoundaryConfig::default(), - config: Some(config), - chat_provider: Some(chat_provider), - embedder: Some(embedder), - } - } -} - -#[cfg(test)] -#[path = "archivist_tests.rs"] -mod tests; diff --git a/src/openhuman/agent/harness/archivist/helpers.rs b/src/openhuman/agent/harness/archivist/helpers.rs new file mode 100644 index 0000000000..531b39298b --- /dev/null +++ b/src/openhuman/agent/harness/archivist/helpers.rs @@ -0,0 +1,143 @@ +//! Small utility functions used across archivist sub-modules. + +use std::collections::hash_map::RandomState; +use std::hash::{BuildHasher, Hasher}; +use std::time::{SystemTime, UNIX_EPOCH}; + +/// Strip tool-call JSON blocks from an assistant response, leaving only the +/// prose text. +/// +/// The archivist stores the full response (including `tool_calls_json`) in +/// the episodic log for diagnostic purposes. However, per the memory +/// ingestion policy, structured tool-call payloads must not reach the memory +/// tree — only the assistant's natural-language prose is ingested. +/// +/// This function applies a lightweight heuristic: it removes any contiguous +/// spans of text that look like `` XML/JSON blocks or +/// raw JSON objects that begin with `{"tool_calls":`. The output may be empty +/// if the entire response was tool-call markup — callers should handle that +/// case (empty text → no-op ingest). +pub(super) fn strip_tool_calls_from_response(response: &str) -> String { + // Fast path: if the response contains no obvious tool-call markers, return + // it unchanged to avoid unnecessary allocation. + if !response.contains("") + && !response.contains("{\"tool_calls\"") + && !response.contains("\"tool_use\"") + { + return response.to_string(); + } + + // Remove XML-style tool-call blocks. 
+ let mut cleaned = response.to_string(); + + // Strip spans (may span multiple lines). + while let Some(start) = cleaned.find("") { + if let Some(end) = cleaned[start..].find("") { + cleaned.drain(start..start + end + "".len()); + } else { + // Unclosed tag — remove from the tag to end of string. + cleaned.truncate(start); + break; + } + } + + // Drop JSON / tool-use payload lines the XML strip above cannot catch + // (evidence-vs-interpretation policy: tool-call payloads must never reach + // tree ingest). + cleaned = cleaned + .lines() + .filter(|line| { + let l = line.trim(); + !(l.contains("\"tool_use\"") + || l.starts_with("{\"tool_calls\"") + || l.starts_with("\"tool_calls\"")) + }) + .collect::>() + .join("\n"); + + // Trim and collapse runs of blank lines left by block removal. + let trimmed = cleaned + .lines() + .map(str::trim_end) + .collect::>() + .join("\n"); + + // Collapse more than two consecutive newlines to two. + let mut result = String::with_capacity(trimmed.len()); + let mut blank_run = 0usize; + for line in trimmed.lines() { + if line.is_empty() { + blank_run += 1; + if blank_run <= 2 { + result.push('\n'); + } + } else { + blank_run = 0; + result.push_str(line); + result.push('\n'); + } + } + + result.trim().to_string() +} + +/// Extract simple lessons from tool call outcomes (no LLM needed). +pub(super) fn extract_lesson_from_tools( + tool_calls: &[crate::openhuman::agent::hooks::ToolCallRecord], +) -> Option { + let failures: Vec<&str> = tool_calls + .iter() + .filter(|tc| !tc.success) + .map(|tc| tc.name.as_str()) + .collect(); + + if failures.is_empty() { + return None; + } + + Some(format!( + "Tools that failed in this turn: {}", + failures.join(", ") + )) +} + +/// Extract a short profile key from event content (first few meaningful words). 
+pub(crate) fn extract_profile_key(content: &str, prefix: &str) -> String { + let words: Vec<&str> = content + .split_whitespace() + .filter(|w| w.len() > 2) + .take(4) + .collect(); + let key = words.join("_").to_lowercase(); + let key = key + .chars() + .filter(|c| c.is_ascii_alphanumeric() || *c == '_') + .collect::(); + if key.is_empty() { + format!("{prefix}_unknown") + } else { + format!("{prefix}_{key}") + } +} + +/// Generate a simple UUID v4 (random). +pub(super) fn uuid_v4() -> String { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos(); + format!("{:x}{:08x}", nanos, rand_u32()) +} + +/// Simple random u32 from system entropy. +fn rand_u32() -> u32 { + let state = RandomState::new(); + let mut hasher = state.build_hasher(); + hasher.write_u64( + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_nanos() as u64, + ); + hasher.finish() as u32 +} diff --git a/src/openhuman/agent/harness/archivist/hook_impl.rs b/src/openhuman/agent/harness/archivist/hook_impl.rs new file mode 100644 index 0000000000..ab4db9a559 --- /dev/null +++ b/src/openhuman/agent/harness/archivist/hook_impl.rs @@ -0,0 +1,155 @@ +//! `PostTurnHook` implementation for `ArchivistHook`. 
+ +use super::helpers::extract_lesson_from_tools; +use super::types::ArchivistHook; +use crate::openhuman::agent::hooks::{PostTurnHook, TurnContext}; +use crate::openhuman::memory_store::fts5::{self, EpisodicEntry}; +use async_trait::async_trait; + +#[async_trait] +impl PostTurnHook for ArchivistHook { + fn name(&self) -> &str { + "archivist" + } + + async fn on_turn_complete(&self, ctx: &TurnContext) -> anyhow::Result<()> { + if !self.enabled { + return Ok(()); + } + + let Some(conn) = &self.conn else { + return Ok(()); + }; + + let session_id = ctx.session_id.as_deref().unwrap_or("unknown"); + let timestamp = Self::now_timestamp(); + + tracing::debug!( + "[archivist] indexing turn: session={session_id}, tools={}, duration={}ms", + ctx.tool_calls.len(), + ctx.turn_duration_ms + ); + + // Index user message. + fts5::episodic_insert( + conn, + &EpisodicEntry { + id: None, + session_id: session_id.to_string(), + timestamp, + role: "user".to_string(), + content: ctx.user_message.clone(), + lesson: None, + tool_calls_json: None, + cost_microdollars: 0, + }, + )?; + + // Retrieve the inserted episodic ID for segment tracking. + let current_episodic_id = { + let db = conn.lock(); + db.query_row("SELECT last_insert_rowid()", [], |row| row.get::<_, i64>(0)) + .unwrap_or(1) + }; + + // Index assistant response with tool call summary. + let tool_calls_json = if ctx.tool_calls.is_empty() { + None + } else { + Some(serde_json::to_string(&ctx.tool_calls).unwrap_or_default()) + }; + + // Extract a simple lesson from tool failures (lightweight, no LLM needed). + let lesson = extract_lesson_from_tools(&ctx.tool_calls); + + fts5::episodic_insert( + conn, + &EpisodicEntry { + id: None, + session_id: session_id.to_string(), + // Offset by 1ms so assistant entries sort after user entries within + // the same turn. Relies on turn timestamps having >=1ms resolution. 
+ timestamp: timestamp + 0.001, + role: "assistant".to_string(), + content: ctx.assistant_response.clone(), + lesson, + tool_calls_json, + cost_microdollars: 0, + }, + )?; + + tracing::debug!("[archivist] episodic rows written: session={session_id}"); + + // Dual-write into memory_archivist::store (md-backed) so we can + // validate the FTS5 → md migration before flipping the read side. + // Best-effort: a write failure here must not break the turn. The + // user turn's assigned seq is captured into `current_seq` so the + // segment ops can store it alongside the FTS5 episodic id. + let mut current_seq: Option = None; + if let Some(cfg) = self.config.as_ref() { + let ts_ms = (timestamp * 1000.0) as i64; + let user_turn = crate::openhuman::memory_archivist::ArchivedTurn { + session_id: session_id.to_string(), + seq: 0, // assigned by record_turn + timestamp_ms: ts_ms, + role: "user".to_string(), + content: ctx.user_message.clone(), + lesson: None, + tool_calls_json: None, + cost_microdollars: 0, + }; + match crate::openhuman::memory_archivist::store::record_turn(cfg, user_turn) { + Ok(stored) => current_seq = Some(stored.seq), + Err(e) => { + tracing::warn!("[archivist] memory_archivist user dual-write failed: {e}"); + } + } + // Assistant turn carries the tool_calls_json + lesson the FTS5 + // insert just wrote. Re-derive locally so we don't depend on + // FTS5 having returned. 
+ let assistant_lesson = extract_lesson_from_tools(&ctx.tool_calls); + let assistant_tool_calls = if ctx.tool_calls.is_empty() { + None + } else { + Some(serde_json::to_string(&ctx.tool_calls).unwrap_or_default()) + }; + let assistant_turn = crate::openhuman::memory_archivist::ArchivedTurn { + session_id: session_id.to_string(), + seq: 0, + timestamp_ms: ts_ms + 1, + role: "assistant".to_string(), + content: ctx.assistant_response.clone(), + lesson: assistant_lesson, + tool_calls_json: assistant_tool_calls, + cost_microdollars: 0, + }; + if let Err(e) = + crate::openhuman::memory_archivist::store::record_turn(cfg, assistant_turn) + { + tracing::warn!("[archivist] memory_archivist assistant dual-write failed: {e}"); + } + } + + // Manage conversation segmentation (sync boundary detection + SQLite + // operations). Returns the just-closed segment when a boundary fired. + let closed_segment = self.manage_segment_sync( + conn, + session_id, + timestamp, + &ctx.user_message, + current_episodic_id, + current_seq, + ); + + // Run async recap + embed + segment-tree ingest on the closed segment + // (if any). Per-turn tree ingest is intentionally absent — Phase 2 + // moves the tree write to segment granularity inside on_segment_closed. + if let Some(ref segment) = closed_segment { + let now = Self::now_timestamp(); + self.on_segment_closed(conn, segment, session_id, now).await; + } + + tracing::debug!("[archivist] turn indexed successfully: session={session_id}"); + Ok(()) + } +} diff --git a/src/openhuman/agent/harness/archivist/lifecycle.rs b/src/openhuman/agent/harness/archivist/lifecycle.rs new file mode 100644 index 0000000000..79683f1808 --- /dev/null +++ b/src/openhuman/agent/harness/archivist/lifecycle.rs @@ -0,0 +1,461 @@ +//! Constructor methods, segment lifecycle management, and flush logic for +//! `ArchivistHook`. 
+ +use super::helpers::{extract_profile_key, uuid_v4}; +use super::types::ArchivistHook; +use crate::openhuman::config::Config; +use crate::openhuman::memory::chat::ChatProvider; +use crate::openhuman::memory_store::events::{self, EventRecord, EventType}; +use crate::openhuman::memory_store::fts5::EpisodicEntry; +use crate::openhuman::memory_store::profile::{self, FacetType}; +use crate::openhuman::memory_store::segments::{ + self, BoundaryConfig, BoundaryDecision, ConversationSegment, +}; +use crate::openhuman::memory_tree::score::embed::{build_embedder_from_config, Embedder}; +use parking_lot::Mutex; +use rusqlite::Connection; +use std::sync::Arc; +use std::time::{SystemTime, UNIX_EPOCH}; + +impl ArchivistHook { + /// Create an Archivist hook with a shared SQLite connection. + /// + /// LLM recap and embedding are disabled by default; call + /// [`Self::with_config`] on the production path to wire them in. + pub fn new(conn: Arc>, enabled: bool) -> Self { + Self { + conn: Some(conn), + enabled, + boundary_config: BoundaryConfig::default(), + config: None, + chat_provider: None, + embedder: None, + } + } + + /// Attach runtime config so the archivist can gate the tree-ingest path + /// and build its LLM chat provider + embedder from config. + /// + /// When `config.learning.chat_to_tree_enabled` is `true`, each closed + /// segment's raw prose turns are ingested into the memory tree as + /// `source_id="conversations:agent"` (one batch per segment, not per turn). + /// The chat provider is built via `build_chat_provider(config, Summarise)`; + /// the embedder via `build_embedder_from_config(config)`. Both are + /// soft-fallback: if construction fails, the fields stay `None` and the + /// archivist falls back to heuristic summary / no embedding. + pub fn with_config(mut self, config: Config) -> Self { + // Build the LLM chat provider for segment recap. 
+ let chat_provider: Option> = + match crate::openhuman::memory::chat::build_chat_provider(&config) { + Ok(p) => { + tracing::debug!("[archivist] segment recap provider={} registered", p.name()); + Some(p) + } + Err(e) => { + tracing::warn!( + "[archivist] failed to build chat provider for recap (will use fallback): {e}" + ); + None + } + }; + + // Build the embedder for segment recap vectors. + let embedder: Option> = match build_embedder_from_config(&config) { + Ok(e) => { + tracing::debug!("[archivist] segment embed provider={} registered", e.name()); + Some(Arc::from(e)) + } + Err(e) => { + tracing::warn!( + "[archivist] failed to build embedder for segment recap (embedding skipped): {e}" + ); + None + } + }; + + self.chat_provider = chat_provider; + self.embedder = embedder; + self.config = Some(config); + self + } + + /// Create a disabled/no-op Archivist (when FTS5 is not available). + pub fn disabled() -> Self { + Self { + conn: None, + enabled: false, + boundary_config: BoundaryConfig::default(), + config: None, + chat_provider: None, + embedder: None, + } + } + + /// Flush the currently-open segment for `session_id`, if any, by + /// force-closing it and running the same close path (recap + embed + + /// event extraction). This guarantees the trailing segment of a session + /// is always finalized even when no boundary-triggering turn arrives. + /// + /// Called at session end (see `Agent::spawn_session_memory_extraction` + /// in `session/turn.rs`). Safe to call multiple times — segment_close + /// is idempotent (only transitions `open → closed`). 
+ pub async fn flush_open_segment(&self, session_id: &str) { + if !self.enabled { + return; + } + let Some(conn) = &self.conn else { + return; + }; + let now = Self::now_timestamp(); + tracing::debug!("[archivist] flush_open_segment: checking session={session_id}"); + let open_segment = match segments::open_segment_for_session(conn, session_id) { + Ok(seg) => seg, + Err(e) => { + tracing::warn!("[archivist] flush: failed to query open segment: {e}"); + return; + } + }; + let Some(segment) = open_segment else { + tracing::debug!("[archivist] flush: no open segment for session={session_id}"); + return; + }; + tracing::debug!( + "[archivist] flush: force-closing segment={} turn_count={}", + segment.segment_id, + segment.turn_count + ); + if let Err(e) = segments::segment_close(conn, &segment.segment_id, now) { + tracing::warn!("[archivist] flush: failed to close segment: {e}"); + return; + } + self.on_segment_closed(conn, &segment, session_id, now) + .await; + } + + pub(super) fn now_timestamp() -> f64 { + SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_secs_f64() + } + + /// Handle segment lifecycle for a new turn. + /// + /// Returns the closed segment (if any) so the caller can run + /// `on_segment_closed` asynchronously after this function returns. + /// Event extraction and recap run outside this function because they + /// are async and may re-acquire the connection lock. + pub(super) fn manage_segment_sync( + &self, + conn: &Arc>, + session_id: &str, + timestamp: f64, + user_message: &str, + current_episodic_id: i64, + current_seq: Option, + ) -> Option { + let now = Self::now_timestamp(); + + // Check for an open segment for this session. + let open_segment = match segments::open_segment_for_session(conn, session_id) { + Ok(seg) => seg, + Err(e) => { + tracing::warn!("[archivist] failed to query open segment: {e}"); + return None; + } + }; + + match open_segment { + Some(segment) => { + // Run boundary detection. 
+ let decision = segments::detect_boundary( + &self.boundary_config, + &segment, + timestamp, + user_message, + None, // No embedding for now — cosine drift skipped without embedder access. + ); + + match decision { + BoundaryDecision::Continue => { + tracing::debug!( + "[archivist] segment={} continues (turn_count={})", + segment.segment_id, + segment.turn_count + ); + if let Err(e) = segments::segment_append_turn( + conn, + &segment.segment_id, + current_episodic_id, + current_seq, + timestamp, + now, + ) { + tracing::warn!("[archivist] failed to append turn to segment: {e}"); + } + None + } + BoundaryDecision::Boundary(reason) => { + tracing::debug!( + "[archivist] segment boundary detected: {reason} — closing {}", + segment.segment_id + ); + + // Close the current segment. + if let Err(e) = segments::segment_close(conn, &segment.segment_id, now) { + tracing::warn!("[archivist] failed to close segment: {e}"); + return None; + } + + // Create a new segment for the new topic. + // The new segment starts at the current turn's episodic ID. + let new_id = format!("seg-{}", uuid_v4()); + if let Err(e) = segments::segment_create( + conn, + &new_id, + session_id, + "global", + current_episodic_id, + current_seq, + timestamp, + now, + ) { + tracing::warn!("[archivist] failed to create new segment: {e}"); + } + + // Return the closed segment so the caller can run + // on_segment_closed asynchronously. + Some(segment) + } + } + } + None => { + // No open segment — create the first one using the current episodic ID. + let segment_id = format!("seg-{}", uuid_v4()); + tracing::debug!( + "[archivist] creating first segment={segment_id} for session={session_id}" + ); + if let Err(e) = segments::segment_create( + conn, + &segment_id, + session_id, + "global", + current_episodic_id, + current_seq, + timestamp, + now, + ) { + tracing::warn!("[archivist] failed to create initial segment: {e}"); + } + None + } + } + } + + /// Called when a segment is closed. 
+ /// + /// Produces a segment recap (LLM if a chat provider is configured, + /// otherwise the heuristic fallback), embeds the recap, extracts + /// heuristic events, and updates the user profile. + /// + /// Soft-fallback contract (mirrors `LlmSummariser`): this function + /// never returns `Err`; all failures are logged and ignored. + pub(super) async fn on_segment_closed( + &self, + conn: &Arc>, + segment: &ConversationSegment, + session_id: &str, + now: f64, + ) { + // Gather the conversation text for this segment. Prefer the + // md-backed memory_archivist read when config is available; fall + // back to FTS5 in test paths or when config isn't wired. + let entries = self.read_session_entries(conn, session_id); + + // Filter entries that fall within the segment's time window. + // Use <= for end_timestamp (entries at the boundary are part of this + // segment). The boundary-triggering turn has a timestamp AFTER + // end_timestamp, so it won't be included. + let segment_entries: Vec<&EpisodicEntry> = entries + .iter() + .filter(|e| { + e.timestamp >= segment.start_timestamp + && segment + .end_timestamp + .map(|end| e.timestamp <= end) + .unwrap_or(true) + }) + .collect(); + + if segment_entries.is_empty() { + tracing::debug!( + "[archivist] segment={} has no entries — skipping recap", + segment.segment_id + ); + return; + } + + // Build segment text from user messages (for event extraction). + let segment_text: String = segment_entries + .iter() + .filter(|e| e.role == "user") + .map(|e| e.content.as_str()) + .collect::>() + .join(". "); + + // ── Segment recap (LLM or heuristic fallback) ──────────────────── + let (summary, _from_llm) = self + .summarize_entries(&segment_entries, &segment.segment_id, segment.turn_count) + .await; + + // Persist the recap. 
+ if let Err(e) = segments::segment_set_summary(conn, &segment.segment_id, &summary, now) { + tracing::warn!("[archivist] failed to set segment summary: {e}"); + } else { + tracing::debug!( + "[archivist] recap persisted segment={} summary_chars={}", + segment.segment_id, + summary.len() + ); + } + + // ── Finalize-time embedding ─────────────────────────────────────── + // Embed the recap only when the segment is being finalized (closed). + // Never embed per-turn or on an open segment — this is the single + // write point for segment_embeddings rows. + if let Some(ref embedder) = self.embedder { + let model_signature = embedder.name().to_string(); + tracing::debug!( + "[archivist] embedding recap segment={} model={}", + segment.segment_id, + model_signature + ); + match embedder.embed(&summary).await { + Ok(vec) => { + match segments::segment_embedding_upsert( + conn, + &segment.segment_id, + &model_signature, + &vec, + now, + ) { + Ok(()) => { + tracing::debug!( + "[archivist] embedding stored segment={} model={} dim={}", + segment.segment_id, + model_signature, + vec.len() + ); + } + Err(e) => { + tracing::warn!( + "[archivist] failed to persist segment embedding (non-fatal) segment={}: {e}", + segment.segment_id + ); + } + } + } + Err(e) => { + tracing::warn!( + "[archivist] embed call failed (non-fatal) segment={} model={}: {e}", + segment.segment_id, + model_signature + ); + } + } + } else { + tracing::debug!( + "[archivist] no embedder — skipping segment embedding segment={}", + segment.segment_id + ); + } + + // ── Heuristic event extraction ──────────────────────────────────── + if !segment_text.is_empty() { + let extracted = events::extract_events_heuristic(&segment_text); + tracing::debug!( + "[archivist] extracted {} events from segment {}", + extracted.len(), + segment.segment_id + ); + + for (event_type, content) in &extracted { + let event_id = format!("evt-{}", uuid_v4()); + let event = EventRecord { + event_id, + segment_id: 
segment.segment_id.clone(), + session_id: session_id.to_string(), + namespace: segment.namespace.clone(), + event_type: event_type.clone(), + content: content.clone(), + subject: None, + timestamp_ref: None, + confidence: 0.6, + embedding: None, + source_turn_ids: None, + created_at: now, + }; + if let Err(e) = events::event_insert(conn, &event) { + tracing::warn!("[archivist] failed to insert event: {e}"); + } + + // Update user profile from preference and fact events. + match event_type { + EventType::Preference => { + let key = extract_profile_key(content, "preference"); + let facet_id = format!("prf-{}", uuid_v4()); + if let Err(e) = profile::profile_upsert( + conn, + &facet_id, + &FacetType::Preference, + &key, + content, + 0.6, + Some(&segment.segment_id), + now, + ) { + tracing::warn!("[archivist] failed to upsert profile facet: {e}"); + } + } + EventType::Fact => { + let key = extract_profile_key(content, "fact"); + let facet_id = format!("prf-{}", uuid_v4()); + if let Err(e) = profile::profile_upsert( + conn, + &facet_id, + &FacetType::Context, + &key, + content, + 0.6, + Some(&segment.segment_id), + now, + ) { + tracing::warn!("[archivist] failed to upsert profile fact: {e}"); + } + } + _ => {} + } + } + } + + // ── Phase 2: tree ingest at segment granularity ─────────────────── + // Gate: only when config is attached and chat_to_tree_enabled is true. + // Ingest the segment's raw prose turns (NOT the LLM recap) as one + // ChatBatch into the memory tree under `source_id="conversations:agent"`. + // Evidence-vs-interpretation: the tree must ingest raw prose and build + // its own summaries; feeding the recap would make the tree summarise + // a summary. Non-fatal: failures are logged and swallowed. 
+ if let Some(ref cfg) = self.config { + if cfg.learning.chat_to_tree_enabled { + tracing::debug!( + "[archivist] piping segment into tree as conversations:agent \ + session={session_id} segment={} entries={}", + segment.segment_id, + segment_entries.len() + ); + self.pipe_segment_to_tree(cfg, segment, session_id, &segment_entries) + .await; + } + } + } +} diff --git a/src/openhuman/agent/harness/archivist/mod.rs b/src/openhuman/agent/harness/archivist/mod.rs new file mode 100644 index 0000000000..7f9887c7b2 --- /dev/null +++ b/src/openhuman/agent/harness/archivist/mod.rs @@ -0,0 +1,47 @@ +//! Archivist — background PostTurnHook that extracts lessons, indexes +//! episodic records, and manages conversation segments with event extraction. +//! +//! After each turn, the Archivist: +//! 1. Inserts the turn into the FTS5 episodic table. +//! 2. Manages conversation segments (boundary detection + lifecycle). +//! 3. On segment close: produces an LLM recap (soft-fallback to heuristic), +//! embeds the recap, extracts events, and updates user profile. +//! 4. Extracts simple lessons from tool failures. +//! 5. (Phase 2 / #566) At segment close/flush, ingests the segment's raw prose +//! turns (user + assistant; tool-call JSON stripped) into the memory tree as +//! `source_id = "conversations:agent"` when +//! `config.learning.chat_to_tree_enabled` is true. The leaf is RAW PROSE — +//! the LLM recap is NEVER fed into the tree (evidence-vs-interpretation +//! policy). Each leaf carries episodic provenance stamped in `source_ref`. +//! 6. `flush_open_segment` force-closes the trailing open segment at session +//! end so the last segment always gets a recap + embedding + tree ingest. 
+ +mod helpers; +mod hook_impl; +mod lifecycle; +mod recap; +#[cfg(test)] +mod test_constructors; +mod tree_ingest; +mod types; + +pub use types::ArchivistHook; + +#[cfg(test)] +pub(crate) use crate::openhuman::agent::hooks::PostTurnHook; +#[cfg(test)] +pub(crate) use crate::openhuman::config::Config; +#[cfg(test)] +pub(crate) use crate::openhuman::memory_store::profile; +#[cfg(test)] +pub(crate) use helpers::extract_profile_key; +#[cfg(test)] +pub(crate) use parking_lot::Mutex; +#[cfg(test)] +pub(crate) use rusqlite::Connection; +#[cfg(test)] +pub(crate) use std::sync::Arc; + +#[cfg(test)] +#[path = "../archivist_tests.rs"] +mod tests; diff --git a/src/openhuman/agent/harness/archivist/recap.rs b/src/openhuman/agent/harness/archivist/recap.rs new file mode 100644 index 0000000000..05704e7082 --- /dev/null +++ b/src/openhuman/agent/harness/archivist/recap.rs @@ -0,0 +1,298 @@ +//! Summarization and rolling recap logic for `ArchivistHook`. + +use super::types::ArchivistHook; +use crate::openhuman::memory_store::fts5::{self, EpisodicEntry}; +use crate::openhuman::memory_store::segments; +use crate::openhuman::memory_store::trees::types::TreeKind; +use crate::openhuman::memory_tree::summarise::{summarise, SummaryContext, SummaryInput}; +use parking_lot::Mutex; +use rusqlite::Connection; +use std::sync::Arc; + +impl ArchivistHook { + /// Read every entry recorded for `session_id`, preferring the + /// md-backed `memory_archivist::store` when `self.config` is set and + /// falling back to the legacy FTS5 episodic table otherwise. + /// + /// Returns `EpisodicEntry` so the existing call sites (segment + /// gathering, recap rendering, tree push) keep their shape unchanged + /// during the FTS5 retirement migration. 
+ pub(super) fn read_session_entries( + &self, + conn: &Arc>, + session_id: &str, + ) -> Vec { + if let Some(cfg) = self.config.as_ref() { + match crate::openhuman::memory_archivist::store::session_entries(cfg, session_id) { + Ok(turns) => { + return turns + .into_iter() + .map(|t| EpisodicEntry { + id: None, + session_id: t.session_id, + // ArchivedTurn stores epoch-ms; EpisodicEntry + // takes epoch-seconds as f64. + timestamp: (t.timestamp_ms as f64) / 1000.0, + role: t.role, + content: t.content, + lesson: t.lesson, + tool_calls_json: t.tool_calls_json, + cost_microdollars: t.cost_microdollars, + }) + .collect(); + } + Err(e) => { + tracing::warn!( + "[archivist] memory_archivist read failed (falling back to FTS5): {e}" + ); + } + } + } + fts5::episodic_session_entries(conn, session_id).unwrap_or_default() + } + + /// Shared summarize helper — the **single LLM summarizer** used by both + /// the finalize path (`on_segment_closed`) and the rolling-recap path + /// (`rolling_segment_recap`). + /// + /// Builds a prose corpus from `entries`, calls the `LlmSummariser` when a + /// `chat_provider` is configured, and falls back to the heuristic + /// `segments::fallback_summary` on any failure or when no provider is + /// wired in. Always returns a non-empty string. + /// + /// Invariants: + /// - NEVER mutates DB state (no `segment_set_summary`, no embedding). + /// - NEVER closes a segment. + /// - Safe to call on both open and closed segments. + /// Summarize a set of episodic entries into a recap string. + /// + /// Returns `(text, produced_by_llm)`. `produced_by_llm == false` means the + /// LLM was unavailable / failed / returned empty and `text` is the shallow + /// heuristic `fallback_summary` bookend stub. 
That stub is an acceptable + /// durable last-resort on the *finalize* path, but callers driving the + /// **live prompt** (rolling recap → compaction) must treat + /// `produced_by_llm == false` as "no real recap" and fall back to their + /// own strategy — the stub must never become live compaction text. + pub(super) async fn summarize_entries( + &self, + entries: &[&EpisodicEntry], + segment_id: &str, + turn_count: i32, + ) -> (String, bool) { + if entries.is_empty() { + tracing::debug!( + "[archivist] summarize_entries: no entries for segment={segment_id} — \ + returning empty fallback" + ); + return (segments::fallback_summary("", "", turn_count), false); + } + + // Build a full prose corpus from ALL entries (user + assistant prose; + // tool-call JSON is already excluded because the archivist stores + // stripped prose in the `content` column). + let corpus_inputs: Vec = entries + .iter() + .filter(|e| !e.content.trim().is_empty()) + .map(|e| { + use crate::openhuman::memory_store::chunks::types::approx_token_count; + let content = e.content.clone(); + let token_count = approx_token_count(&content); + let ts = chrono::DateTime::from_timestamp(e.timestamp as i64, 0) + .unwrap_or_else(chrono::Utc::now); + SummaryInput { + id: format!("{}-{}", e.role, e.timestamp as u64), + content, + token_count, + entities: Vec::new(), + topics: Vec::new(), + time_range_start: ts, + time_range_end: ts, + score: 0.5, + } + }) + .collect(); + + let summary_ctx = SummaryContext { + tree_id: segment_id, + tree_kind: TreeKind::Source, + target_level: 0, + token_budget: 2_000, + }; + + let first = entries.first().map(|e| e.content.as_str()).unwrap_or(""); + let last = entries.last().map(|e| e.content.as_str()).unwrap_or(first); + + if self.chat_provider.is_some() { + if let Some(ref config) = self.config { + tracing::debug!( + "[archivist] summarize_entries: LLM recap segment={segment_id} entries={}", + entries.len() + ); + #[cfg(test)] + let summary_result = if let Some(provider) = 
self.chat_provider.as_ref() { + crate::openhuman::memory::chat::test_override::with_provider( + Arc::clone(provider), + summarise(config, &corpus_inputs, &summary_ctx), + ) + .await + } else { + summarise(config, &corpus_inputs, &summary_ctx).await + }; + #[cfg(not(test))] + let summary_result = summarise(config, &corpus_inputs, &summary_ctx).await; + + match summary_result { + Ok(output) if !output.content.is_empty() => { + tracing::debug!( + "[archivist] summarize_entries: LLM recap ok segment={segment_id} \ + chars={}", + output.content.len() + ); + return (output.content, true); + } + Ok(_) => { + tracing::debug!( + "[archivist] summarize_entries: LLM returned empty — \ + heuristic fallback segment={segment_id}" + ); + } + Err(e) => { + tracing::warn!( + "[archivist] summarize_entries: LLM recap failed (non-fatal) \ + segment={segment_id}: {e} — heuristic fallback" + ); + } + } + } else { + tracing::debug!( + "[archivist] summarize_entries: no config — \ + heuristic fallback segment={segment_id}" + ); + } + } else { + tracing::debug!( + "[archivist] summarize_entries: no chat provider — \ + heuristic fallback segment={segment_id}" + ); + } + (segments::fallback_summary(first, last, turn_count), false) + } + + /// Produce a rolling recap of the **currently-open** segment for + /// `session_id` WITHOUT closing it, writing `segment_set_summary`, or + /// embedding. + /// + /// This is the Phase 1.5 "one summarizer" entry point. Both + /// `on_segment_closed` (finalize) and this function delegate to the same + /// [`Self::summarize_entries`] helper so the same LLM path is used in both + /// cases. The distinction is purely in what happens *after* the summary + /// string is produced: + /// + /// - **Finalize** (`on_segment_closed`): persists the summary via + /// `segment_set_summary`, embeds it, extracts events, pipes tree ingest. + /// - **Rolling** (this function): returns the summary string and does + /// nothing else — segment stays open, DB is untouched. 
+ /// + /// Returns `None` when: + /// - The archivist is disabled or has no connection. + /// - There is no open segment for `session_id`. + /// - The open segment has no episodic entries. + /// - No real LLM recap was produced (LLM unavailable / failed / empty, so + /// only the heuristic bookend stub is available). The shallow stub is + /// deliberately NOT used as live compaction text. + /// + /// Callers must treat `None` as "recap unavailable" and fall back to + /// their own compaction strategy (e.g. `ProviderSummarizer`). + pub async fn rolling_segment_recap(&self, session_id: &str) -> Option { + if !self.enabled { + tracing::debug!( + "[archivist] rolling_segment_recap: archivist disabled \ + session={session_id} — returning None" + ); + return None; + } + let conn = self.conn.as_ref()?; + + // Find the currently-open segment for this session. + let open_segment = match crate::openhuman::memory_store::segments::open_segment_for_session( + conn, session_id, + ) { + Ok(Some(seg)) => seg, + Ok(None) => { + tracing::debug!( + "[archivist] rolling_segment_recap: no open segment for \ + session={session_id} — returning None" + ); + return None; + } + Err(e) => { + tracing::warn!( + "[archivist] rolling_segment_recap: failed to query open segment \ + session={session_id}: {e} — returning None" + ); + return None; + } + }; + + // Gather the episodic entries for this session so far. + let all_entries = self.read_session_entries(conn, session_id); + + // Keep only entries within the open segment's time window (start → + // now, inclusive). An open segment has `end_timestamp = None`. 
+ let segment_entries: Vec<&EpisodicEntry> = all_entries + .iter() + .filter(|e| e.timestamp >= open_segment.start_timestamp) + .collect(); + + if segment_entries.is_empty() { + tracing::debug!( + "[archivist] rolling_segment_recap: no entries in open segment={} \ + session={session_id} — returning None", + open_segment.segment_id + ); + return None; + } + + tracing::debug!( + "[archivist] rolling_segment_recap: summarizing open segment={} \ + entries={} session={session_id}", + open_segment.segment_id, + segment_entries.len() + ); + + let (recap, from_llm) = self + .summarize_entries( + &segment_entries, + &open_segment.segment_id, + open_segment.turn_count, + ) + .await; + + if !from_llm { + tracing::debug!( + "[archivist] rolling_segment_recap: only heuristic bookend stub \ + available (no real LLM recap) session={session_id} segment={} — \ + returning None so compaction falls back to ProviderSummarizer", + open_segment.segment_id + ); + return None; + } + + if recap.is_empty() { + tracing::debug!( + "[archivist] rolling_segment_recap: summarize_entries returned empty \ + session={session_id} segment={} — returning None", + open_segment.segment_id + ); + return None; + } + + tracing::debug!( + "[archivist] rolling_segment_recap: produced LLM recap chars={} \ + session={session_id} segment={}", + recap.len(), + open_segment.segment_id + ); + Some(recap) + } +} diff --git a/src/openhuman/agent/harness/archivist/test_constructors.rs b/src/openhuman/agent/harness/archivist/test_constructors.rs new file mode 100644 index 0000000000..2d4cfd4e83 --- /dev/null +++ b/src/openhuman/agent/harness/archivist/test_constructors.rs @@ -0,0 +1,56 @@ +//! Test-only constructors for `ArchivistHook` that inject stub providers +//! directly, bypassing `with_config`'s provider-build logic. 
+ +use super::types::ArchivistHook; +use crate::openhuman::config::Config; +use crate::openhuman::memory::chat::ChatProvider; +use crate::openhuman::memory_store::segments::BoundaryConfig; +use crate::openhuman::memory_tree::score::embed::Embedder; +use parking_lot::Mutex; +use rusqlite::Connection; +use std::sync::Arc; + +#[cfg(test)] +impl ArchivistHook { + /// Test-only constructor that injects a stub `ChatProvider` and `Embedder` + /// directly, bypassing `with_config`'s provider-build logic. Used by + /// Phase 1 tests to verify LLM recap and embedding paths without hitting + /// a real LLM or Ollama daemon. Exposed as `pub(crate)` so Phase 3 + /// STM recall integration tests can drive the full archivist path. + pub(crate) fn new_with_stubs( + conn: Arc>, + chat_provider: Arc, + embedder: Arc, + ) -> Self { + Self { + conn: Some(conn), + enabled: true, + boundary_config: BoundaryConfig::default(), + config: Some(Config::default()), + chat_provider: Some(chat_provider), + embedder: Some(embedder), + } + } + + /// Test-only constructor that injects stub providers AND a `Config`, so the + /// Phase 2 segment-tree ingest path (gated by + /// `config.learning.chat_to_tree_enabled`) can be exercised hermetically. + /// + /// `config.learning.chat_to_tree_enabled` must be set to `true` by the caller + /// for the tree ingest to fire; the hook does NOT force it on. + pub(crate) fn new_with_stubs_and_config( + conn: Arc>, + chat_provider: Arc, + embedder: Arc, + config: Config, + ) -> Self { + Self { + conn: Some(conn), + enabled: true, + boundary_config: BoundaryConfig::default(), + config: Some(config), + chat_provider: Some(chat_provider), + embedder: Some(embedder), + } + } +} diff --git a/src/openhuman/agent/harness/archivist/tree_ingest.rs b/src/openhuman/agent/harness/archivist/tree_ingest.rs new file mode 100644 index 0000000000..3992c55e6e --- /dev/null +++ b/src/openhuman/agent/harness/archivist/tree_ingest.rs @@ -0,0 +1,139 @@ +//! 
Memory tree ingest logic for `ArchivistHook` — pipes closed segment prose +//! into the memory tree as `source_id="conversations:agent"`. + +use super::helpers::strip_tool_calls_from_response; +use super::types::ArchivistHook; +use crate::openhuman::config::Config; +use crate::openhuman::memory::ingest_pipeline; +use crate::openhuman::memory_store::fts5; +use crate::openhuman::memory_sync::canonicalize::chat::{ChatBatch, ChatMessage}; + +impl ArchivistHook { + /// Pipe a closed segment's raw prose turns into the memory tree as + /// `source_id="conversations:agent"`. + /// + /// **Design contract (Phase 2):** + /// - ONE ingest per segment (not per turn) — the batch boundary is the + /// segment, so all turns land as a single ChatBatch. + /// - RAW PROSE only — the LLM recap (summary) is explicitly NOT ingested. + /// The tree must build its own summaries from evidence (raw turns); + /// feeding a summary-of-a-summary violates the evidence-vs-interpretation + /// policy. + /// - `source_id = "conversations:agent"` is a CONSTANT — a single shared + /// tree source for all agent chat sessions (never per-session or per-segment). + /// - Tool-call JSON is stripped from assistant entries so structured + /// payloads do not reach the tree (memory ingestion policy). + /// - Provenance is stamped on each `ChatMessage.source_ref` as + /// `agent://session/{session_id}/segment/{segment_id}#ep{start}-{end}` + /// so tree leaves can be traced back to episodic rows for drill-down and + /// deduplication. + /// + /// Failures are logged and swallowed; the episodic write is the source of + /// truth. + pub(super) async fn pipe_segment_to_tree( + &self, + config: &Config, + segment: &crate::openhuman::memory_store::segments::ConversationSegment, + session_id: &str, + entries: &[&fts5::EpisodicEntry], + ) { + use chrono::{TimeZone, Utc}; + + // Collect the episodic id span for provenance stamping. 
+ // start_episodic_id comes from the segment record (set at creation); + // end_episodic_id is the latest turn id (may be None if only one turn). + let start_ep = segment.start_episodic_id; + let end_ep = segment.end_episodic_id.unwrap_or(start_ep); + let segment_id = &segment.segment_id; + + // The provenance URI embeds session + segment + episodic id span so + // tree leaves can be traced back to episodic_log rows without a + // full-text scan. + let provenance = + format!("agent://session/{session_id}/segment/{segment_id}#ep{start_ep}-{end_ep}"); + + // Build one ChatMessage per episodic entry (user + assistant; skip + // empties). Tool-call JSON is stripped from assistant content so only + // prose flows into the tree. + let messages: Vec = entries + .iter() + .filter_map(|e| { + let raw_text = if e.role == "assistant" { + strip_tool_calls_from_response(&e.content) + } else { + e.content.clone() + }; + // Strip `[IMAGE:]` attachment markers so images never + // enter episodic memory ingestion — otherwise the base64 is + // chunked, embedded (garbage + Voyage size errors), and fed to + // the extract LLM (#3205). `parse_image_markers` returns the + // marker-free prose, already trimmed; the image itself isn't + // useful memory text. An image-only turn collapses to empty and + // is skipped by the guard below. + let (text, _image_refs) = + crate::openhuman::agent::multimodal::parse_image_markers(&raw_text); + if text.is_empty() { + return None; + } + + // Convert the f64 Unix timestamp to DateTime. 
+ let secs = e.timestamp as i64; + let nanos = ((e.timestamp.fract()) * 1e9) as u32; + let ts = Utc + .timestamp_opt(secs, nanos.min(999_999_999)) + .single() + .unwrap_or_else(Utc::now); + + Some(ChatMessage { + author: e.role.clone(), + timestamp: ts, + text, + source_ref: Some(provenance.clone()), + }) + }) + .collect(); + + if messages.is_empty() { + tracing::debug!( + "[archivist] pipe_segment_to_tree: no prose messages in segment={segment_id} — skipping" + ); + return; + } + + let batch = ChatBatch { + platform: "agent".into(), + // channel_label carries session_id for human-readable context. + channel_label: session_id.to_string(), + messages, + }; + + // `source_id` is intentionally a CONSTANT — all agent sessions share + // one tree source so cross-session summarisation sees the full history. + let source_id = "conversations:agent"; + // `owner` scopes the memory to the session; `tags` enable filtering. + let owner = session_id; + let tags = vec!["agent_chat".to_string()]; + + tracing::debug!( + "[archivist] tree ingest start: source_id={source_id} session={session_id} \ + segment={segment_id} ep_span={start_ep}-{end_ep} provenance={provenance}" + ); + + match ingest_pipeline::ingest_chat(config, source_id, owner, tags, batch).await { + Ok(result) => { + tracing::debug!( + "[archivist] tree ingest ok: source_id={source_id} \ + session={session_id} segment={segment_id} \ + chunks_written={} provenance={provenance}", + result.chunks_written + ); + } + Err(e) => { + tracing::warn!( + "[archivist] tree ingest failed (non-fatal): source_id={source_id} \ + session={session_id} segment={segment_id} error={e}" + ); + } + } + } +} diff --git a/src/openhuman/agent/harness/archivist/types.rs b/src/openhuman/agent/harness/archivist/types.rs new file mode 100644 index 0000000000..54fbd1b492 --- /dev/null +++ b/src/openhuman/agent/harness/archivist/types.rs @@ -0,0 +1,35 @@ +//! Core type definition for the Archivist hook. 
+ +use crate::openhuman::config::Config; +use crate::openhuman::memory::chat::ChatProvider; +use crate::openhuman::memory_store::segments::BoundaryConfig; +use crate::openhuman::memory_tree::score::embed::Embedder; +use parking_lot::Mutex; +use rusqlite::Connection; +use std::sync::Arc; + +/// Background Archivist that indexes turns into FTS5 episodic memory +/// and manages conversation segmentation. +/// +/// Produces an LLM recap + embedding for each closed segment and flushes +/// the trailing open segment at session end. +pub struct ArchivistHook { + /// SQLite connection shared with UnifiedMemory. + pub(super) conn: Option>>, + /// Whether the archivist is enabled. + pub(super) enabled: bool, + /// Boundary detection configuration. + pub(super) boundary_config: BoundaryConfig, + /// Optional runtime config — used to gate the tree-ingest path and to + /// build the LLM chat provider + embedder. + /// + /// When `None`, the tree-ingest path is skipped. Set via + /// [`ArchivistHook::with_config`] on the production path. + pub(super) config: Option, + /// Optional LLM provider for segment recap. When `None`, the + /// fallback heuristic summary is used instead. + pub(super) chat_provider: Option>, + /// Optional embedder for segment recap vectors. When `None`, embedding + /// is skipped (segment is still summarised). + pub(super) embedder: Option>, +} diff --git a/src/openhuman/agent/harness/session/builder/builder_tests.rs b/src/openhuman/agent/harness/session/builder/builder_tests.rs new file mode 100644 index 0000000000..71445ed0ef --- /dev/null +++ b/src/openhuman/agent/harness/session/builder/builder_tests.rs @@ -0,0 +1,73 @@ +//! Tests for the builder module — dedup_visible_tool_specs and related logic. 
+ +use super::dedup_visible_tool_specs; +use crate::openhuman::tools::ToolSpec; +use serde_json::json; + +fn spec(name: &str) -> ToolSpec { + ToolSpec { + name: name.to_string(), + description: format!("description for {name}"), + parameters: json!({}), + } +} + +#[test] +fn drops_duplicates_first_wins() { + // Real-world collision: researcher's `delegate_name = "research"` + // synthesises a delegate tool that shadows a same-named skill. + // Anthropic 400s on duplicate tool names; the dedup helper must + // keep the *first* occurrence so registration order semantics + // are preserved (the underlying tool dispatch lookup-by-name + // still resolves the right tool). + let specs = vec![ + spec("research"), // skill + spec("plan"), + spec("research"), // delegate, dropped + spec("run_code"), + spec("plan"), // dropped + ]; + + let deduped = dedup_visible_tool_specs(specs); + + let names: Vec<&str> = deduped.iter().map(|s| s.name.as_str()).collect(); + assert_eq!(names, vec!["research", "plan", "run_code"]); +} + +#[test] +fn passes_through_when_no_duplicates() { + let specs = vec![spec("a"), spec("b"), spec("c")]; + let deduped = dedup_visible_tool_specs(specs); + assert_eq!(deduped.len(), 3); + assert_eq!(deduped[0].name, "a"); + assert_eq!(deduped[1].name, "b"); + assert_eq!(deduped[2].name, "c"); +} + +#[test] +fn handles_empty_input() { + let deduped = dedup_visible_tool_specs(Vec::::new()); + assert!(deduped.is_empty()); +} + +#[test] +fn preserves_full_spec_content_for_kept_entries() { + // Description + parameters must survive the dedup pass intact — + // the LLM uses both for tool-call decisions, and corrupting them + // would silently degrade function-calling quality. 
+ let mut spec_a = spec("alpha"); + spec_a.description = "first alpha — should win".to_string(); + spec_a.parameters = json!({"type": "object", "required": ["x"]}); + + let mut spec_a_dup = spec("alpha"); + spec_a_dup.description = "second alpha — should be dropped".to_string(); + + let deduped = dedup_visible_tool_specs(vec![spec_a.clone(), spec_a_dup]); + + assert_eq!(deduped.len(), 1); + assert_eq!(deduped[0].description, "first alpha — should win"); + assert_eq!( + deduped[0].parameters, + json!({"type": "object", "required": ["x"]}) + ); +} diff --git a/src/openhuman/agent/harness/session/builder.rs b/src/openhuman/agent/harness/session/builder/factory.rs similarity index 61% rename from src/openhuman/agent/harness/session/builder.rs rename to src/openhuman/agent/harness/session/builder/factory.rs index 1ca11359fa..6eb45eed2d 100644 --- a/src/openhuman/agent/harness/session/builder.rs +++ b/src/openhuman/agent/harness/session/builder/factory.rs @@ -1,625 +1,27 @@ -//! `AgentBuilder` fluent API and the `Agent::from_config` factory. -//! -//! Everything in this file is about *constructing* an `Agent` — the -//! builder setters, the `build()` validator, and the `from_config()` -//! factory that wires together the real provider / memory / tool -//! registry from a loaded [`Config`]. Per-turn behaviour lives in -//! [`super::turn`]; accessors and run-helpers live in [`super::runtime`]. +//! `Agent::from_config` factory methods and the internal +//! `build_session_agent_inner` constructor. 
-use super::types::{Agent, AgentBuilder}; +use super::helpers::prefetch_tool_memory_rules_blocking; use crate::openhuman::agent::dispatcher::{ - NativeToolDispatcher, PFormatToolDispatcher, ToolDispatcher, XmlToolDispatcher, + NativeToolDispatcher, PFormatToolDispatcher, XmlToolDispatcher, }; use crate::openhuman::agent::harness::definition::{ AgentDefinitionRegistry, PromptSource, ToolScope, }; +use crate::openhuman::agent::harness::session::types::Agent; use crate::openhuman::agent::host_runtime; -use crate::openhuman::agent::memory_loader::{DefaultMemoryLoader, MemoryLoader}; -use crate::openhuman::agent_tool_policy::{ToolPolicyEngine, ToolPolicySession}; -use crate::openhuman::config::{Config, ContextConfig}; +use crate::openhuman::agent::memory_loader::DefaultMemoryLoader; +use crate::openhuman::config::Config; use crate::openhuman::context::prompt::SystemPromptBuilder; -use crate::openhuman::context::{ContextManager, ProviderSummarizer, SegmentRecapSummarizer}; use crate::openhuman::inference::provider::{self, Provider}; use crate::openhuman::memory::Memory; use crate::openhuman::memory_store; -use crate::openhuman::memory_tools::{ToolMemoryCaptureHook, ToolMemoryRule, ToolMemoryStore}; +use crate::openhuman::memory_tools::ToolMemoryCaptureHook; use crate::openhuman::security::SecurityPolicy; -use crate::openhuman::tools::{self, Tool, ToolSpec}; +use crate::openhuman::tools::{self, Tool}; use anyhow::Result; use std::sync::Arc; -/// Drop entries with duplicate `name` fields, first occurrence wins. -/// -/// Anthropic (and other strict providers) rejects a chat/completions -/// request that lists two tools with the same name — OpenHuman's own -/// backend and OpenAI silently accept duplicates, which hid the -/// underlying collision (researcher sub-agent's `delegate_name = -/// "research"` shadowing a same-named skill tool) until #1710's -/// per-role routing started sending the same tool list to Anthropic. 
-/// -/// Called from every place that materialises the visible tool spec -/// list — initial build, post-composio refresh, scope-filter change — -/// so the request the provider sees is always name-unique regardless -/// of which path produced it. -pub(crate) fn dedup_visible_tool_specs(specs: Vec) -> Vec { - let mut seen: std::collections::HashSet = std::collections::HashSet::new(); - let mut deduped: Vec = Vec::with_capacity(specs.len()); - let mut dropped: Vec = Vec::new(); - for spec in specs { - if seen.insert(spec.name.clone()) { - deduped.push(spec); - } else { - dropped.push(spec.name); - } - } - if !dropped.is_empty() { - log::warn!( - "[agent] dropped {} duplicate tool spec(s) before sending to provider: {:?}", - dropped.len(), - dropped - ); - } - deduped -} - -pub(super) fn visible_tool_specs_for_policy( - tool_specs: &[ToolSpec], - visible_names: &std::collections::HashSet, - tool_policy: &ToolPolicySession, -) -> Vec { - tool_specs - .iter() - .filter(|spec| { - (visible_names.is_empty() || visible_names.contains(&spec.name)) - && tool_policy.is_allowed(&spec.name) - }) - .cloned() - .collect() -} - -impl AgentBuilder { - /// Creates a new `AgentBuilder` with default values. - pub fn new() -> Self { - Self { - provider: None, - tools: None, - visible_tool_names: None, - memory: None, - prompt_builder: None, - tool_dispatcher: None, - memory_loader: None, - config: None, - context_config: None, - model_name: None, - temperature: None, - workspace_dir: None, - action_dir: None, - skills: None, - auto_save: None, - post_turn_hooks: Vec::new(), - learning_enabled: false, - explicit_preferences_enabled: true, - event_session_id: None, - event_channel: None, - agent_definition_name: None, - session_parent_prefix: None, - omit_profile: None, - omit_memory_md: None, - payload_summarizer: None, - tool_policy: None, - archivist_hook: None, - unified_compaction_enabled: true, - } - } - - /// Sets the AI provider for the agent. 
- /// - /// Accepts a `Box` for backward compatibility but stores - /// the provider as an `Arc` internally so sub-agents spawned from this - /// agent (via `spawn_subagent`) can share the same instance. - pub fn provider(mut self, provider: Box) -> Self { - self.provider = Some(Arc::from(provider)); - self - } - - /// Sets the AI provider from an existing `Arc`. Use this when sharing - /// a provider instance across multiple agents. - pub fn provider_arc(mut self, provider: Arc) -> Self { - self.provider = Some(provider); - self - } - - /// Sets the available tools for the agent. - pub fn tools(mut self, tools: Vec>) -> Self { - self.tools = Some(tools); - self - } - - /// Restricts which tools the main agent can see and call directly. - /// Tools not in this set are still available to sub-agents via the - /// runner. Pass `None` (default) to make all tools visible. - pub fn visible_tool_names(mut self, names: std::collections::HashSet) -> Self { - self.visible_tool_names = Some(names); - self - } - - /// Sets the memory system for the agent. - pub fn memory(mut self, memory: Arc) -> Self { - self.memory = Some(memory); - self - } - - /// Sets the system prompt builder for the agent. - pub fn prompt_builder(mut self, prompt_builder: SystemPromptBuilder) -> Self { - self.prompt_builder = Some(prompt_builder); - self - } - - /// Sets the tool dispatcher for the agent. - pub fn tool_dispatcher(mut self, tool_dispatcher: Box) -> Self { - self.tool_dispatcher = Some(tool_dispatcher); - self - } - - /// Sets the memory loader for the agent. - pub fn memory_loader(mut self, memory_loader: Box) -> Self { - self.memory_loader = Some(memory_loader); - self - } - - /// Sets the agent configuration. - pub fn config(mut self, config: crate::openhuman::config::AgentConfig) -> Self { - self.config = Some(config); - self - } - - /// Sets the global context-management configuration. Threaded - /// into the [`ContextManager`] constructed in [`Self::build`]. 
If - /// not set the manager is constructed with - /// [`ContextConfig::default`]. - pub fn context_config(mut self, context_config: ContextConfig) -> Self { - self.context_config = Some(context_config); - self - } - - /// Sets the model name to use for chat requests. - pub fn model_name(mut self, model_name: String) -> Self { - self.model_name = Some(model_name); - self - } - - /// Sets the temperature for chat requests. - pub fn temperature(mut self, temperature: f64) -> Self { - self.temperature = Some(temperature); - self - } - - /// Sets the workspace directory for the agent. - pub fn workspace_dir(mut self, workspace_dir: std::path::PathBuf) -> Self { - self.workspace_dir = Some(workspace_dir); - self - } - - pub fn action_dir(mut self, action_dir: std::path::PathBuf) -> Self { - self.action_dir = Some(action_dir); - self - } - - /// Sets the skills available to the agent. - pub fn skills(mut self, skills: Vec) -> Self { - self.skills = Some(skills); - self - } - - /// Enables or disables automatic saving of conversation history to memory. - pub fn auto_save(mut self, auto_save: bool) -> Self { - self.auto_save = Some(auto_save); - self - } - - /// Sets the post-turn hooks to be executed after each turn. - pub fn post_turn_hooks( - mut self, - hooks: Vec>, - ) -> Self { - self.post_turn_hooks = hooks; - self - } - - /// Enables or disables learning features. - pub fn learning_enabled(mut self, enabled: bool) -> Self { - self.learning_enabled = enabled; - self - } - - /// Enables or disables explicit-preference injection. - /// - /// When `true` (the default), preferences stored via `remember_preference` - /// are fetched from the `user_profile` namespace and injected into the - /// system prompt on every turn, independent of `learning_enabled`. 
- pub fn explicit_preferences_enabled(mut self, enabled: bool) -> Self { - self.explicit_preferences_enabled = enabled; - self - } - - /// Sets the event-bus `session_id` and `channel` used to tag - /// `DomainEvent`s emitted by this agent. - /// - /// - `session_id` groups all events for a single user / conversation so - /// downstream subscribers can correlate turns, tool calls, and errors. - /// - `channel` labels the source or stream the events originated from - /// (e.g. `"cli"`, `"telegram"`, `"rpc"`) — useful when multiple front - /// ends share the same subscriber pipeline. - /// - /// Both parameters are converted into owned `String`s and stored in - /// `event_session_id` / `event_channel` respectively. - pub fn event_context( - mut self, - session_id: impl Into, - channel: impl Into, - ) -> Self { - self.event_session_id = Some(session_id.into()); - self.event_channel = Some(channel.into()); - self - } - - /// Sets the agent definition id this session is running - /// (`welcome`, `orchestrator`, `integrations_agent`, …). - /// - /// This value is stamped onto the built [`Agent`] and surfaces in - /// the following places: - /// - /// * **Transcript filename on disk** — `transcript::write_transcript` - /// and `transcript::find_latest_transcript` use it as the - /// `{agent}` prefix in `sessions/DDMMYYYY/{agent}_{index}.md`. - /// Both the write path and the resume-lookup path read the same - /// field on `self`, so a session is always self-consistent; the - /// user-visible signal is which filename the transcript lands - /// under. Leaving it at the legacy `"main"` fallback silently - /// misfiles every non-orchestrator session under `main_*.md`. - /// * **Transcript metadata header** — `transcript::write_transcript` - /// stamps it into the `` - /// block at the top of every `.md` file. This is the ground-truth - /// signal for "which agent definition ran this session" when - /// inspecting transcripts after the fact. 
- /// * **[`PromptContext::agent_id`]** at prompt-build time (see - /// `turn.rs`). Today only one prompt section reads this field — - /// the `Connected Integrations` branch in `context/prompt.rs` - /// that special-cases `integrations_agent` vs every other agent — so - /// the current user-visible impact of a wrong id is limited to - /// the two bullets above. The stamped `prompt_builder` injected - /// by [`Agent::from_config_for_agent`] is what actually drives - /// prompt flavour per archetype, independent of this field. That - /// said, any future prompt section that branches on a - /// non-`integrations_agent` id (e.g. welcome-specific banner, planner- - /// specific rubric) would silently never fire if the field were - /// left at `"main"`, so keeping it correctly stamped closes a - /// latent foot-gun for code that hasn't been written yet. - /// - /// Callers building via [`Agent::from_config_for_agent`] get this - /// wired automatically inside `build_session_agent_inner`; direct - /// builder users (tests, CLI) must set it explicitly if they care - /// about any of the surfaces above. - pub fn agent_definition_name(mut self, name: impl Into) -> Self { - self.agent_definition_name = Some(name.into()); - self - } - - /// Set the parent session-key chain for a sub-agent. Passing - /// `Some("1713000000_orchestrator")` produces a sub-agent whose - /// transcript filename is prefixed with the parent's session key, - /// yielding a flat hierarchy on disk - /// (`session_raw/DDMMYYYY/{parent}__{child}.jsonl`). Nested - /// delegations chain further prefixes with `__`. Leave `None` - /// (default) for root sessions. - pub fn session_parent_prefix(mut self, prefix: Option) -> Self { - self.session_parent_prefix = prefix; - self - } - - /// Forward the target agent definition's `omit_profile` flag so - /// [`Agent::build_system_prompt`] can decide whether to inject - /// `PROFILE.md`. 
Only opt-in agents (welcome, orchestrator, the - /// trigger pair) should set this to `false`. - pub fn omit_profile(mut self, omit: bool) -> Self { - self.omit_profile = Some(omit); - self - } - - /// Forward the target agent definition's `omit_memory_md` flag so - /// [`Agent::build_system_prompt`] can decide whether to inject - /// `MEMORY.md`. Same opt-in set as `omit_profile`. - pub fn omit_memory_md(mut self, omit: bool) -> Self { - self.omit_memory_md = Some(omit); - self - } - - /// Wire an oversized-tool-result summarizer into the agent. When - /// set, [`Agent::execute_tool_call`] calls - /// [`crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer::maybe_summarize`] - /// on every successful tool output and replaces the raw payload - /// with the compressed summary on success. Currently set only for - /// the orchestrator session by - /// [`Agent::build_session_agent_inner`]. - pub fn payload_summarizer( - mut self, - summarizer: Arc< - dyn crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer, - >, - ) -> Self { - self.payload_summarizer = Some(summarizer); - self - } - - /// Installs pre-execution policy middleware for tool calls. - /// - /// The default policy allows all calls. Custom policies can deny a call - /// before `Tool::execute_with_options` runs. - pub fn tool_policy( - mut self, - policy: Arc, - ) -> Self { - self.tool_policy = Some(policy); - self - } - - /// Attach the production [`ArchivistHook`] instance so the session - /// turn loop can call [`ArchivistHook::flush_open_segment`] at - /// session-wind-down time, guaranteeing the trailing open segment is - /// always finalized with an LLM recap + embedding. - /// - /// Set from `build_session_agent_inner` when - /// `config.learning.episodic_capture_enabled` is `true` and a - /// SQLite connection is available. Callers that construct an `Agent` - /// directly (tests, CLI) can leave this `None` — flush is a no-op - /// when the hook is absent. 
- pub fn archivist_hook( - mut self, - hook: Option>, - ) -> Self { - self.archivist_hook = hook; - self - } - - /// Phase 1.5 — gate the unified compaction path. - /// - /// When `true` (the default) and an archivist hook is wired in via - /// [`Self::archivist_hook`], the session's `ContextManager` summarizer is - /// wrapped with a [`SegmentRecapSummarizer`] that routes autocompaction - /// through the archivist's rolling recap (one LLM summarizer, soft-fallback - /// to [`ProviderSummarizer`] when the recap is unavailable). - /// - /// When `false` the `ProviderSummarizer` is used directly and Phase 1.5 is - /// completely absent from the hot path — behaviour is identical to today's. - pub fn unified_compaction_enabled(mut self, enabled: bool) -> Self { - self.unified_compaction_enabled = enabled; - self - } - - /// Validates the configuration and constructs a new `Agent` instance. - /// - /// This method is responsible for wiring together the provided components, - /// setting up the context manager, and initializing the conversation history. - /// It ensures that all required fields (provider, tools, memory, etc.) are present. 
- pub fn build(self) -> Result { - let tools = self - .tools - .ok_or_else(|| anyhow::anyhow!("tools are required"))?; - let tool_specs: Vec = tools.iter().map(|tool| tool.spec()).collect(); - - let visible_names = self.visible_tool_names.unwrap_or_default(); - let config = self.config.clone().unwrap_or_default(); - let event_session_id = self - .event_session_id - .clone() - .unwrap_or_else(|| "standalone".to_string()); - let event_channel = self - .event_channel - .clone() - .unwrap_or_else(|| "internal".to_string()); - let agent_definition_name = self - .agent_definition_name - .clone() - .unwrap_or_else(|| "main".to_string()); - let tool_policy_session = ToolPolicyEngine::build_session( - &agent_definition_name, - &event_channel, - "session", - &config.channel_permissions, - &tools, - &visible_names, - ); - - // Build the filtered spec list that the main agent sends to the - // provider. The explicit visible-tool allowlist and the resolved - // channel permission policy must stay aligned so prompt-visible - // tools cannot exceed the runtime execution boundary. - let visible_tool_specs_unfiltered = - visible_tool_specs_for_policy(&tool_specs, &visible_names, &tool_policy_session); - - // Dedupe by tool name. Anthropic (and other strict providers) - // rejects a chat/completions request that lists two tools with - // the same name — OpenHuman's own backend and OpenAI silently - // accept duplicates, which hid this bug until #1710's per-role - // routing started sending the same tool list to Anthropic. 
- let visible_tool_specs: Vec = - dedup_visible_tool_specs(visible_tool_specs_unfiltered); - - let visible_names_list: Vec<&str> = - visible_tool_specs.iter().map(|s| s.name.as_str()).collect(); - log::info!( - "[agent] tool spec filter: total={} visible={} (filter_active={} policy_restricted={}) names=[{}]", - tool_specs.len(), - visible_tool_specs.len(), - !visible_names.is_empty(), - tool_policy_session.has_restrictions(), - visible_names_list.join(", ") - ); - - // Pull the provider out of the builder once. We store it on - // the Agent (for normal turn chat calls) and also clone the - // Arc into the ProviderSummarizer so the context manager can - // dispatch autocompaction through the same provider. - let provider = self - .provider - .ok_or_else(|| anyhow::anyhow!("provider is required"))?; - - let prompt_builder = self - .prompt_builder - .unwrap_or_else(SystemPromptBuilder::with_defaults); - - let model_name = self - .model_name - .unwrap_or_else(|| crate::openhuman::config::DEFAULT_MODEL.into()); - - // Assemble the per-session ContextManager. The manager owns - // the prompt builder, the reduction pipeline, and the - // summarizer — every concern that touches "what's in the - // model's context window" routes through this single handle. - let context_config = self.context_config.unwrap_or_default(); - - // Phase 1.5 — unified compaction. - // - // When `unified_compaction_enabled` is true AND an archivist hook - // is wired in, wrap the inner `ProviderSummarizer` with a - // `SegmentRecapSummarizer`. The outer type: - // 1. Tries the rolling segment recap from the open segment. - // 2. Falls back to the inner `ProviderSummarizer` if unavailable. - // - // With the flag off OR no archivist, the plain `ProviderSummarizer` - // is used and Phase 1.5 is completely absent from the hot path - // — behaviour is identical to Phase 1. 
- let inner_summarizer: Arc = - Arc::new(ProviderSummarizer::new(provider.clone())); - let session_id_for_recap = self - .event_session_id - .clone() - .unwrap_or_else(|| "standalone".to_string()); - let summarizer: Arc = - if self.unified_compaction_enabled { - if let Some(ref archivist) = self.archivist_hook { - log::debug!( - "[agent::builder] unified_compaction_enabled=true — \ - wrapping summarizer with SegmentRecapSummarizer \ - session_id={session_id_for_recap}" - ); - Arc::new(SegmentRecapSummarizer::new( - Arc::clone(archivist), - session_id_for_recap, - inner_summarizer, - )) - } else { - log::debug!( - "[agent::builder] unified_compaction_enabled=true but \ - no archivist hook — using ProviderSummarizer" - ); - inner_summarizer - } - } else { - log::debug!( - "[agent::builder] unified_compaction_enabled=false — \ - using ProviderSummarizer (Phase 1.5 disabled)" - ); - inner_summarizer - }; - - let context = ContextManager::new( - &context_config, - summarizer, - model_name.clone(), - prompt_builder, - ); - - let workspace_dir = self - .workspace_dir - .unwrap_or_else(|| std::path::PathBuf::from(".")); - let action_dir = self.action_dir.unwrap_or_else(|| workspace_dir.clone()); - - Ok(Agent { - provider, - tools: Arc::new(tools), - tool_specs: Arc::new(tool_specs), - visible_tool_specs: Arc::new(visible_tool_specs), - visible_tool_names: visible_names, - tool_policy_session, - memory: self - .memory - .ok_or_else(|| anyhow::anyhow!("memory is required"))?, - tool_dispatcher: std::sync::Arc::from( - self.tool_dispatcher - .ok_or_else(|| anyhow::anyhow!("tool_dispatcher is required"))?, - ), - memory_loader: self - .memory_loader - .unwrap_or_else(|| Box::new(DefaultMemoryLoader::default())), - config, - model_name, - temperature: self.temperature.unwrap_or(0.7), - workspace_dir, - action_dir, - skills: self.skills.unwrap_or_default(), - auto_save: self.auto_save.unwrap_or(false), - last_memory_context: None, - last_turn_citations: Vec::new(), - history: 
Vec::new(), - post_turn_hooks: self.post_turn_hooks, - learning_enabled: self.learning_enabled, - explicit_preferences_enabled: self.explicit_preferences_enabled, - event_session_id, - event_channel, - agent_definition_name: agent_definition_name.clone(), - // Canonical registry id — captured here at build time - // before any caller can call `set_agent_definition_name` - // and clobber the transcript-facing name. Used by - // `refresh_delegation_tools` to re-resolve the agent's - // `subagents` declaration against the global registry. - agent_definition_id: agent_definition_name.clone(), - session_transcript_path: None, - session_key: { - let unix_ts = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_secs()) - .unwrap_or(0); - let sanitized: String = agent_definition_name - .chars() - .map(|c| { - if c.is_ascii_alphanumeric() || c == '_' || c == '-' { - c - } else { - '_' - } - }) - .collect(); - format!("{unix_ts}_{sanitized}") - }, - session_parent_prefix: self.session_parent_prefix, - cached_transcript_messages: None, - context, - on_progress: None, - run_queue: None, - connected_integrations: Vec::new(), - connected_integrations_initialized: false, - integration_runtime_config: None, - // Default to `true` (omit) so legacy / custom agents built - // without a definition stay lean. Opt-in agents thread their - // `omit_profile = false` through the builder. 
- omit_profile: self.omit_profile.unwrap_or(true), - omit_memory_md: self.omit_memory_md.unwrap_or(true), - payload_summarizer: self.payload_summarizer, - tool_policy: self.tool_policy.unwrap_or_else(|| { - Arc::new(crate::openhuman::agent::tool_policy::AllowAllToolPolicy) - }), - last_seen_integrations_hash: 0, - composio_integrations_rx: None, - announced_integrations: std::collections::HashSet::new(), - pending_integration_announcement: Vec::new(), - archivist_hook: self.archivist_hook, - synthesized_tool_names: std::collections::HashSet::new(), - pending_synthesized_tools_mask: std::collections::HashSet::new(), - }) - } -} - impl Agent { /// Constructs an `Agent` instance from a global system configuration. /// @@ -1076,7 +478,10 @@ impl Agent { let prompt_root = config.workspace_dir.join("agent").join("prompts"); let workspace_path = prompt_root.join(path); let body_text = if workspace_path.is_file() { - match crate::openhuman::security::validate_path_within_root(&workspace_path, &prompt_root) { + match crate::openhuman::security::validate_path_within_root( + &workspace_path, + &prompt_root, + ) { Ok(resolved) => { std::fs::read_to_string(&resolved).unwrap_or_else(|e| { log::warn!( @@ -1469,15 +874,16 @@ impl Agent { // entry. The registry is self-contained — it doesn't hold a // reference back into the tools Vec. let pformat_registry = crate::openhuman::agent::pformat::build_registry(&tools); - let tool_dispatcher: Box = match dispatcher_choice.as_str() { - "native" => Box::new(NativeToolDispatcher), - "xml" => Box::new(XmlToolDispatcher), - "pformat" => Box::new(PFormatToolDispatcher::new(pformat_registry.clone())), - _ if supports_native => Box::new(NativeToolDispatcher), - // Default for text-only providers: P-Format. Flip the - // `agent.tool_dispatcher` config to `"xml"` to revert. 
- _ => Box::new(PFormatToolDispatcher::new(pformat_registry.clone())), - }; + let tool_dispatcher: Box = + match dispatcher_choice.as_str() { + "native" => Box::new(NativeToolDispatcher), + "xml" => Box::new(XmlToolDispatcher), + "pformat" => Box::new(PFormatToolDispatcher::new(pformat_registry.clone())), + _ if supports_native => Box::new(NativeToolDispatcher), + // Default for text-only providers: P-Format. Flip the + // `agent.tool_dispatcher` config to `"xml"` to revert. + _ => Box::new(PFormatToolDispatcher::new(pformat_registry.clone())), + }; // Provider-side grammar decoders (e.g. Fireworks) compile every // tool JSON schema into a grammar and index its rules with a @@ -1498,7 +904,7 @@ impl Agent { // mattering. Downside: slightly looser tool-call formatting // than native; the existing `parse_tool_calls` recovers from // stray formatting and the loop retries on malformed output. - let tool_dispatcher: Box = + let tool_dispatcher: Box = if agent_id == "integrations_agent" && tool_dispatcher.should_send_tool_specs() { log::info!( "[agent::builder] integrations_agent: overriding native tool dispatcher with \ @@ -1662,128 +1068,3 @@ impl Agent { Ok(agent) } } - -/// (#1400) Best-effort synchronous prefetch of eager tool-scoped rules. -/// -/// `from_config_*` is sync but typically runs inside a multi-threaded -/// Tokio runtime (the agent harness path from the channels runtime). -/// We use `block_in_place` + the current runtime handle to call the -/// async store API without restructuring the whole session builder. -/// -/// Returns an empty `Vec` (rather than erroring) when: -/// - no Tokio runtime is active (e.g. a sync CLI bootstrap), -/// - the runtime is single-threaded (`block_in_place` would panic), -/// - or the underlying `rules_for_prompt` call returns an error -/// (e.g. the memory backend isn't ready yet). 
-/// -/// Critical / High rules captured later in the session are still -/// available via the `memory_tool_rules_for_prompt` RPC; this prefetch -/// merely seeds the rules that exist at session start. -fn prefetch_tool_memory_rules_blocking( - memory: Arc, - tool_names: &[String], -) -> Vec { - let Ok(handle) = tokio::runtime::Handle::try_current() else { - return Vec::new(); - }; - if handle.runtime_flavor() != tokio::runtime::RuntimeFlavor::MultiThread { - return Vec::new(); - } - let tool_names = tool_names.to_vec(); - tokio::task::block_in_place(|| { - handle.block_on(async move { - let store = ToolMemoryStore::new(memory); - match store.rules_for_prompt(&tool_names).await { - Ok(grouped) => { - let mut flat: Vec<_> = grouped.into_values().flatten().collect(); - flat.sort_by(|a, b| { - b.priority - .cmp(&a.priority) - .then_with(|| a.tool_name.cmp(&b.tool_name)) - .then_with(|| a.rule.cmp(&b.rule)) - }); - flat - } - Err(err) => { - log::warn!("[memory::tool_memory] prefetch failed: {err}"); - Vec::new() - } - } - }) - }) -} - -#[cfg(test)] -mod dedup_tests { - use super::dedup_visible_tool_specs; - use crate::openhuman::tools::ToolSpec; - use serde_json::json; - - fn spec(name: &str) -> ToolSpec { - ToolSpec { - name: name.to_string(), - description: format!("description for {name}"), - parameters: json!({}), - } - } - - #[test] - fn drops_duplicates_first_wins() { - // Real-world collision: researcher's `delegate_name = "research"` - // synthesises a delegate tool that shadows a same-named skill. - // Anthropic 400s on duplicate tool names; the dedup helper must - // keep the *first* occurrence so registration order semantics - // are preserved (the underlying tool dispatch lookup-by-name - // still resolves the right tool). 
- let specs = vec![ - spec("research"), // skill - spec("plan"), - spec("research"), // delegate, dropped - spec("run_code"), - spec("plan"), // dropped - ]; - - let deduped = dedup_visible_tool_specs(specs); - - let names: Vec<&str> = deduped.iter().map(|s| s.name.as_str()).collect(); - assert_eq!(names, vec!["research", "plan", "run_code"]); - } - - #[test] - fn passes_through_when_no_duplicates() { - let specs = vec![spec("a"), spec("b"), spec("c")]; - let deduped = dedup_visible_tool_specs(specs); - assert_eq!(deduped.len(), 3); - assert_eq!(deduped[0].name, "a"); - assert_eq!(deduped[1].name, "b"); - assert_eq!(deduped[2].name, "c"); - } - - #[test] - fn handles_empty_input() { - let deduped = dedup_visible_tool_specs(Vec::::new()); - assert!(deduped.is_empty()); - } - - #[test] - fn preserves_full_spec_content_for_kept_entries() { - // Description + parameters must survive the dedup pass intact — - // the LLM uses both for tool-call decisions, and corrupting them - // would silently degrade function-calling quality. - let mut spec_a = spec("alpha"); - spec_a.description = "first alpha — should win".to_string(); - spec_a.parameters = json!({"type": "object", "required": ["x"]}); - - let mut spec_a_dup = spec("alpha"); - spec_a_dup.description = "second alpha — should be dropped".to_string(); - - let deduped = dedup_visible_tool_specs(vec![spec_a.clone(), spec_a_dup]); - - assert_eq!(deduped.len(), 1); - assert_eq!(deduped[0].description, "first alpha — should win"); - assert_eq!( - deduped[0].parameters, - json!({"type": "object", "required": ["x"]}) - ); - } -} diff --git a/src/openhuman/agent/harness/session/builder/helpers.rs b/src/openhuman/agent/harness/session/builder/helpers.rs new file mode 100644 index 0000000000..6de044d68f --- /dev/null +++ b/src/openhuman/agent/harness/session/builder/helpers.rs @@ -0,0 +1,55 @@ +//! Utility helpers used during agent construction. 
+ +use crate::openhuman::memory::Memory; +use crate::openhuman::memory_tools::{ToolMemoryRule, ToolMemoryStore}; +use std::sync::Arc; + +/// (#1400) Best-effort synchronous prefetch of eager tool-scoped rules. +/// +/// `from_config_*` is sync but typically runs inside a multi-threaded +/// Tokio runtime (the agent harness path from the channels runtime). +/// We use `block_in_place` + the current runtime handle to call the +/// async store API without restructuring the whole session builder. +/// +/// Returns an empty `Vec` (rather than erroring) when: +/// - no Tokio runtime is active (e.g. a sync CLI bootstrap), +/// - the runtime is single-threaded (`block_in_place` would panic), +/// - or the underlying `rules_for_prompt` call returns an error +/// (e.g. the memory backend isn't ready yet). +/// +/// Critical / High rules captured later in the session are still +/// available via the `memory_tool_rules_for_prompt` RPC; this prefetch +/// merely seeds the rules that exist at session start. 
+pub(super) fn prefetch_tool_memory_rules_blocking( + memory: Arc, + tool_names: &[String], +) -> Vec { + let Ok(handle) = tokio::runtime::Handle::try_current() else { + return Vec::new(); + }; + if handle.runtime_flavor() != tokio::runtime::RuntimeFlavor::MultiThread { + return Vec::new(); + } + let tool_names = tool_names.to_vec(); + tokio::task::block_in_place(|| { + handle.block_on(async move { + let store = ToolMemoryStore::new(memory); + match store.rules_for_prompt(&tool_names).await { + Ok(grouped) => { + let mut flat: Vec<_> = grouped.into_values().flatten().collect(); + flat.sort_by(|a, b| { + b.priority + .cmp(&a.priority) + .then_with(|| a.tool_name.cmp(&b.tool_name)) + .then_with(|| a.rule.cmp(&b.rule)) + }); + flat + } + Err(err) => { + log::warn!("[memory::tool_memory] prefetch failed: {err}"); + Vec::new() + } + } + }) + }) +} diff --git a/src/openhuman/agent/harness/session/builder/mod.rs b/src/openhuman/agent/harness/session/builder/mod.rs new file mode 100644 index 0000000000..c7b29ea214 --- /dev/null +++ b/src/openhuman/agent/harness/session/builder/mod.rs @@ -0,0 +1,66 @@ +//! `AgentBuilder` fluent API and the `Agent::from_config` factory. +//! +//! Everything in this module is about *constructing* an `Agent` — the +//! builder setters, the `build()` validator, and the `from_config()` +//! factory that wires together the real provider / memory / tool +//! registry from a loaded [`Config`]. Per-turn behaviour lives in +//! [`super::turn`]; accessors and run-helpers live in [`super::runtime`]. + +mod factory; +mod helpers; +mod setters; + +#[cfg(test)] +mod builder_tests; + +use crate::openhuman::agent_tool_policy::ToolPolicySession; +use crate::openhuman::tools::ToolSpec; + +/// Drop entries with duplicate `name` fields, first occurrence wins. 
+/// +/// Anthropic (and other strict providers) rejects a chat/completions +/// request that lists two tools with the same name — OpenHuman's own +/// backend and OpenAI silently accept duplicates, which hid the +/// underlying collision (researcher sub-agent's `delegate_name = +/// "research"` shadowing a same-named skill tool) until #1710's +/// per-role routing started sending the same tool list to Anthropic. +/// +/// Called from every place that materialises the visible tool spec +/// list — initial build, post-composio refresh, scope-filter change — +/// so the request the provider sees is always name-unique regardless +/// of which path produced it. +pub(crate) fn dedup_visible_tool_specs(specs: Vec) -> Vec { + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + let mut deduped: Vec = Vec::with_capacity(specs.len()); + let mut dropped: Vec = Vec::new(); + for spec in specs { + if seen.insert(spec.name.clone()) { + deduped.push(spec); + } else { + dropped.push(spec.name); + } + } + if !dropped.is_empty() { + log::warn!( + "[agent] dropped {} duplicate tool spec(s) before sending to provider: {:?}", + dropped.len(), + dropped + ); + } + deduped +} + +pub(super) fn visible_tool_specs_for_policy( + tool_specs: &[ToolSpec], + visible_names: &std::collections::HashSet, + tool_policy: &ToolPolicySession, +) -> Vec { + tool_specs + .iter() + .filter(|spec| { + (visible_names.is_empty() || visible_names.contains(&spec.name)) + && tool_policy.is_allowed(&spec.name) + }) + .cloned() + .collect() +} diff --git a/src/openhuman/agent/harness/session/builder/setters.rs b/src/openhuman/agent/harness/session/builder/setters.rs new file mode 100644 index 0000000000..372df86e9c --- /dev/null +++ b/src/openhuman/agent/harness/session/builder/setters.rs @@ -0,0 +1,573 @@ +//! `AgentBuilder` fluent setters and the `build()` validator. +//! +//! All setter methods return `Self` for chaining. `build()` validates that +//! 
required fields are present and assembles the final [`Agent`]. + +use super::{dedup_visible_tool_specs, visible_tool_specs_for_policy}; +use crate::openhuman::agent::harness::session::types::{Agent, AgentBuilder}; +use crate::openhuman::agent::memory_loader::DefaultMemoryLoader; +use crate::openhuman::agent_tool_policy::ToolPolicyEngine; +use crate::openhuman::config::ContextConfig; +use crate::openhuman::context::{ContextManager, ProviderSummarizer, SegmentRecapSummarizer}; +use crate::openhuman::memory::Memory; +use crate::openhuman::tools::{Tool, ToolSpec}; +use anyhow::Result; +use std::sync::Arc; + +impl AgentBuilder { + /// Creates a new `AgentBuilder` with default values. + pub fn new() -> Self { + Self { + provider: None, + tools: None, + visible_tool_names: None, + memory: None, + prompt_builder: None, + tool_dispatcher: None, + memory_loader: None, + config: None, + context_config: None, + model_name: None, + temperature: None, + workspace_dir: None, + action_dir: None, + skills: None, + auto_save: None, + post_turn_hooks: Vec::new(), + learning_enabled: false, + explicit_preferences_enabled: true, + event_session_id: None, + event_channel: None, + agent_definition_name: None, + session_parent_prefix: None, + omit_profile: None, + omit_memory_md: None, + payload_summarizer: None, + tool_policy: None, + archivist_hook: None, + unified_compaction_enabled: true, + } + } + + /// Sets the AI provider for the agent. + /// + /// Accepts a `Box` for backward compatibility but stores + /// the provider as an `Arc` internally so sub-agents spawned from this + /// agent (via `spawn_subagent`) can share the same instance. + pub fn provider( + mut self, + provider: Box, + ) -> Self { + self.provider = Some(Arc::from(provider)); + self + } + + /// Sets the AI provider from an existing `Arc`. Use this when sharing + /// a provider instance across multiple agents. 
+ pub fn provider_arc( + mut self, + provider: Arc, + ) -> Self { + self.provider = Some(provider); + self + } + + /// Sets the available tools for the agent. + pub fn tools(mut self, tools: Vec>) -> Self { + self.tools = Some(tools); + self + } + + /// Restricts which tools the main agent can see and call directly. + /// Tools not in this set are still available to sub-agents via the + /// runner. Pass `None` (default) to make all tools visible. + pub fn visible_tool_names(mut self, names: std::collections::HashSet) -> Self { + self.visible_tool_names = Some(names); + self + } + + /// Sets the memory system for the agent. + pub fn memory(mut self, memory: Arc) -> Self { + self.memory = Some(memory); + self + } + + /// Sets the system prompt builder for the agent. + pub fn prompt_builder( + mut self, + prompt_builder: crate::openhuman::context::prompt::SystemPromptBuilder, + ) -> Self { + self.prompt_builder = Some(prompt_builder); + self + } + + /// Sets the tool dispatcher for the agent. + pub fn tool_dispatcher( + mut self, + tool_dispatcher: Box, + ) -> Self { + self.tool_dispatcher = Some(tool_dispatcher); + self + } + + /// Sets the memory loader for the agent. + pub fn memory_loader( + mut self, + memory_loader: Box, + ) -> Self { + self.memory_loader = Some(memory_loader); + self + } + + /// Sets the agent configuration. + pub fn config(mut self, config: crate::openhuman::config::AgentConfig) -> Self { + self.config = Some(config); + self + } + + /// Sets the global context-management configuration. Threaded + /// into the [`ContextManager`] constructed in [`Self::build`]. If + /// not set the manager is constructed with + /// [`ContextConfig::default`]. + pub fn context_config(mut self, context_config: ContextConfig) -> Self { + self.context_config = Some(context_config); + self + } + + /// Sets the model name to use for chat requests. 
+ pub fn model_name(mut self, model_name: String) -> Self { + self.model_name = Some(model_name); + self + } + + /// Sets the temperature for chat requests. + pub fn temperature(mut self, temperature: f64) -> Self { + self.temperature = Some(temperature); + self + } + + /// Sets the workspace directory for the agent. + pub fn workspace_dir(mut self, workspace_dir: std::path::PathBuf) -> Self { + self.workspace_dir = Some(workspace_dir); + self + } + + pub fn action_dir(mut self, action_dir: std::path::PathBuf) -> Self { + self.action_dir = Some(action_dir); + self + } + + /// Sets the skills available to the agent. + pub fn skills(mut self, skills: Vec) -> Self { + self.skills = Some(skills); + self + } + + /// Enables or disables automatic saving of conversation history to memory. + pub fn auto_save(mut self, auto_save: bool) -> Self { + self.auto_save = Some(auto_save); + self + } + + /// Sets the post-turn hooks to be executed after each turn. + pub fn post_turn_hooks( + mut self, + hooks: Vec>, + ) -> Self { + self.post_turn_hooks = hooks; + self + } + + /// Enables or disables learning features. + pub fn learning_enabled(mut self, enabled: bool) -> Self { + self.learning_enabled = enabled; + self + } + + /// Enables or disables explicit-preference injection. + /// + /// When `true` (the default), preferences stored via `remember_preference` + /// are fetched from the `user_profile` namespace and injected into the + /// system prompt on every turn, independent of `learning_enabled`. + pub fn explicit_preferences_enabled(mut self, enabled: bool) -> Self { + self.explicit_preferences_enabled = enabled; + self + } + + /// Sets the event-bus `session_id` and `channel` used to tag + /// `DomainEvent`s emitted by this agent. + /// + /// - `session_id` groups all events for a single user / conversation so + /// downstream subscribers can correlate turns, tool calls, and errors. + /// - `channel` labels the source or stream the events originated from + /// (e.g. 
`"cli"`, `"telegram"`, `"rpc"`) — useful when multiple front + /// ends share the same subscriber pipeline. + /// + /// Both parameters are converted into owned `String`s and stored in + /// `event_session_id` / `event_channel` respectively. + pub fn event_context( + mut self, + session_id: impl Into, + channel: impl Into, + ) -> Self { + self.event_session_id = Some(session_id.into()); + self.event_channel = Some(channel.into()); + self + } + + /// Sets the agent definition id this session is running + /// (`welcome`, `orchestrator`, `integrations_agent`, …). + /// + /// This value is stamped onto the built [`Agent`] and surfaces in + /// the following places: + /// + /// * **Transcript filename on disk** — `transcript::write_transcript` + /// and `transcript::find_latest_transcript` use it as the + /// `{agent}` prefix in `sessions/DDMMYYYY/{agent}_{index}.md`. + /// Both the write path and the resume-lookup path read the same + /// field on `self`, so a session is always self-consistent; the + /// user-visible signal is which filename the transcript lands + /// under. Leaving it at the legacy `"main"` fallback silently + /// misfiles every non-orchestrator session under `main_*.md`. + /// * **Transcript metadata header** — `transcript::write_transcript` + /// stamps it into the `` + /// block at the top of every `.md` file. This is the ground-truth + /// signal for "which agent definition ran this session" when + /// inspecting transcripts after the fact. + /// * **[`PromptContext::agent_id`]** at prompt-build time (see + /// `turn.rs`). Today only one prompt section reads this field — + /// the `Connected Integrations` branch in `context/prompt.rs` + /// that special-cases `integrations_agent` vs every other agent — so + /// the current user-visible impact of a wrong id is limited to + /// the two bullets above. 
The stamped `prompt_builder` injected + /// by [`Agent::from_config_for_agent`] is what actually drives + /// prompt flavour per archetype, independent of this field. That + /// said, any future prompt section that branches on a + /// non-`integrations_agent` id (e.g. welcome-specific banner, planner- + /// specific rubric) would silently never fire if the field were + /// left at `"main"`, so keeping it correctly stamped closes a + /// latent foot-gun for code that hasn't been written yet. + /// + /// Callers building via [`Agent::from_config_for_agent`] get this + /// wired automatically inside `build_session_agent_inner`; direct + /// builder users (tests, CLI) must set it explicitly if they care + /// about any of the surfaces above. + pub fn agent_definition_name(mut self, name: impl Into) -> Self { + self.agent_definition_name = Some(name.into()); + self + } + + /// Set the parent session-key chain for a sub-agent. Passing + /// `Some("1713000000_orchestrator")` produces a sub-agent whose + /// transcript filename is prefixed with the parent's session key, + /// yielding a flat hierarchy on disk + /// (`session_raw/DDMMYYYY/{parent}__{child}.jsonl`). Nested + /// delegations chain further prefixes with `__`. Leave `None` + /// (default) for root sessions. + pub fn session_parent_prefix(mut self, prefix: Option) -> Self { + self.session_parent_prefix = prefix; + self + } + + /// Forward the target agent definition's `omit_profile` flag so + /// [`Agent::build_system_prompt`] can decide whether to inject + /// `PROFILE.md`. Only opt-in agents (welcome, orchestrator, the + /// trigger pair) should set this to `false`. + pub fn omit_profile(mut self, omit: bool) -> Self { + self.omit_profile = Some(omit); + self + } + + /// Forward the target agent definition's `omit_memory_md` flag so + /// [`Agent::build_system_prompt`] can decide whether to inject + /// `MEMORY.md`. Same opt-in set as `omit_profile`. 
+ pub fn omit_memory_md(mut self, omit: bool) -> Self { + self.omit_memory_md = Some(omit); + self + } + + /// Wire an oversized-tool-result summarizer into the agent. When + /// set, [`Agent::execute_tool_call`] calls + /// [`crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer::maybe_summarize`] + /// on every successful tool output and replaces the raw payload + /// with the compressed summary on success. Currently set only for + /// the orchestrator session by + /// [`Agent::build_session_agent_inner`]. + pub fn payload_summarizer( + mut self, + summarizer: Arc< + dyn crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer, + >, + ) -> Self { + self.payload_summarizer = Some(summarizer); + self + } + + /// Installs pre-execution policy middleware for tool calls. + /// + /// The default policy allows all calls. Custom policies can deny a call + /// before `Tool::execute_with_options` runs. + pub fn tool_policy( + mut self, + policy: Arc, + ) -> Self { + self.tool_policy = Some(policy); + self + } + + /// Attach the production [`ArchivistHook`] instance so the session + /// turn loop can call [`ArchivistHook::flush_open_segment`] at + /// session-wind-down time, guaranteeing the trailing open segment is + /// always finalized with an LLM recap + embedding. + /// + /// Set from `build_session_agent_inner` when + /// `config.learning.episodic_capture_enabled` is `true` and a + /// SQLite connection is available. Callers that construct an `Agent` + /// directly (tests, CLI) can leave this `None` — flush is a no-op + /// when the hook is absent. + pub fn archivist_hook( + mut self, + hook: Option>, + ) -> Self { + self.archivist_hook = hook; + self + } + + /// Phase 1.5 — gate the unified compaction path. 
+ /// + /// When `true` (the default) and an archivist hook is wired in via + /// [`Self::archivist_hook`], the session's `ContextManager` summarizer is + /// wrapped with a [`SegmentRecapSummarizer`] that routes autocompaction + /// through the archivist's rolling recap (one LLM summarizer, soft-fallback + /// to [`ProviderSummarizer`] when the recap is unavailable). + /// + /// When `false` the `ProviderSummarizer` is used directly and Phase 1.5 is + /// completely absent from the hot path — behaviour is identical to today's. + pub fn unified_compaction_enabled(mut self, enabled: bool) -> Self { + self.unified_compaction_enabled = enabled; + self + } + + /// Validates the configuration and constructs a new `Agent` instance. + /// + /// This method is responsible for wiring together the provided components, + /// setting up the context manager, and initializing the conversation history. + /// It ensures that all required fields (provider, tools, memory, etc.) are present. + pub fn build(self) -> Result { + let tools = self + .tools + .ok_or_else(|| anyhow::anyhow!("tools are required"))?; + let tool_specs: Vec = tools.iter().map(|tool| tool.spec()).collect(); + + let visible_names = self.visible_tool_names.unwrap_or_default(); + let config = self.config.clone().unwrap_or_default(); + let event_session_id = self + .event_session_id + .clone() + .unwrap_or_else(|| "standalone".to_string()); + let event_channel = self + .event_channel + .clone() + .unwrap_or_else(|| "internal".to_string()); + let agent_definition_name = self + .agent_definition_name + .clone() + .unwrap_or_else(|| "main".to_string()); + let tool_policy_session = ToolPolicyEngine::build_session( + &agent_definition_name, + &event_channel, + "session", + &config.channel_permissions, + &tools, + &visible_names, + ); + + // Build the filtered spec list that the main agent sends to the + // provider. 
The explicit visible-tool allowlist and the resolved + // channel permission policy must stay aligned so prompt-visible + // tools cannot exceed the runtime execution boundary. + let visible_tool_specs_unfiltered = + visible_tool_specs_for_policy(&tool_specs, &visible_names, &tool_policy_session); + + // Dedupe by tool name. Anthropic (and other strict providers) + // rejects a chat/completions request that lists two tools with + // the same name — OpenHuman's own backend and OpenAI silently + // accept duplicates, which hid this bug until #1710's per-role + // routing started sending the same tool list to Anthropic. + let visible_tool_specs: Vec = + dedup_visible_tool_specs(visible_tool_specs_unfiltered); + + let visible_names_list: Vec<&str> = + visible_tool_specs.iter().map(|s| s.name.as_str()).collect(); + log::info!( + "[agent] tool spec filter: total={} visible={} (filter_active={} policy_restricted={}) names=[{}]", + tool_specs.len(), + visible_tool_specs.len(), + !visible_names.is_empty(), + tool_policy_session.has_restrictions(), + visible_names_list.join(", ") + ); + + // Pull the provider out of the builder once. We store it on + // the Agent (for normal turn chat calls) and also clone the + // Arc into the ProviderSummarizer so the context manager can + // dispatch autocompaction through the same provider. + let provider = self + .provider + .ok_or_else(|| anyhow::anyhow!("provider is required"))?; + + let prompt_builder = self + .prompt_builder + .unwrap_or_else(crate::openhuman::context::prompt::SystemPromptBuilder::with_defaults); + + let model_name = self + .model_name + .unwrap_or_else(|| crate::openhuman::config::DEFAULT_MODEL.into()); + + // Assemble the per-session ContextManager. The manager owns + // the prompt builder, the reduction pipeline, and the + // summarizer — every concern that touches "what's in the + // model's context window" routes through this single handle. 
+ let context_config = self.context_config.unwrap_or_default(); + + // Phase 1.5 — unified compaction. + // + // When `unified_compaction_enabled` is true AND an archivist hook + // is wired in, wrap the inner `ProviderSummarizer` with a + // `SegmentRecapSummarizer`. The outer type: + // 1. Tries the rolling segment recap from the open segment. + // 2. Falls back to the inner `ProviderSummarizer` if unavailable. + // + // With the flag off OR no archivist, the plain `ProviderSummarizer` + // is used and Phase 1.5 is completely absent from the hot path + // — behaviour is identical to Phase 1. + let inner_summarizer: Arc = + Arc::new(ProviderSummarizer::new(provider.clone())); + let session_id_for_recap = self + .event_session_id + .clone() + .unwrap_or_else(|| "standalone".to_string()); + let summarizer: Arc = + if self.unified_compaction_enabled { + if let Some(ref archivist) = self.archivist_hook { + log::debug!( + "[agent::builder] unified_compaction_enabled=true — \ + wrapping summarizer with SegmentRecapSummarizer \ + session_id={session_id_for_recap}" + ); + Arc::new(SegmentRecapSummarizer::new( + Arc::clone(archivist), + session_id_for_recap, + inner_summarizer, + )) + } else { + log::debug!( + "[agent::builder] unified_compaction_enabled=true but \ + no archivist hook — using ProviderSummarizer" + ); + inner_summarizer + } + } else { + log::debug!( + "[agent::builder] unified_compaction_enabled=false — \ + using ProviderSummarizer (Phase 1.5 disabled)" + ); + inner_summarizer + }; + + let context = ContextManager::new( + &context_config, + summarizer, + model_name.clone(), + prompt_builder, + ); + + let workspace_dir = self + .workspace_dir + .unwrap_or_else(|| std::path::PathBuf::from(".")); + let action_dir = self.action_dir.unwrap_or_else(|| workspace_dir.clone()); + + Ok(Agent { + provider, + tools: Arc::new(tools), + tool_specs: Arc::new(tool_specs), + visible_tool_specs: Arc::new(visible_tool_specs), + visible_tool_names: visible_names, + 
tool_policy_session, + memory: self + .memory + .ok_or_else(|| anyhow::anyhow!("memory is required"))?, + tool_dispatcher: std::sync::Arc::from( + self.tool_dispatcher + .ok_or_else(|| anyhow::anyhow!("tool_dispatcher is required"))?, + ), + memory_loader: self + .memory_loader + .unwrap_or_else(|| Box::new(DefaultMemoryLoader::default())), + config, + model_name, + temperature: self.temperature.unwrap_or(0.7), + workspace_dir, + action_dir, + skills: self.skills.unwrap_or_default(), + auto_save: self.auto_save.unwrap_or(false), + last_memory_context: None, + last_turn_citations: Vec::new(), + history: Vec::new(), + post_turn_hooks: self.post_turn_hooks, + learning_enabled: self.learning_enabled, + explicit_preferences_enabled: self.explicit_preferences_enabled, + event_session_id, + event_channel, + agent_definition_name: agent_definition_name.clone(), + // Canonical registry id — captured here at build time + // before any caller can call `set_agent_definition_name` + // and clobber the transcript-facing name. Used by + // `refresh_delegation_tools` to re-resolve the agent's + // `subagents` declaration against the global registry. + agent_definition_id: agent_definition_name.clone(), + session_transcript_path: None, + session_key: { + let unix_ts = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let sanitized: String = agent_definition_name + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '_' || c == '-' { + c + } else { + '_' + } + }) + .collect(); + format!("{unix_ts}_{sanitized}") + }, + session_parent_prefix: self.session_parent_prefix, + cached_transcript_messages: None, + context, + on_progress: None, + run_queue: None, + connected_integrations: Vec::new(), + connected_integrations_initialized: false, + integration_runtime_config: None, + // Default to `true` (omit) so legacy / custom agents built + // without a definition stay lean. 
Opt-in agents thread their + // `omit_profile = false` through the builder. + omit_profile: self.omit_profile.unwrap_or(true), + omit_memory_md: self.omit_memory_md.unwrap_or(true), + payload_summarizer: self.payload_summarizer, + tool_policy: self.tool_policy.unwrap_or_else(|| { + Arc::new(crate::openhuman::agent::tool_policy::AllowAllToolPolicy) + }), + last_seen_integrations_hash: 0, + composio_integrations_rx: None, + announced_integrations: std::collections::HashSet::new(), + pending_integration_announcement: Vec::new(), + archivist_hook: self.archivist_hook, + synthesized_tool_names: std::collections::HashSet::new(), + pending_synthesized_tools_mask: std::collections::HashSet::new(), + }) + } +} diff --git a/src/openhuman/agent/harness/session/turn.rs b/src/openhuman/agent/harness/session/turn.rs deleted file mode 100644 index 5ecc6bd057..0000000000 --- a/src/openhuman/agent/harness/session/turn.rs +++ /dev/null @@ -1,1931 +0,0 @@ -//! Turn lifecycle: running a single interaction, executing tools, and -//! wiring the context pipeline + sub-agent harness around them. -//! -//! This file owns the "hot path" methods on `Agent`: -//! -//! - [`Agent::turn`] — the big one. Orchestrates system-prompt build, -//! memory-context injection, the provider loop, tool dispatch, and -//! the context pipeline (tool-result budget → microcompact → -//! autocompact signal → session-memory extraction trigger). -//! - [`Agent::execute_tool_call`] / [`Agent::execute_tools`] — the -//! per-call runners. -//! - [`Agent::build_parent_execution_context`] — snapshot helper for -//! the parent-context task-local that sub-agents read. -//! - [`Agent::trim_history`], [`Agent::fetch_learned_context`], -//! [`Agent::build_system_prompt`] — the small helpers `turn()` leans -//! on every call. -//! - [`Agent::spawn_session_memory_extraction`] — the fire-and-forget -//! background archivist fork. 
- -use super::transcript; -use super::turn_engine_adapter::{AgentCheckpoint, AgentObserver, AgentToolSource}; -use super::types::Agent; -use crate::openhuman::agent::dispatcher::{ParsedToolCall, ToolExecutionResult}; -use crate::openhuman::agent::harness; -use crate::openhuman::agent::hooks::{self, ToolCallRecord, TurnContext}; -use crate::openhuman::agent::memory_loader::collect_recall_citations; -use crate::openhuman::agent::progress::AgentProgress; -use crate::openhuman::agent_experience::{ - prepend_experience_block, render_experience_hits, AgentExperienceStore, ExperienceQuery, -}; -use crate::openhuman::agent_tool_policy::render_tool_policy_boundary; -use crate::openhuman::context::prompt::{ - LearnedContextData, NamespaceSummary, PromptContext, PromptTool, -}; -use crate::openhuman::context::ARCHIVIST_EXTRACTION_PROMPT; -use crate::openhuman::inference::provider::{ - ChatMessage, ChatRequest, ConversationMessage, ProviderDelta, UsageInfo, -}; -use crate::openhuman::memory::MemoryCategory; -use crate::openhuman::tools::Tool; -use crate::openhuman::util::truncate_with_ellipsis; - -use anyhow::Result; -use std::borrow::Cow; -use std::hash::{Hash, Hasher}; -use std::sync::Arc; - -/// True when `msg` is an `assistant` ChatMessage whose JSON-encoded content -/// carries a non-empty `tool_calls` array. -/// -/// `to_provider_messages` (in `agent/dispatcher.rs`) serialises an -/// `AssistantToolCalls` ConversationMessage as a single `assistant` ChatMessage -/// with a JSON body of the form `{"content": "...", "tool_calls": [...]}`. To -/// detect those at the `ChatMessage` boundary (where `bound_cached_transcript_messages` -/// operates) we have to peek inside the JSON. See TAURI-RUST-7 for the -/// failure mode this guards against. -use super::turn_checkpoint::{assistant_message_has_tool_calls, MAX_ITER_CHECKPOINT_INSTRUCTION}; - -/// Built-in direct tools that the orchestrator should call by name, not -/// wrapped in `run_workflow`. 
-const DIRECT_TOOL_NAMES: &[&str] = &[ - "cron_add", - "cron_list", - "cron_remove", - "cron_update", - "cron_run", - "cron_runs", - "current_time", -]; - -/// Recovery shim for legacy/wrong-model calls of the form: -/// `run_workflow({workflow_id: "", inputs: {...}})` (or the -/// pre-rename `run_skill({skill_id: ...})`). -/// -/// When this pattern appears, rewrite it into a direct tool call so the turn -/// can proceed without a manual retry. -fn normalize_tool_call<'a>(call: &'a ParsedToolCall) -> Cow<'a, ParsedToolCall> { - if call.name != "run_workflow" && call.name != "run_skill" { - return Cow::Borrowed(call); - } - // Accept either the current `workflow_id` arg or the legacy `skill_id`. - let Some(target) = call - .arguments - .get("workflow_id") - .or_else(|| call.arguments.get("skill_id")) - .and_then(|v| v.as_str()) - else { - return Cow::Borrowed(call); - }; - if !DIRECT_TOOL_NAMES.contains(&target) { - return Cow::Borrowed(call); - } - let Some(inputs) = call.arguments.get("inputs").and_then(|v| v.as_object()) else { - return Cow::Borrowed(call); - }; - - log::warn!( - "[agent_loop] rewrote legacy {}->{} call into direct tool invocation", - call.name, - target - ); - let skill_id = target; - Cow::Owned(ParsedToolCall { - name: skill_id.to_string(), - arguments: serde_json::Value::Object(inputs.clone()), - tool_call_id: call.tool_call_id.clone(), - }) -} - -/// Compute the one-shot mid-session connect announcement. -/// -/// Given the toolkit slugs currently connected and the set of slugs already -/// announced to the model this session, returns a natural-language note for -/// any genuinely-new slugs (and records them in `announced` so they are never -/// re-announced). Returns `None` when nothing new connected. -/// -/// Kept as a free function (no `&self`) so the delta logic is unit-testable -/// without standing up a full `Agent` — see `turn_tests.rs`. 
-/// Returns the toolkit slugs in `connected` that have not yet been announced -/// this session, marking them announced. Empty when nothing is new. -fn newly_connected_slugs( - connected: &[String], - announced: &mut std::collections::HashSet, -) -> Vec { - let newly: Vec = connected - .iter() - .filter(|slug| !announced.contains(*slug)) - .cloned() - .collect(); - for slug in &newly { - announced.insert(slug.clone()); - } - newly -} - -/// Render the one-shot user-turn note for a set of freshly-connected slugs. -/// Empty input yields `None`. -fn integration_announcement_note(slugs: &[String]) -> Option { - if slugs.is_empty() { - return None; - } - Some(format!( - "[integration update] These integration(s) connected during this conversation and are available right now: {}. \ -Use delegate_to_integrations_agent with the matching toolkit slug to act on them immediately — do not tell the user to reconnect or restart.", - slugs.join(", ") - )) -} - -impl Agent { - /// Executes a single interaction "turn" with the agent. - /// - /// This function is the primary driver of the agent's behavior. It manages the - /// end-to-end lifecycle of a user request: - /// - /// 1. **Initialization**: Resumes from a session transcript if this is a new turn - /// to preserve KV-cache stability. - /// 2. **Prompt Construction**: Builds the system prompt (only on the first turn) - /// incorporating learned context and tool instructions. - /// 3. **Context Injection**: Enriches the user message with relevant memories - /// fetched via the [`MemoryLoader`]. - /// 4. **Execution Loop**: Enters a loop (up to `max_tool_iterations`) where it: - /// - Manages the context window (reduction/summarization). - /// - Calls the LLM provider. - /// - Parses and executes tool calls. - /// - Accumulates results into history. - /// 5. **Synthesis**: Returns the final assistant response after all tools have - /// finished or the iteration budget is exhausted. - /// 6. 
**Background Tasks**: Triggers episodic memory indexing and facts - /// extraction asynchronously. - pub async fn turn(&mut self, user_message: &str) -> Result { - let turn_started = std::time::Instant::now(); - self.emit_progress(AgentProgress::TurnStarted).await; - log::info!("[agent] turn started — awaiting user message processing"); - log::info!( - "[agent_loop] turn start message_chars={} history_len={} max_tool_iterations={}", - user_message.chars().count(), - self.history.len(), - self.config.max_tool_iterations - ); - self.ensure_composio_integrations_listener(); - // ── Session transcript resume ───────────────────────────────── - // On a fresh session (empty history), look for a previous - // transcript to pre-populate the exact provider messages for - // KV cache prefix reuse. - if self.history.is_empty() && self.cached_transcript_messages.is_none() { - self.try_load_session_transcript(); - } - - if self.history.is_empty() { - // Learned context is only baked into the system prompt on the - // very first turn — once the history is non-empty we reuse the - // stored prompt verbatim to preserve the KV-cache prefix the - // inference backend has already tokenised. Fetching it later - // would just burn memory-store reads on data we throw away. - if !self.connected_integrations_initialized { - self.fetch_connected_integrations().await; - // Sessions born without a cached Composio view still need - // a one-shot delegation-surface reconcile before the system - // prompt is frozen. The shared-Arc failure path returns - // `false`, but on turn 1 the Arc should still be uniquely - // owned; a `false` return here indicates a programmer error - // and the warn-level log inside the helper already surfaces - // it, so we keep the existing best-effort contract. 
- let _ = self.refresh_delegation_tools(); - } - let learned = self.fetch_learned_context().await; - let rendered_prompt = self.build_system_prompt(learned)?; - log::info!("[agent] system prompt built — initialising conversation history"); - log::info!( - "[agent_loop] system prompt built chars={}", - rendered_prompt.chars().count() - ); - // User-file injection (PROFILE.md, MEMORY.md) puts - // potentially-sensitive content (LinkedIn scrape output, - // archivist-curated memories) into the system prompt. Avoid - // leaking that to debug logs — log a length + content hash - // instead. Narrow specialists (both flags off) keep the - // full-body log so prompt-engineering iteration on - // tools/safety sections stays easy. - if self.omit_profile && self.omit_memory_md { - log::debug!("[agent_loop] system prompt body:\n{}", rendered_prompt); - } else { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - rendered_prompt.hash(&mut hasher); - log::debug!( - "[agent_loop] system prompt body redacted (contains PROFILE/MEMORY): chars={} hash={:016x}", - rendered_prompt.chars().count(), - hasher.finish() - ); - } - self.history - .push(ConversationMessage::Chat(ChatMessage::system( - rendered_prompt, - ))); - // Seed the per-turn mid-session refresh baseline with the - // hash of whatever Composio actually returned just now. - // Subsequent turns short-circuit unless this hash changes. - self.last_seen_integrations_hash = - crate::openhuman::composio::connected_set_hash(&self.connected_integrations); - // Seed the announced set with the startup connected toolkits so - // only genuinely-new mid-session connects get announced later. - self.announced_integrations = self - .connected_integrations - .iter() - .map(|i| i.toolkit.clone()) - .collect(); - } else { - // Deliberately do NOT rebuild the system prompt on subsequent - // turns. 
The rendered prompt is the KV-cache prefix the inference - // backend has already tokenised; replacing its bytes (even - // cosmetically) forces the backend to re-prefill from scratch. - // - // Dynamic turn-to-turn context (memory recall, learned snippets) - // rides on the user message via `memory_loader.load_context()` - // — that's where the caller should inject anything that varies - // between turns. - // - // *** Mid-session schema-only refresh *** - // - // The system prompt stays frozen, but the function-calling - // schema (the `tools` field in the provider request) is sent - // fresh on every API call — it's not part of the KV-cache - // prefix. So we *can* react to Composio connect/disconnect - // events mid-session by re-synthesising the `delegate_` - // surface on `self.tools` / `self.tool_specs` and letting - // the next provider call carry the new schema. KV cache stays - // intact; the system prompt's `## Connected Integrations` - // block goes mildly stale until the next session, but the - // schema is the source of truth the model actually routes - // against. - // - // The signal we react to is the process-wide - // [`crate::openhuman::composio::INTEGRATIONS_CACHE`], kept - // current by (a) the desktop UI's 5 s - // `composio_list_connections` poll, (b) the post-OAuth - // `ComposioConnectionCreatedSubscriber` invalidation, and - // (c) the 60 s TTL fallback. We read it via the read-only - // [`crate::openhuman::composio::cached_active_integrations`] - // helper — never trigger a backend fetch ourselves, never - // block on a writer. - // Session agents built through `from_config_*` carry their - // runtime `Config` snapshot directly, so this read avoids the - // old `Config::load_or_init()` round-trip on every turn. - // - let _ = self.refresh_delegation_tools_from_cached_integrations("turn-boundary"); - // Cache empty/expired or config unavailable => no signal. 
- // We leave the current tool surface alone and pick up any - // real change on the next turn after the UI's 5 s poll has - // repopulated [`INTEGRATIONS_CACHE`]. - - log::trace!( - "[agent_loop] system prompt reused (history_len={}) — KV cache prefix preserved", - self.history.len() - ); - } - - if self.auto_save { - let _ = self - .memory - .store( - "", - "user_msg", - user_message, - MemoryCategory::Conversation, - None, - ) - .await; - } - - log::info!("[agent] loading memory context for user message"); - const MEMORY_CITATION_LIMIT: usize = 5; - const MEMORY_CITATION_MIN_RELEVANCE: f64 = 0.4; - match collect_recall_citations( - self.memory.as_ref(), - user_message, - MEMORY_CITATION_LIMIT, - MEMORY_CITATION_MIN_RELEVANCE, - ) - .await - { - Ok(citations) => { - log::debug!( - "[agent_loop] memory citations collected count={}", - citations.len() - ); - self.last_turn_citations = citations; - } - Err(err) => { - log::warn!("[agent_loop] memory citation collection failed: {err}"); - self.last_turn_citations.clear(); - } - } - let context = self - .memory_loader - .load_context(self.memory.as_ref(), user_message) - .await - .unwrap_or_default(); - - // ── Phase 3 STM preemptive recall ──────────────────────────── - // On the very first turn only, assemble a bounded cross-thread - // context block from the FTS5 episodic arm (keyword match) and the - let mut context = context; - - // ── Lane B: situational preferences (every turn) ───────────────────── - // Recall topic-scoped preferences semantically relevant to THIS message - // (model-aware embeddings, gated by vector similarity) and inject them - // under a banner. Runs every turn — unlike the first-turn-gated tree/STM - // blocks above — because the query changes per message; it rides the - // per-turn context that's prepended to the user message (no KV-cache - // cost). An unrelated message clears the similarity gate to nothing, so - // no block is injected. 
- { - let situational = - crate::openhuman::memory::preferences::recall_situational_preferences( - &self.memory, - user_message, - ) - .await; - if !situational.is_empty() { - log::info!( - "[pref_recall] situational block injected: {} item(s)", - situational.len() - ); - context.push_str("## Relevant preferences for this message\n\n"); - for pref in &situational { - context.push_str("- "); - context.push_str(pref.trim()); - context.push('\n'); - } - context.push('\n'); - } else { - log::debug!("[pref_recall] no situational preference relevant to this message"); - } - } - - let enriched = if context.is_empty() { - log::info!("[agent] no memory context found — using raw user message"); - self.last_memory_context = None; - user_message.to_string() - } else { - log::info!( - "[agent] memory context loaded — enriching user message context_chars={}", - context.chars().count() - ); - self.last_memory_context = Some(context.clone()); - format!("{context}{user_message}") - }; - - let enriched = self - .inject_agent_experience_context(user_message, enriched) - .await; - - // ── SKILL.md body injection (#781) ─────────────────────────── - // Match installed SKILL.md skills against the user message and - // prepend their bodies ahead of the memory-context block so the - // LLM sees them at the top of the user turn. See the module - // docs on [`crate::openhuman::workflows::inject`] for the matching - // heuristic and size cap rationale. 
- let enriched = { - use crate::openhuman::workflows::inject; - let matches = inject::match_workflows(&self.skills, user_message); - if matches.is_empty() { - log::debug!( - "[skills:inject] no skill matches for user message (skill_catalog_len={})", - self.skills.len() - ); - enriched - } else { - let injection = inject::render_injection( - &matches, - inject::DEFAULT_MAX_INJECTION_BYTES, - |skill| skill.read_body(), - ); - let matched_count = injection.decisions.iter().filter(|d| d.matched).count(); - log::info!( - "[skills:inject] summary candidates={} matched={} injected_bytes={} truncated_any={}", - injection.decisions.len(), - matched_count, - injection.injected_bytes, - injection.truncated - ); - if injection.rendered.is_empty() { - enriched - } else { - format!("{}\n{}", injection.rendered, enriched) - } - } - }; - - // Consume any one-shot mid-session connect announcement parked by - // `refresh_delegation_tools_from_cached_integrations`. It rides on the - // user turn (NOT a system message — `trim_history` hoists system - // messages to the front and would bust the KV-cache prefix) and - // `.take()` clears it so it fires exactly once. - let pending_slugs = std::mem::take(&mut self.pending_integration_announcement); - let enriched = match integration_announcement_note(&pending_slugs) { - Some(note) => format!("{note}\n\n{enriched}"), - None => enriched, - }; - - self.history - .push(ConversationMessage::Chat(ChatMessage::user(enriched))); - - // Pin the main agent to its configured model for the lifetime of - // the session. Per-turn classification used to run here, but it - // would flip `effective_model` mid-conversation (e.g. reasoning → - // coding based on a single keyword). Every flip invalidates the - // backend's KV cache namespace for this session, costing full - // re-prefill on the very next turn. The main agent's job is to - // decide *which sub-agent* to spawn — that routing lives in the - // model prompt, not in the Rust-side classifier. 
Sub-agents pick - // their own tier via `ModelSpec::Hint(...)` in their definition. - let effective_model = self.model_name.clone(); - log::info!( - "[agent_loop] model pinned model={} (per-turn classification disabled for KV cache stability)", - effective_model - ); - - // Snapshot the parent's runtime once per turn so any - // `spawn_subagent` invocation that fires inside this turn can - // read it via the PARENT_CONTEXT task-local. We override the - // model field with the post-classification effective model. - let mut parent_context = self.build_parent_execution_context(); - parent_context.model_name = effective_model.clone(); - - // Bump the session-memory turn counter. Used later by - // `should_extract_session_memory` to decide whether to spawn a - // background archivist fork at end-of-turn. - self.context.tick_turn(); - - let turn_body = async { - // Capture everything the engine seams need as locals/clones *before* - // the observer takes `&mut self`, so the borrow checker is happy: - // the tool source + parser + checkpoint hold clones disjoint from - // the `Agent`, and the observer alone borrows it mutably. - let dispatcher = self.tool_dispatcher.clone(); - let provider = self.provider.clone(); - let provider_name = self.event_channel().to_string(); - let temperature = self.temperature; - let max_iterations = self.config.max_tool_iterations; - // Source multimodal limits from the session's runtime config when - // present so [IMAGE:…] / [FILE:…] markers in user messages are - // resolved with the operator-configured caps (max files, max size, - // max extracted text). Without this, agents fall back to the - // crate-default caps and `MultimodalFileConfig::default()` - // disables file expansion entirely. 
- let multimodal = self - .integration_runtime_config - .as_ref() - .map(|c| c.multimodal.clone()) - .unwrap_or_default(); - let multimodal_files = self - .integration_runtime_config - .as_ref() - .map(|c| c.multimodal_files.clone()) - .unwrap_or_default(); - let artifact_store = Some( - crate::openhuman::agent::harness::tool_result_artifacts::ToolResultArtifactStore::new( - self.action_dir.clone(), - self.session_key.clone(), - ), - ); - let mut tool_source = AgentToolSource { - tools: self.tools.clone(), - visible_tool_names: self.visible_tool_names.clone(), - tool_policy_session: self.tool_policy_session.clone(), - tool_policy: self.tool_policy.clone(), - payload_summarizer: self.payload_summarizer.clone(), - event_session_id: self.event_session_id().to_string(), - event_channel: self.event_channel().to_string(), - agent_definition_id: self.agent_definition_id.clone(), - prefer_markdown: self.context.prefer_markdown_tool_output(), - budget_bytes: self.context.tool_result_budget_bytes(), - artifact_store: artifact_store.clone(), - should_send_specs: self.tool_dispatcher.should_send_tool_specs(), - advertised_specs: self.visible_tool_specs.as_ref().clone(), - records: Vec::new(), - }; - let progress = super::super::engine::TurnProgress::new(self.on_progress.clone()); - let parser = super::super::engine::DispatcherParser { - dispatcher: dispatcher.as_ref(), - }; - let checkpoint = AgentCheckpoint { - provider: self.provider.clone(), - dispatcher: self.tool_dispatcher.clone(), - model: effective_model.clone(), - temperature, - on_progress: self.on_progress.clone(), - user_message: user_message.to_string(), - max_iterations, - }; - let turn_run_queue = self.run_queue.clone(); - let cached_prefix = self.cached_transcript_messages.take(); - let mut observer = AgentObserver { - agent: self, - artifact_store, - effective_model: effective_model.clone(), - cumulative_input: 0, - cumulative_output: 0, - cumulative_cached: 0, - cumulative_charged: 0.0, - last_turn_usage: 
None, - cached_prefix, - pending_results: Vec::new(), - did_push_final: false, - }; - let mut buf: Vec = Vec::new(); - - // Box-pin the parent agent's engine call so its ~600-line - // generator state lives on the heap. Tools that delegate to - // sub-agents (orchestrator → researcher / personality / - // archetype / skill) recurse back into another - // `run_turn_engine` via `run_subagent`; without the box, - // both engines' state machines pile up on the same tokio - // worker stack and overflow the 2 MiB default. The inner - // boxes inside `run_typed_mode` aren't reached if the - // overflow happens during the parent's poll on the way in - // — verified against the `chat-harness-subagent` Playwright - // lane crash on PR #3151. - let outcome = Box::pin(super::super::engine::run_turn_engine( - provider.as_ref(), - &mut buf, - &mut tool_source, - &progress, - &mut observer, - &checkpoint, - &parser, - &provider_name, - &effective_model, - temperature, - true, // silent — the channel/UI renders via progress + the return value - &multimodal, - &multimodal_files, - max_iterations, - None, // the web bridge streams via on_progress deltas, not on_delta - &[], - turn_run_queue, - )) - .await?; - - // Pull the observer's accounting out, then drop it to release the - // `&mut self` borrow so the epilogue can use `self`. - let did_push_final = observer.did_push_final; - let cumulative_input = observer.cumulative_input; - let cumulative_output = observer.cumulative_output; - let cumulative_cached = observer.cumulative_cached; - let cumulative_charged = observer.cumulative_charged; - let last_turn_usage = observer.last_turn_usage.take(); - drop(observer); - let records = std::mem::take(&mut tool_source.records); - - self.context.record_tool_calls(records.len()); - - // For a clean final response the observer already pushed the - // assistant message + persisted. 
For a max-iteration checkpoint or - // circuit-breaker halt the engine returned the text without pushing - // it, so finish the history + transcript here (mirrors the old - // final/max-iter branches). - if !did_push_final { - self.history - .push(ConversationMessage::Chat(ChatMessage::assistant( - outcome.text.clone(), - ))); - self.trim_history(); - // Note: the engine already emits `TurnCompleted` on the - // checkpoint exit (and every other terminal path), so we don't - // re-emit it here — doing so would double-fire for the UI. - let messages = self.tool_dispatcher.to_provider_messages(&self.history); - self.persist_session_transcript( - &messages, - cumulative_input, - cumulative_output, - cumulative_cached, - cumulative_charged, - last_turn_usage.as_ref(), - ); - } - - // Auto-save a short memory of the final reply (not on a capped turn, - // matching the prior behavior). - if self.auto_save && !outcome.hit_cap { - let summary = truncate_with_ellipsis(&outcome.text, 100); - let _ = self - .memory - .store("", "assistant_resp", &summary, MemoryCategory::Daily, None) - .await; - } - - // Fire post-turn hooks (non-blocking). 
- if !self.post_turn_hooks.is_empty() { - let ctx = TurnContext { - user_message: user_message.to_string(), - assistant_response: outcome.text.clone(), - tool_calls: records, - turn_duration_ms: turn_started.elapsed().as_millis() as u64, - session_id: Some(self.event_session_id.clone()) - .filter(|session_id| !session_id.trim().is_empty()), - agent_id: Some(self.agent_definition_id.clone()) - .filter(|agent_id| !agent_id.trim().is_empty()), - entrypoint: Some(self.event_channel.clone()) - .filter(|entrypoint| !entrypoint.trim().is_empty()), - iteration_count: outcome.iterations as usize, - }; - hooks::fire_hooks(&self.post_turn_hooks, ctx); - } - - Ok(outcome.text) - }; // end of `turn_body` async block - - // Run the turn body inside the parent-execution-context scope so - // that any `spawn_subagent` tool call fired during the loop can - // read the parent's provider, tools, model, and workspace via - // the PARENT_CONTEXT task-local. - let result = harness::with_parent_context(parent_context, turn_body).await; - - // Session transcript persistence lives INSIDE the turn body — - // one write per provider response, fired right after the - // response lands (see the tool-call and terminal branches in - // `turn_body`). A crash during tool execution no longer drops - // the assistant's reply because it was already flushed to - // disk before tool dispatch started. No outer-loop save is - // needed here. - - // ── Session-memory extraction (stage 5) ─────────────────────── - // - // If the pipeline's deltas have crossed all three thresholds - // (token growth, tool calls, turn count), spawn a *background* - // archivist sub-agent that will distil durable facts into the - // workspace MEMORY.md file via the `update_memory_md` tool. - // - // The spawn is fire-and-forget: the main turn returns the - // user-visible response immediately, and the archivist runs - // asynchronously on the `agentic` tier. 
We optimistically mark - // the extraction complete right away — if it actually fails, - // we'll just retry on the next threshold window (a few turns - // later), which is the right amount of retry behaviour for a - // librarian task that's idempotent across reruns. - if result.is_ok() && self.context.should_extract_session_memory() { - self.spawn_session_memory_extraction().await; - // Sibling pipeline (#1399): heuristic transcript ingestion - // turns the just-written transcript into durable - // conversational memory + reflections so a brand-new chat - // can recover continuity. Background-only, never blocks the - // user-facing turn return. - self.spawn_transcript_ingestion(); - } - - result - } - - async fn inject_agent_experience_context( - &self, - user_message: &str, - enriched: String, - ) -> String { - const MAX_EXPERIENCE_HITS: usize = 3; - const MAX_EXPERIENCE_BLOCK_BYTES: usize = 2048; - - if !self.learning_enabled { - return enriched; - } - - let tools = self - .visible_tool_specs - .iter() - .map(|spec| spec.name.clone()) - .collect(); - let store = AgentExperienceStore::new(self.memory.clone()); - let query = ExperienceQuery { - query: user_message.to_string(), - tools, - tags: Vec::new(), - agent_id: Some(self.agent_definition_id.clone()).filter(|id| !id.trim().is_empty()), - entrypoint: Some(self.event_channel.clone()) - .filter(|entrypoint| !entrypoint.trim().is_empty()), - max_hits: MAX_EXPERIENCE_HITS, - }; - - match store.retrieve(query).await { - Ok(hits) => { - let matched_hits: Vec<_> = hits - .into_iter() - .filter(|hit| !hit.match_reasons.is_empty()) - .collect(); - let block = render_experience_hits(&matched_hits, MAX_EXPERIENCE_BLOCK_BYTES); - if block.is_empty() { - return enriched; - } - log::debug!( - "[agent-experience] injected {} experience hit(s) bytes={}", - matched_hits.len(), - block.len() - ); - prepend_experience_block(&enriched, &block) - } - Err(err) => { - log::warn!("[agent-experience] retrieval failed (non-fatal): 
{err}"); - enriched - } - } - } - - // ───────────────────────────────────────────────────────────────── - // Per-call tool execution - // ───────────────────────────────────────────────────────────────── - - /// Executes a single tool call and returns the result and execution record. - /// - /// This method: - /// 1. Emits telemetry events for the start of execution. - /// 2. Handles the special `spawn_subagent` tool with `fork` context. - /// 3. Validates tool visibility and availability. - /// 4. Dispatches to the underlying tool implementation. - /// 5. Applies per-result byte budgets to prevent context window bloat. - /// 6. Sanitizes and records the outcome for post-turn hooks. - pub(super) async fn execute_tool_call( - &self, - call: &ParsedToolCall, - iteration: usize, - ) -> (ToolExecutionResult, ToolCallRecord) { - let normalized_call = normalize_tool_call(call); - let call: &ParsedToolCall = &normalized_call; - // The per-call execution path lives in the shared - // [`super::agent_tool_exec::run_agent_tool_call`] so `Agent::turn` - // (when migrated to the turn engine, via `AgentToolSource`) and any - // direct caller run the identical logic. Progress is emitted through a - // `TurnProgress` over this agent's sink. Legacy `run_skill`-wrapped - // built-in cron tool calls are normalized to direct calls first. 
- let progress = super::super::engine::TurnProgress::new(self.on_progress.clone()); - let artifact_store = - crate::openhuman::agent::harness::tool_result_artifacts::ToolResultArtifactStore::new( - self.action_dir.clone(), - self.session_key.clone(), - ); - let ctx = super::agent_tool_exec::AgentToolExecCtx { - tools: &self.tools, - visible_tool_names: &self.visible_tool_names, - tool_policy_session: &self.tool_policy_session, - tool_policy: self.tool_policy.as_ref(), - payload_summarizer: self.payload_summarizer.as_deref(), - event_session_id: self.event_session_id(), - event_channel: self.event_channel(), - agent_definition_id: &self.agent_definition_id, - prefer_markdown: self.context.prefer_markdown_tool_output(), - budget_bytes: self.context.tool_result_budget_bytes(), - artifact_store: Some(&artifact_store), - }; - super::agent_tool_exec::run_agent_tool_call(&ctx, &progress, call, iteration).await - } - - /// Executes multiple tool calls in sequence. - /// - /// Collects results and execution records for all requested tools in a single batch. - pub(super) async fn execute_tools( - &self, - calls: &[ParsedToolCall], - iteration: usize, - ) -> (Vec, Vec) { - let mut results = Vec::with_capacity(calls.len()); - let mut records = Vec::with_capacity(calls.len()); - for call in calls { - let (exec_result, record) = self.execute_tool_call(call, iteration).await; - results.push(exec_result); - records.push(record); - } - (results, records) - } - - // ───────────────────────────────────────────────────────────────── - // Sub-agent context snapshots - // ───────────────────────────────────────────────────────────────── - - /// Snapshot the parent's runtime so spawned sub-agents can read - /// it via the [`harness::PARENT_CONTEXT`] task-local. 
- pub(super) fn build_parent_execution_context(&self) -> harness::ParentExecutionContext { - let allowed_subagent_ids = crate::openhuman::agent::harness::definition::AgentDefinitionRegistry::global() - .and_then(|registry| registry.get(&self.agent_definition_id)) - .map(|definition| { - definition - .subagents - .iter() - .filter_map(|entry| match entry { - crate::openhuman::agent::harness::definition::SubagentEntry::AgentId(id) => { - Some(id.clone()) - } - crate::openhuman::agent::harness::definition::SubagentEntry::Skills(wildcard) - if wildcard.matches_all() => - { - Some("integrations_agent".to_string()) - } - crate::openhuman::agent::harness::definition::SubagentEntry::Skills(_) => None, - }) - .collect() - }) - .unwrap_or_default(); - - harness::ParentExecutionContext { - agent_definition_id: self.agent_definition_id.clone(), - allowed_subagent_ids, - provider: Arc::clone(&self.provider), - all_tools: Arc::clone(&self.tools), - all_tool_specs: Arc::clone(&self.tool_specs), - model_name: self.model_name.clone(), - temperature: self.temperature, - workspace_dir: self.workspace_dir.clone(), - memory: Arc::clone(&self.memory), - agent_config: self.config.clone(), - skills: Arc::new(self.skills.clone()), - memory_context: Arc::new(self.last_memory_context.clone()), - session_id: self.event_session_id().to_string(), - channel: self.event_channel().to_string(), - connected_integrations: self.connected_integrations.clone(), - tool_call_format: self.tool_dispatcher.tool_call_format(), - session_key: self.session_key.clone(), - session_parent_prefix: self.session_parent_prefix.clone(), - on_progress: self.on_progress.clone(), - run_queue: self.run_queue.clone(), - } - } - - // ───────────────────────────────────────────────────────────────── - // History & prompt helpers - // ───────────────────────────────────────────────────────────────── - - /// Emit a lifecycle progress event. 
Uses `send().await` so control - /// events (turn/iteration boundaries, tool_call_started/completed, - /// turn_completed) survive downstream backpressure from the - /// higher-frequency streamed deltas that share the same `on_progress` - /// channel — dropping one of these would desync the web-channel - /// progress bridge (e.g. a tool row stuck in `running` forever). - /// A closed sink is logged and ignored; no progress subscriber is - /// equivalent to success. - async fn emit_progress(&self, event: AgentProgress) { - if let Some(ref tx) = self.on_progress { - if let Err(e) = tx.send(event).await { - log::warn!("[agent] progress sink closed while emitting lifecycle event: {e}"); - } - } - } - - /// Truncates the conversation history to the configured maximum message count. - /// - /// System messages are always preserved. Older non-system messages are - /// dropped first. - pub(super) fn trim_history(&mut self) { - let max = self.config.max_history_messages; - if self.history.len() <= max { - return; - } - - let mut system_messages = Vec::new(); - let mut other_messages = Vec::new(); - - for msg in self.history.drain(..) { - match &msg { - ConversationMessage::Chat(chat) if chat.role == "system" => { - system_messages.push(msg); - } - _ => other_messages.push(msg), - } - } - - if other_messages.len() > max { - let drop_count = other_messages.len() - max; - other_messages.drain(0..drop_count); - } - - // A cut that lands *between* an `AssistantToolCalls` and its - // `ToolResults` leaves the window opening on an orphaned `ToolResults`. - // Serialized, that is a `tool` message with no preceding `tool_calls`, - // which the provider rejects with a 400 (the response streams back - // empty and surfaces to the user as "Something went wrong"). Snap the - // boundary forward past any leading orphaned results so the window - // always starts on a clean turn (a `Chat` or an `AssistantToolCalls`). 
- let orphan_lead = other_messages - .iter() - .take_while(|m| matches!(m, ConversationMessage::ToolResults(_))) - .count(); - if orphan_lead > 0 { - log::debug!( - "[agent] trim_history snapped window past {orphan_lead} orphaned ToolResults \ - (tool-cycle bisected by the {max}-message cap)" - ); - other_messages.drain(0..orphan_lead); - } - - self.history = system_messages; - self.history.extend(other_messages); - } - - /// Bound a resumed transcript prefix to the agent history window. - /// - /// Resume paths may load a long prior transcript directly into - /// `cached_transcript_messages` (provider-ready `ChatMessage`s), which - /// bypasses `self.history`-based trimming/reduction. Keep at most - /// `max_history_messages` entries while preserving the leading system - /// message when present. - pub(super) fn bound_cached_transcript_messages( - &self, - messages: Vec, - ) -> Vec { - let max = self.config.max_history_messages.max(1); - if messages.len() <= max { - return messages; - } - - let has_system = matches!(messages.first(), Some(msg) if msg.role == "system"); - let keep_tail = if has_system { - max.saturating_sub(1) - } else { - max - }; - let start = messages.len().saturating_sub(keep_tail); - - // Same hazard as `trim_history`: the tail slice can open on a `tool` - // message whose `tool_calls` opener fell outside the window, which the - // provider rejects. Advance past any leading orphaned `tool` results so - // the window starts on a clean turn. 
- let tail = &messages[start..]; - let orphan_lead = tail.iter().take_while(|m| m.role == "tool").count(); - if orphan_lead > 0 { - log::debug!( - "[agent] bound_cached_transcript_messages snapped window past {orphan_lead} \ - orphaned tool result(s) (tool-cycle bisected by the {max}-message cap)" - ); - } - let tail = &tail[orphan_lead..]; - - let mut bounded = Vec::with_capacity(tail.len() + usize::from(has_system)); - if has_system { - bounded.push(messages[0].clone()); - } - bounded.extend(tail.iter().cloned()); - - // TAURI-RUST-7: symmetric guard to the leading-orphan strip above. A - // resumed transcript that ends on an `assistant` message containing - // `tool_calls` (because the cached transcript was captured mid-cycle, - // before the tool responses were persisted) is rejected by the - // provider with `400 An assistant message with 'tool_calls' must be - // followed by tool messages`. Pop any such trailing assistant - // tool_calls so the bounded transcript ends on a clean turn boundary. - let mut dropped_tail = 0usize; - while bounded - .last() - .map(assistant_message_has_tool_calls) - .unwrap_or(false) - { - bounded.pop(); - dropped_tail += 1; - } - if dropped_tail > 0 { - log::debug!( - "[agent] bound_cached_transcript_messages stripped {dropped_tail} trailing \ - assistant tool_calls message(s) without paired tool responses" - ); - } - - bounded - } - - /// Pre-fetches learned context data from memory (observations, patterns, user profile). - /// - /// This is an async, non-blocking operation that populates the context - /// for the system prompt. - /// - /// # Explicit-preferences narrow path - /// - /// When `learning_enabled` is `false` but `explicit_preferences_enabled` - /// is `true`, only the `user_profile` namespace (pinned preferences from - /// the `remember_preference` tool) is fetched and returned. 
All other - /// inference-derived data (observations, patterns, reflections, tree - /// summaries) remains empty — the inference stack is not touched. - pub(super) async fn fetch_learned_context(&self) -> LearnedContextData { - // Fast path: neither the full learning subsystem nor the explicit - // preferences path is active — skip all memory reads. - if !self.learning_enabled && !self.explicit_preferences_enabled { - tracing::debug!( - "[learning] fetch_learned_context: both learning_enabled and \ - explicit_preferences_enabled are false — returning empty context" - ); - return LearnedContextData::default(); - } - - // Narrow explicit-preferences path (Lane A): inject the latest-N general - // (always-on) preferences written via `save_preference`. Topic-scoped - // (situational) prefs are NOT injected here — they ride the user message - // via per-turn recall (Lane B). The legacy `user_profile` pinned namespace - // is no longer read here; explicit prefs now live in `user_pref_general`. - if !self.learning_enabled && self.explicit_preferences_enabled { - let general = crate::openhuman::memory::preferences::load_general_preferences( - &self.memory, - crate::openhuman::memory::preferences::STANDING_PREFS_LIMIT, - ) - .await; - tracing::debug!( - "[learning] fetch_learned_context: explicit_preferences_enabled — loaded {} general preference(s) for the system prompt", - general.len() - ); - return LearnedContextData { - user_profile: general, - ..LearnedContextData::default() - }; - } - - // Full learning path: fetch all inference-derived data. 
- tracing::debug!( - "[learning] fetch_learned_context: learning_enabled=true — fetching full context" - ); - - let obs_entries = self - .memory - .list( - Some("learning_observations"), - Some(&MemoryCategory::Custom("learning_observations".into())), - None, - ) - .await - .unwrap_or_default(); - - let pat_entries = self - .memory - .list( - Some("learning_patterns"), - Some(&MemoryCategory::Custom("learning_patterns".into())), - None, - ) - .await - .unwrap_or_default(); - - // Standing preferences come from the explicit two-lane store (Lane A), - // not the inferred `user_profile` facets — those are demoted: no longer - // injected as ground truth. A high-confidence inferred facet should be - // *proposed* to the user (and pinned via `save_preference` on - // confirmation), not silently treated as a standing preference. - let general = crate::openhuman::memory::preferences::load_general_preferences( - &self.memory, - crate::openhuman::memory::preferences::STANDING_PREFS_LIMIT, - ) - .await; - - // Explicit user reflections — privileged memory class. Pulled - // separately from observations/patterns so the prompt assembly - // can render them ahead of generic tree summaries. - let reflection_entries = self - .memory - .list( - Some(crate::openhuman::learning::reflection::REFLECTIONS_NAMESPACE), - Some(&MemoryCategory::Custom( - crate::openhuman::learning::reflection::REFLECTIONS_NAMESPACE.into(), - )), - None, - ) - .await - .unwrap_or_default(); - - // Pull every namespace's root-level summary from the tree - // summarizer. This is the densest user memory we can hand the - // orchestrator: each root holds up to 20 000 tokens of distilled - // long-term context. Done synchronously here because the calls - // are filesystem reads, not provider/network round-trips, and - // happen exactly once per session (only on the first turn). 
- // - // Per-namespace + total caps come from the user-facing memory - // window preset on `AgentConfig` so changing the slider in the - // UI takes effect on the very next session-start. - let limits = self.config.resolved_memory_limits(); - let tree_root_summaries = collect_tree_root_summaries( - &self.workspace_dir, - limits.per_namespace_max_chars, - limits.total_tree_max_chars, - ); - - LearnedContextData { - observations: obs_entries - .iter() - .rev() - .take(5) - .map(|e| sanitize_learned_entry(&e.content)) - .collect(), - patterns: pat_entries - .iter() - .take(3) - .map(|e| sanitize_learned_entry(&e.content)) - .collect(), - user_profile: general, - // Cap reflections at 10 to keep the privileged section - // bounded — the issue requires reflections improve context - // rather than flood it. Newest first. - reflections: reflection_entries - .iter() - .rev() - .take(10) - .map(|e| sanitize_learned_entry(&e.content)) - .collect(), - tree_root_summaries, - } - } - - /// Fetches the user's active Composio connections and populates - /// `self.connected_integrations` so the system prompt can surface them. - /// - /// Delegates to the shared [`crate::openhuman::composio::fetch_connected_integrations`] - /// which is the single source of truth for integration discovery. - /// - /// **No session-scoped Composio client is cached on the agent any - /// more (#1710 Wave 2)**. Every downstream caller that needs to - /// dispatch a Composio action now resolves a fresh client via - /// [`crate::openhuman::composio::client::create_composio_client`] - /// at call time so the live `composio.mode` toggle is honoured - /// without rebuilding the session — see `ComposioActionTool`, - /// `ProviderContext::execute`, the 5 migrated agent tools in - /// `composio/tools.rs`, and the spawn-time per-action tool build - /// path in `subagent_runner/ops.rs`. 
- pub async fn fetch_connected_integrations(&mut self) { - let config = match self.integration_runtime_config.clone() { - Some(config) => config, - None => match crate::openhuman::config::Config::load_or_init().await { - Ok(config) => config, - Err(e) => { - log::debug!( - "[agent] skipping connected integrations fetch: config load failed: {e}" - ); - return; - } - }, - }; - self.connected_integrations = - crate::openhuman::composio::fetch_connected_integrations(&config).await; - self.connected_integrations_initialized = true; - } - - /// Lazily attach this session to the global event bus so it can - /// observe `ComposioIntegrationsChanged` notifications. - pub(super) fn ensure_composio_integrations_listener(&mut self) { - if self.composio_integrations_rx.is_some() { - return; - } - if let Some(bus) = crate::core::event_bus::global() { - self.composio_integrations_rx = Some(bus.raw_receiver()); - log::debug!( - "[agent_loop] armed composio integrations listener for session='{}'", - self.event_session_id - ); - } - } - - /// Drain pending `ComposioIntegrationsChanged` events. - /// - /// Returns `true` when we observed at least one relevant event (or lag) and - /// should re-check cached integrations before the next provider call. 
- pub(super) fn drain_composio_integrations_changed_events(&mut self) -> bool { - self.ensure_composio_integrations_listener(); - let Some(rx) = self.composio_integrations_rx.as_mut() else { - return false; - }; - use tokio::sync::broadcast::error::TryRecvError; - - let mut saw_signal = false; - let mut closed = false; - loop { - match rx.try_recv() { - Ok(crate::core::event_bus::DomainEvent::ComposioIntegrationsChanged { - toolkits, - }) => { - saw_signal = true; - log::info!( - "[agent_loop] received composio integrations changed event (active_toolkits={:?})", - toolkits - ); - } - Ok(_) => {} - Err(TryRecvError::Empty) => break, - Err(TryRecvError::Lagged(skipped)) => { - saw_signal = true; - log::warn!( - "[agent_loop] composio integrations listener lagged by {} event(s); forcing cache re-check", - skipped - ); - } - Err(TryRecvError::Closed) => { - closed = true; - break; - } - } - } - if closed { - self.composio_integrations_rx = None; - } - saw_signal - } - - /// Reconcile the session's delegation schema against the latest cached - /// integrations snapshot. Returns `true` only when a refresh applied. 
- pub(super) fn refresh_delegation_tools_from_cached_integrations( - &mut self, - trigger: &str, - ) -> bool { - let Some(cfg) = self.integration_runtime_config.as_ref() else { - return false; - }; - let Some(cache_view) = crate::openhuman::composio::cached_active_integrations(cfg) else { - return false; - }; - - let new_hash = crate::openhuman::composio::connected_set_hash(&cache_view); - if new_hash == self.last_seen_integrations_hash { - return false; - } - - log::info!( - "[agent_loop] composio set changed ({trigger}) hash {:x} -> {:x}; refreshing delegation schema (system prompt unchanged for KV cache)", - self.last_seen_integrations_hash, - new_hash - ); - - let prev_integrations = std::mem::replace(&mut self.connected_integrations, cache_view); - if self.refresh_delegation_tools() { - self.last_seen_integrations_hash = new_hash; - self.connected_integrations_initialized = true; - // Surface newly-connected toolkits onto the next user message so - // the model acts on them on the FIRST post-connect ask instead of - // refusing from stale chat context. Schema-only refresh already - // updated the enum; this closes the prose/decision gap. - let connected_slugs: Vec = self - .connected_integrations - .iter() - .map(|i| i.toolkit.clone()) - .collect(); - // Append (don't overwrite) so a second connect before the next - // user turn doesn't drop the first one's announcement. Slugs are - // already de-duped against `announced_integrations`, but guard the - // pending list too in case the same slug is re-queued. 
- for slug in newly_connected_slugs(&connected_slugs, &mut self.announced_integrations) { - if !self.pending_integration_announcement.contains(&slug) { - self.pending_integration_announcement.push(slug); - } - } - true - } else { - self.connected_integrations = prev_integrations; - false - } - } - - /// Re-synthesise `delegate_*` tools for the orchestrator's `subagents` - /// declaration using the live `connected_integrations` slice, and - /// reconcile the resulting set into `self.tools` / `self.tool_specs` / - /// `self.visible_tool_specs` / `self.visible_tool_names`. - /// - /// **Reconciliation strategy** — full rebuild of the synthesised - /// subset: - /// - /// 1. Drop every tool whose name was in [`Self::synthesized_tool_names`] - /// from the previous synthesis. Direct tools (`query_memory`, - /// `cron_add`, …) are untouched because their names are not in - /// that set. - /// 2. Append the freshly collected synthesis output verbatim. - /// 3. Replace `synthesized_tool_names` with the new set so the - /// next refresh has a clean mask to undo. - /// - /// This is safer than appending-only or strict-diff reconcile: - /// - /// * Stale tools after a revoke can never leak — anything from the - /// previous synthesis is unconditionally dropped, the new set is - /// authoritative. - /// * Direct tools can never be accidentally removed — only names - /// in `synthesized_tool_names` are touched. - /// * Duplicate registration is impossible — retain+extend - /// guarantees every final entry is either a non-synthesised - /// direct tool or a member of the fresh `synthed` set. - /// - /// **When to call**: on turn 1 only when the session was built - /// without a prewarmed Composio cache snapshot, and on any - /// subsequent turn where the connection set has changed since the - /// last reconcile (detected via - /// [`Self::last_seen_integrations_hash`] vs. - /// [`crate::openhuman::composio::cached_active_integrations`]). 
- /// - /// **Shared-Arc behavior**: when `self.tools` is currently shared - /// (e.g. an in-flight turn cloned the Arc into its tool source), we - /// still refresh `self.tool_specs` / `self.visible_tool_specs` so the - /// provider-facing schema updates immediately. The executable tool - /// registry is refreshed only when `self.tools` has unique ownership. - /// This keeps same-turn routing unblocked while preserving ownership - /// safety for non-cloneable `Box` values. - /// - /// **Return value** — `true` when schema reconciliation succeeded (or - /// no reconcile was needed). Returns `false` only when a non-shared - /// reconcile path failed unexpectedly. - pub fn refresh_delegation_tools(&mut self) -> bool { - use crate::openhuman::agent::harness::definition::AgentDefinitionRegistry; - use crate::openhuman::tools::orchestrator_tools::collect_orchestrator_tools; - - let Some(reg) = AgentDefinitionRegistry::global() else { - // No registry — there's nothing we can do until the - // registry is initialised. The agent's surface stays at - // whatever the builder produced; callers can safely treat - // this as "no reconcile needed right now". - return true; - }; - let Some(def) = reg.get(&self.agent_definition_id) else { - log::debug!( - "[agent] refresh_delegation_tools: definition '{}' not in registry — skipping", - self.agent_definition_id - ); - return true; - }; - if def.subagents.is_empty() { - return true; - } - - let synthed = collect_orchestrator_tools(def, reg, &self.connected_integrations); - let synthed_names: std::collections::HashSet = - synthed.iter().map(|t| t.name().to_string()).collect(); - let synthed_specs: Vec = - synthed.iter().map(|t| t.spec()).collect(); - - // Skip mutation when neither the previous nor the next synthesis - // produced any names — saves work on agents without dynamic - // delegation. 
- if self.synthesized_tool_names.is_empty() && synthed_names.is_empty() { - return true; - } - - // Mask of the previous synthesis — the names whose `tool_specs` are - // currently live (this set is kept in lock-step with `tool_specs`). - let old_synth = std::mem::take(&mut self.synthesized_tool_names); - - // `tool_specs` are plain data and therefore cloneable; we can always - // reconcile schema even when the Arc is shared. Drop exactly the - // previous synthesised spec set, then append the fresh one. - { - let specs_vec = Arc::make_mut(&mut self.tool_specs); - specs_vec.retain(|s| !old_synth.contains(&s.name)); - specs_vec.extend(synthed_specs); - } - - // `tools` contains non-cloneable trait objects. Reconcile it only when - // uniquely owned. The set of stale synthesised *instances* to drop is - // the previous synthesis (`old_synth`) plus any instances a prior - // shared-Arc refresh couldn't remove (`pending_synthesized_tools_mask`). - let tools_remove_mask: std::collections::HashSet = old_synth - .iter() - .chain(self.pending_synthesized_tools_mask.iter()) - .cloned() - .collect(); - let tools_reconciled = if let Some(tools_vec) = Arc::get_mut(&mut self.tools) { - tools_vec.retain(|t| !tools_remove_mask.contains(t.name())); - tools_vec.extend(synthed); - // `tools` now matches `tool_specs` exactly — nothing pending. - self.pending_synthesized_tools_mask.clear(); - true - } else { - // Schema (`tool_specs`) was updated to the new set, but the stale - // tool *instances* still sit in `self.tools`. Record their names - // so the next unique-owner refresh removes them. Crucially we do - // NOT roll `synthesized_tool_names` back to `old_synth` here — that - // would desync it from `tool_specs` and cause duplicate specs on - // the following refresh (#3044). 
- self.pending_synthesized_tools_mask = tools_remove_mask; - log::warn!( - "[agent] refresh_delegation_tools: tools Arc is shared — refreshed schema only \ - ({} synthesised tool name(s)); {} stale tool instance(s) pending removal on the next unique-owner refresh", - synthed_names.len(), - self.pending_synthesized_tools_mask.len() - ); - false - }; - - // `visible_tool_names` carries an explicit allowlist for - // [`ToolScope::Named`] agents. Drop the previously-synthesised - // names and add the new ones so the visible set tracks the - // tool list. Wildcard-scope agents keep this empty ("no - // filter") and never need touching. - if !self.visible_tool_names.is_empty() { - for name in &old_synth { - self.visible_tool_names.remove(name); - } - for name in &synthed_names { - self.visible_tool_names.insert(name.clone()); - } - } - - // Rebuild the visible-spec cache from the new tool_specs so the - // next provider call carries the reconciled schema. Dedup - // afterward so a delegate synthesised here (e.g. - // `delegate_name = "research"`) doesn't collide with a - // same-named skill tool on the wire — Anthropic 400s on dup - // tool names where OpenHuman's backend silently accepts. - self.rebuild_tool_policy_session(); - - // Compute add/remove deltas for the log line — useful when - // diagnosing a Composio connect/revoke that should have rebuilt - // the surface but didn't. Materialise to owned `Vec` - // so we can move `synthed_names` into `self.synthesized_tool_names` - // below without the log-statement reborrow blocking the move. - let added: Vec = synthed_names - .iter() - .filter(|n| !old_synth.contains(n.as_str())) - .cloned() - .collect(); - let removed: Vec = old_synth - .iter() - .filter(|n| !synthed_names.contains(n.as_str())) - .cloned() - .collect(); - - // `tool_specs` always reconciled to the new set, so the name mask must - // track that set unconditionally — whether or not `tools` (the - // executable instances) could be reconciled this pass. 
- self.synthesized_tool_names = synthed_names.clone(); - - log::info!( - "[agent] refresh_delegation_tools: reconciled delegation schema for agent '{}' (display='{}'); now {} synthesised tool name(s); added={:?} removed={:?} tools_reconciled={} pending_tool_instances={}", - self.agent_definition_id, - self.agent_definition_name, - synthed_names.len(), - added, - removed, - tools_reconciled, - self.pending_synthesized_tools_mask.len() - ); - true - } - - /// Builds the system prompt for the current turn, including tool - /// instructions and learned context. - pub fn build_system_prompt(&self, learned: LearnedContextData) -> Result { - let tools_slice: &[Box] = self.tools.as_slice(); - let instructions = self - .tool_dispatcher - .prompt_instructions_for_specs(self.visible_tool_specs.as_slice()) - .unwrap_or_else(|| self.tool_dispatcher.prompt_instructions(tools_slice)); - // Adapt the owned Box slice into the shared PromptTool - // shape that every prompt-building call-site uses. Temporary vec - // borrows from `tools_slice` and lives for the duration of the - // prompt build. 
- let prompt_tools = PromptTool::from_tools(tools_slice); - let prompt_visible_tool_names = self.tool_policy_session.visible_tool_names_for_prompt(); - let ctx = PromptContext { - workspace_dir: &self.workspace_dir, - model_name: &self.model_name, - agent_id: &self.agent_definition_name, - tools: &prompt_tools, - skills: &self.skills, - dispatcher_instructions: &instructions, - learned, - visible_tool_names: &prompt_visible_tool_names, - tool_call_format: self.tool_dispatcher.tool_call_format(), - connected_integrations: &self.connected_integrations, - connected_identities_md: crate::openhuman::agent::prompts::render_connected_identities( - ), - include_profile: !self.omit_profile, - include_memory_md: !self.omit_memory_md, - curated_snapshot: None, - user_identity: crate::openhuman::app_state::peek_cached_current_user_identity(), - // TODO(phase-2): Wire personality context into the live agent turn. - // Currently personalities only take effect during delegate_to_personality sub-agent runs. - // To activate: load the active profile via AgentProfileStore::resolve(), build - // PersonalityContext::from_profile(), and populate these fields. - personality_soul_md: None, // TODO: personality_ctx.soul_md_override - personality_memory_md: None, // TODO: personality_ctx.memory_md_override - personality_roster: vec![], // TODO: build_personality_roster(&workspace_dir) - }; - // Route through the global context manager so every - // prompt-building call-site — main agent, sub-agent runner, - // channel runtimes — shares one builder configuration. 
- let mut prompt = self.context.build_system_prompt(&ctx)?; - if let Some(boundary) = render_tool_policy_boundary(&self.tool_policy_session, 2048) { - prompt = format!("{boundary}\n\n{prompt}"); - } - Ok(prompt) - } - - // ───────────────────────────────────────────────────────────────── - // Session transcript helpers - // ───────────────────────────────────────────────────────────────── - - /// Try to load a previous session transcript for KV cache resume. - /// - /// Best-effort: failures are logged and silently ignored. - pub(super) fn try_load_session_transcript(&mut self) { - match transcript::find_latest_transcript(&self.workspace_dir, &self.agent_definition_name) { - Some(path) => { - log::info!( - "[transcript] found previous transcript path={}", - path.display() - ); - match transcript::read_transcript(&path) { - Ok(session) => { - if session.messages.is_empty() { - log::debug!( - "[transcript] previous transcript is empty — skipping resume" - ); - return; - } - let loaded_count = session.messages.len(); - log::info!("[transcript] loaded {} messages for resume", loaded_count); - let bounded = self.bound_cached_transcript_messages(session.messages); - if bounded.len() < loaded_count { - log::warn!( - "[transcript] resume prefix trimmed from {} to {} messages (max_history_messages={})", - loaded_count, - bounded.len(), - self.config.max_history_messages - ); - } - self.cached_transcript_messages = Some(bounded); - } - Err(err) => { - log::warn!( - "[transcript] failed to parse previous transcript {}: {err}", - path.display() - ); - } - } - } - None => { - log::debug!( - "[transcript] no previous transcript found for agent={}", - self.agent_definition_name - ); - } - } - } - - /// Ask the provider for a resumable checkpoint summary when a turn - /// hits the tool-call iteration cap, with native tools **disabled** so - /// the model returns prose rather than another tool call. 
Streams text - /// deltas to the progress sink (when attached) so the checkpoint - /// appears in the UI like any other reply. - /// - /// Returns the summary text (empty when the provider call fails or - /// yields nothing — the caller then falls back to - /// [`build_deterministic_checkpoint`] so the thread is never left on an - /// unterminated tool cycle, bug-report-2026-05-26 A1) **paired with the - /// provider usage** for this extra call, so the caller can fold it into - /// the turn's cumulative token/cost accounting instead of silently - /// dropping it. - async fn summarize_iteration_checkpoint( - &self, - base_messages: &[ChatMessage], - effective_model: &str, - iteration_for_stream: u32, - ) -> (String, Option) { - let mut messages = base_messages.to_vec(); - messages.push(ChatMessage::user(MAX_ITER_CHECKPOINT_INSTRUCTION)); - - // Mirror the main loop's streaming sink so the checkpoint renders - // incrementally. Only text deltas are relevant here (tools are - // disabled for this call). 
- let (delta_tx_opt, delta_forwarder) = if self.on_progress.is_some() { - let (tx, mut rx) = tokio::sync::mpsc::channel::(128); - let progress_tx = self.on_progress.clone(); - let forwarder = tokio::spawn(async move { - while let Some(event) = rx.recv().await { - let Some(ref sink) = progress_tx else { - continue; - }; - if let ProviderDelta::TextDelta { delta } = event { - if sink - .send(AgentProgress::TextDelta { - delta, - iteration: iteration_for_stream, - }) - .await - .is_err() - { - break; - } - } - } - }); - (Some(tx), Some(forwarder)) - } else { - (None, None) - }; - - let result = self - .provider - .chat( - ChatRequest { - messages: &messages, - tools: None, - stream: delta_tx_opt.as_ref(), - }, - effective_model, - self.temperature, - ) - .await; - drop(delta_tx_opt); - if let Some(handle) = delta_forwarder { - let _ = handle.await; - } - - match result { - Ok(resp) => { - let usage = resp.usage.clone(); - // Strip any stray tool-call XML a text-mode model may have - // emitted; keep only the prose. - let (text, calls) = self.tool_dispatcher.parse_response(&resp); - let checkpoint = if !text.trim().is_empty() { - text - } else if calls.is_empty() { - // No tool-call markup was present, so the raw text (if - // any) is genuine prose — safe to use. - resp.text.unwrap_or_default() - } else { - // `parse_response` stripped tool-call markup and left no - // prose. Do NOT re-emit `resp.text` here: it would persist - // the raw `…` markup verbatim as the checkpoint. - // Return empty so the caller uses the deterministic - // fallback instead (bug-report-2026-05-26 A1). - String::new() - }; - (checkpoint, usage) - } - Err(e) => { - log::warn!("[agent_loop] checkpoint summary call failed: {e:#}"); - (String::new(), None) - } - } - } - - /// Persist the exact provider messages as a session transcript. - /// - /// Writes JSONL as source of truth and re-renders the companion `.md` - /// for human readability. 
Best-effort: failures are logged and silently - /// ignored. The JSONL conversation store remains the authoritative - /// persistence layer; session transcripts are an optimization for KV - /// cache stability. - /// - /// `turn_usage` — when `Some`, attributes per-message token/cost figures - /// to the last assistant message in the written transcript. - pub(super) fn persist_session_transcript( - &mut self, - messages: &[ChatMessage], - input_tokens: u64, - output_tokens: u64, - cached_input_tokens: u64, - charged_amount_usd: f64, - turn_usage: Option<&transcript::TurnUsage>, - ) { - // Resolve the transcript path on first write. The stem is - // `{parent_prefix}__{session_key}` for sub-agents (producing a - // flat hierarchical filename) or just `{session_key}` for a - // root session. Prefix chaining is already done by the - // sub-agent runner when it populates `session_parent_prefix`. - if self.session_transcript_path.is_none() { - let stem = match &self.session_parent_prefix { - Some(prefix) => format!("{}__{}", prefix, self.session_key), - None => self.session_key.clone(), - }; - match transcript::resolve_keyed_transcript_path(&self.workspace_dir, &stem) { - Ok(path) => { - log::info!( - "[transcript] new session transcript path={}", - path.display() - ); - self.session_transcript_path = Some(path); - } - Err(err) => { - log::warn!("[transcript] failed to resolve transcript path: {err}"); - return; - } - } - } - - let path = self.session_transcript_path.as_ref().unwrap(); - let now = chrono::Utc::now().to_rfc3339(); - - let meta = transcript::TranscriptMeta { - agent_name: self.agent_definition_name.clone(), - dispatcher: if self.tool_dispatcher.should_send_tool_specs() { - "native".into() - } else { - "xml".into() - }, - created: now.clone(), - updated: now, - turn_count: self.context.stats().session_memory_current_turn as usize, - input_tokens, - output_tokens, - cached_input_tokens, - charged_amount_usd, - thread_id: 
crate::openhuman::inference::provider::thread_context::current_thread_id(), - }; - - if let Err(err) = transcript::write_transcript(path, messages, &meta, turn_usage) { - log::warn!( - "[transcript] failed to write transcript {}: {err}", - path.display() - ); - } - } - - // ───────────────────────────────────────────────────────────────── - // Session-memory extraction (stage 5 of the context pipeline) - // ───────────────────────────────────────────────────────────────── - - /// Spawn a background archivist sub-agent to extract durable facts - /// from the recent conversation into `MEMORY.md`. Fire-and-forget. - /// - /// Gated by [`context_pipeline::SessionMemoryState::should_extract`] - /// — see its docs for the threshold invariants. Safe to call from - /// inside `turn()` after the turn body has settled. - pub(super) async fn spawn_session_memory_extraction(&mut self) { - // ── Flush the trailing open segment before the session winds down ── - // - // The ArchivistHook manages per-turn segment lifecycle but cannot - // force-close the *last* open segment because there is no explicit - // "session end" event in the turn loop. `spawn_session_memory_extraction` - // is the closest available signal: it fires when the context manager - // decides the session has accumulated enough material to archive. - // - // GUARANTEE: the flush is *awaited* here (not fire-and-forget) so - // the trailing segment always receives its recap + embedding + tree - // ingest before the function returns, even during runtime wind-down. - // This honours the doc-comment guarantee on `flush_open_segment` in - // `archivist.rs`. No deadlock risk: no mutex guard is held across - // this await point. 
- if let Some(ref archivist) = self.archivist_hook { - let session_id = self.event_session_id.clone(); - log::debug!( - "[archivist] awaiting flush_open_segment for session={session_id} at session wind-down" - ); - archivist.flush_open_segment(&session_id).await; - } - - let Some(registry) = harness::AgentDefinitionRegistry::global() else { - log::debug!("[session_memory] registry not initialised — skipping extraction spawn"); - return; - }; - let Some(definition) = registry.get("archivist").cloned() else { - log::debug!( - "[session_memory] archivist definition not found — skipping extraction spawn" - ); - return; - }; - - // Build a dedicated ParentExecutionContext for the background - // task. The in-progress turn's context has already been - // consumed by the `with_parent_context` scope above, so this is - // a fresh snapshot. - let parent_ctx = self.build_parent_execution_context(); - let extraction_prompt = ARCHIVIST_EXTRACTION_PROMPT.to_string(); - - // Flip the extraction state to "in-progress" so future - // should_extract checks return false until the archivist - // finishes. We then hand a shared handle to the spawned task - // so it can mark the extraction complete (resets deltas) on - // success, or failed (keeps deltas intact for retry) on error. - // This replaces the old optimistic `mark_complete` that - // silently dropped the retry window when extractions failed. 
- let stats_snapshot = self.context.stats(); - self.context.mark_session_memory_started(); - let sm_handle = self.context.session_memory_handle(); - - log::info!( - "[session_memory] spawning background archivist extraction (turn={}, tokens={})", - stats_snapshot.session_memory_current_turn, - stats_snapshot.session_memory_total_tokens - ); - - tokio::spawn(async move { - let options = harness::SubagentRunOptions::default(); - let fut = harness::run_subagent(&definition, &extraction_prompt, options); - let result = harness::with_parent_context(parent_ctx, fut).await; - match result { - Ok(outcome) => { - tracing::info!( - agent_id = %outcome.agent_id, - task_id = %outcome.task_id, - iterations = outcome.iterations, - output_chars = outcome.output.chars().count(), - "[session_memory] archivist extraction completed" - ); - if let Ok(mut sm) = sm_handle.lock() { - sm.mark_extraction_complete(); - } - } - Err(err) => { - tracing::warn!( - error = %err, - "[session_memory] archivist extraction failed — will retry after next threshold crossing" - ); - // Leave the deltas intact so the next threshold - // crossing schedules another attempt. Clearing - // `extraction_in_progress` lets the retry - // actually fire. - if let Ok(mut sm) = sm_handle.lock() { - sm.mark_extraction_failed(); - } - } - } - }); - } - - /// Spawn a background task that ingests the current session - /// transcript into the conversational-memory store. - /// - /// Issue #1399: complements `spawn_session_memory_extraction`. The - /// archivist path writes dense bullets into `MEMORY.md`; this path - /// extracts importance-tagged, provenance-bearing memories via the - /// heuristic [`crate::openhuman::learning::transcript_ingest`] - /// pipeline. The two are deliberately independent so the prompt - /// retrieval layer can pull from `conversation_memory` without - /// needing the archivist's extraction to have fired this session. - /// - /// Fire-and-forget: failures are logged, never propagated. 
- pub(super) fn spawn_transcript_ingestion(&self) { - let Some(path) = self.session_transcript_path.clone() else { - log::debug!("[transcript_ingest] no session transcript path yet — skipping spawn"); - return; - }; - let memory = std::sync::Arc::clone(&self.memory); - - tokio::spawn(async move { - match crate::openhuman::learning::transcript_ingest::ingest_transcript_path( - memory.as_ref(), - &path, - ) - .await - { - Ok(report) => tracing::info!( - transcript = %path.display(), - extracted = report.extracted, - stored = report.stored, - deduped = report.deduped, - reflections_stored = report.reflections_stored, - "[transcript_ingest] background ingest complete" - ), - Err(err) => tracing::warn!( - transcript = %path.display(), - error = %err, - "[transcript_ingest] background ingest failed — will retry next threshold window" - ), - } - }); - } -} - -/// Wrapper around -/// [`crate::openhuman::memory_tree::tree_runtime::store::collect_root_summaries_with_caps`] -/// that takes user-resolved per-namespace and total caps. The actual -/// limits are derived from the active -/// [`crate::openhuman::config::schema::agent::MemoryContextWindow`] -/// preset by [`crate::openhuman::config::schema::agent::AgentConfig::resolved_memory_limits`]. -fn collect_tree_root_summaries( - workspace_dir: &std::path::Path, - per_namespace_cap: usize, - total_cap: usize, -) -> Vec { - crate::openhuman::memory_tree::tree_runtime::store::collect_root_summaries_with_caps( - workspace_dir, - per_namespace_cap, - total_cap, - ) - .into_iter() - .map(|(namespace, body, updated_at)| NamespaceSummary { - namespace, - body, - updated_at, - }) - .collect() -} - -/// Sanitize a learned memory entry before injecting into the system prompt. -/// Strips raw data, limits length, and removes potential secrets. 
-fn sanitize_learned_entry(content: &str) -> String { - let trimmed = content.trim(); - if trimmed.is_empty() { - return String::new(); - } - // Truncate to a safe length - let max_len = 200; - let sanitized: String = trimmed.chars().take(max_len).collect(); - // Strip anything that looks like a secret/token - if sanitized.contains("Bearer ") - || sanitized.contains("sk-") - || sanitized.contains("ghp_") - || sanitized.contains("-----BEGIN") - { - return "[redacted: potential secret]".to_string(); - } - sanitized -} - -#[cfg(test)] -#[path = "turn_tests.rs"] -mod tests; diff --git a/src/openhuman/agent/harness/session/turn/context.rs b/src/openhuman/agent/harness/session/turn/context.rs new file mode 100644 index 0000000000..85cbbd2f9f --- /dev/null +++ b/src/openhuman/agent/harness/session/turn/context.rs @@ -0,0 +1,325 @@ +//! History, context, and system prompt management. + +use super::super::turn_checkpoint::assistant_message_has_tool_calls; +use super::super::types::Agent; +use super::{collect_tree_root_summaries, sanitize_learned_entry}; +use crate::openhuman::agent_tool_policy::render_tool_policy_boundary; +use crate::openhuman::context::prompt::{LearnedContextData, PromptContext, PromptTool}; +use crate::openhuman::inference::provider::{ChatMessage, ConversationMessage}; +use crate::openhuman::memory::MemoryCategory; +use crate::openhuman::tools::Tool; + +use anyhow::Result; + +impl Agent { + // ───────────────────────────────────────────────────────────────── + // History & prompt helpers + // ───────────────────────────────────────────────────────────────── + + /// Truncates the conversation history to the configured maximum message count. + /// + /// System messages are always preserved. Older non-system messages are + /// dropped first. 
+ pub(in super::super) fn trim_history(&mut self) { + let max = self.config.max_history_messages; + if self.history.len() <= max { + return; + } + + let mut system_messages = Vec::new(); + let mut other_messages = Vec::new(); + + for msg in self.history.drain(..) { + match &msg { + ConversationMessage::Chat(chat) if chat.role == "system" => { + system_messages.push(msg); + } + _ => other_messages.push(msg), + } + } + + if other_messages.len() > max { + let drop_count = other_messages.len() - max; + other_messages.drain(0..drop_count); + } + + // A cut that lands *between* an `AssistantToolCalls` and its + // `ToolResults` leaves the window opening on an orphaned `ToolResults`. + // Serialized, that is a `tool` message with no preceding `tool_calls`, + // which the provider rejects with a 400 (the response streams back + // empty and surfaces to the user as "Something went wrong"). Snap the + // boundary forward past any leading orphaned results so the window + // always starts on a clean turn (a `Chat` or an `AssistantToolCalls`). + let orphan_lead = other_messages + .iter() + .take_while(|m| matches!(m, ConversationMessage::ToolResults(_))) + .count(); + if orphan_lead > 0 { + log::debug!( + "[agent] trim_history snapped window past {orphan_lead} orphaned ToolResults \ + (tool-cycle bisected by the {max}-message cap)" + ); + other_messages.drain(0..orphan_lead); + } + + self.history = system_messages; + self.history.extend(other_messages); + } + + /// Bound a resumed transcript prefix to the agent history window. + /// + /// Resume paths may load a long prior transcript directly into + /// `cached_transcript_messages` (provider-ready `ChatMessage`s), which + /// bypasses `self.history`-based trimming/reduction. Keep at most + /// `max_history_messages` entries while preserving the leading system + /// message when present. 
+ pub(in super::super) fn bound_cached_transcript_messages( + &self, + messages: Vec, + ) -> Vec { + let max = self.config.max_history_messages.max(1); + if messages.len() <= max { + return messages; + } + + let has_system = matches!(messages.first(), Some(msg) if msg.role == "system"); + let keep_tail = if has_system { + max.saturating_sub(1) + } else { + max + }; + let start = messages.len().saturating_sub(keep_tail); + + // Same hazard as `trim_history`: the tail slice can open on a `tool` + // message whose `tool_calls` opener fell outside the window, which the + // provider rejects. Advance past any leading orphaned `tool` results so + // the window starts on a clean turn. + let tail = &messages[start..]; + let orphan_lead = tail.iter().take_while(|m| m.role == "tool").count(); + if orphan_lead > 0 { + log::debug!( + "[agent] bound_cached_transcript_messages snapped window past {orphan_lead} \ + orphaned tool result(s) (tool-cycle bisected by the {max}-message cap)" + ); + } + let tail = &tail[orphan_lead..]; + + let mut bounded = Vec::with_capacity(tail.len() + usize::from(has_system)); + if has_system { + bounded.push(messages[0].clone()); + } + bounded.extend(tail.iter().cloned()); + + // TAURI-RUST-7: symmetric guard to the leading-orphan strip above. A + // resumed transcript that ends on an `assistant` message containing + // `tool_calls` (because the cached transcript was captured mid-cycle, + // before the tool responses were persisted) is rejected by the + // provider with `400 An assistant message with 'tool_calls' must be + // followed by tool messages`. Pop any such trailing assistant + // tool_calls so the bounded transcript ends on a clean turn boundary. 
+ let mut dropped_tail = 0usize; + while bounded + .last() + .map(assistant_message_has_tool_calls) + .unwrap_or(false) + { + bounded.pop(); + dropped_tail += 1; + } + if dropped_tail > 0 { + log::debug!( + "[agent] bound_cached_transcript_messages stripped {dropped_tail} trailing \ + assistant tool_calls message(s) without paired tool responses" + ); + } + + bounded + } + + /// Pre-fetches learned context data from memory (observations, patterns, user profile). + /// + /// This is an async, non-blocking operation that populates the context + /// for the system prompt. + /// + /// # Explicit-preferences narrow path + /// + /// When `learning_enabled` is `false` but `explicit_preferences_enabled` + /// is `true`, only the `user_profile` namespace (pinned preferences from + /// the `remember_preference` tool) is fetched and returned. All other + /// inference-derived data (observations, patterns, reflections, tree + /// summaries) remains empty — the inference stack is not touched. + pub(in super::super) async fn fetch_learned_context(&self) -> LearnedContextData { + // Fast path: neither the full learning subsystem nor the explicit + // preferences path is active — skip all memory reads. + if !self.learning_enabled && !self.explicit_preferences_enabled { + tracing::debug!( + "[learning] fetch_learned_context: both learning_enabled and \ + explicit_preferences_enabled are false — returning empty context" + ); + return LearnedContextData::default(); + } + + // Narrow explicit-preferences path (Lane A): inject the latest-N general + // (always-on) preferences written via `save_preference`. Topic-scoped + // (situational) prefs are NOT injected here — they ride the user message + // via per-turn recall (Lane B). The legacy `user_profile` pinned namespace + // is no longer read here; explicit prefs now live in `user_pref_general`. 
+ if !self.learning_enabled && self.explicit_preferences_enabled { + let general = crate::openhuman::memory::preferences::load_general_preferences( + &self.memory, + crate::openhuman::memory::preferences::STANDING_PREFS_LIMIT, + ) + .await; + tracing::debug!( + "[learning] fetch_learned_context: explicit_preferences_enabled — loaded {} general preference(s) for the system prompt", + general.len() + ); + return LearnedContextData { + user_profile: general, + ..LearnedContextData::default() + }; + } + + // Full learning path: fetch all inference-derived data. + tracing::debug!( + "[learning] fetch_learned_context: learning_enabled=true — fetching full context" + ); + + let obs_entries = self + .memory + .list( + Some("learning_observations"), + Some(&MemoryCategory::Custom("learning_observations".into())), + None, + ) + .await + .unwrap_or_default(); + + let pat_entries = self + .memory + .list( + Some("learning_patterns"), + Some(&MemoryCategory::Custom("learning_patterns".into())), + None, + ) + .await + .unwrap_or_default(); + + // Standing preferences come from the explicit two-lane store (Lane A), + // not the inferred `user_profile` facets — those are demoted: no longer + // injected as ground truth. A high-confidence inferred facet should be + // *proposed* to the user (and pinned via `save_preference` on + // confirmation), not silently treated as a standing preference. + let general = crate::openhuman::memory::preferences::load_general_preferences( + &self.memory, + crate::openhuman::memory::preferences::STANDING_PREFS_LIMIT, + ) + .await; + + // Explicit user reflections — privileged memory class. Pulled + // separately from observations/patterns so the prompt assembly + // can render them ahead of generic tree summaries. 
+ let reflection_entries = self + .memory + .list( + Some(crate::openhuman::learning::reflection::REFLECTIONS_NAMESPACE), + Some(&MemoryCategory::Custom( + crate::openhuman::learning::reflection::REFLECTIONS_NAMESPACE.into(), + )), + None, + ) + .await + .unwrap_or_default(); + + // Pull every namespace's root-level summary from the tree + // summarizer. This is the densest user memory we can hand the + // orchestrator: each root holds up to 20 000 tokens of distilled + // long-term context. Done synchronously here because the calls + // are filesystem reads, not provider/network round-trips, and + // happen exactly once per session (only on the first turn). + // + // Per-namespace + total caps come from the user-facing memory + // window preset on `AgentConfig` so changing the slider in the + // UI takes effect on the very next session-start. + let limits = self.config.resolved_memory_limits(); + let tree_root_summaries = collect_tree_root_summaries( + &self.workspace_dir, + limits.per_namespace_max_chars, + limits.total_tree_max_chars, + ); + + LearnedContextData { + observations: obs_entries + .iter() + .rev() + .take(5) + .map(|e| sanitize_learned_entry(&e.content)) + .collect(), + patterns: pat_entries + .iter() + .take(3) + .map(|e| sanitize_learned_entry(&e.content)) + .collect(), + user_profile: general, + // Cap reflections at 10 to keep the privileged section + // bounded — the issue requires reflections improve context + // rather than flood it. Newest first. + reflections: reflection_entries + .iter() + .rev() + .take(10) + .map(|e| sanitize_learned_entry(&e.content)) + .collect(), + tree_root_summaries, + } + } + + /// Builds the system prompt for the current turn, including tool + /// instructions and learned context. 
+ pub fn build_system_prompt(&self, learned: LearnedContextData) -> Result { + let tools_slice: &[Box] = self.tools.as_slice(); + let instructions = self + .tool_dispatcher + .prompt_instructions_for_specs(self.visible_tool_specs.as_slice()) + .unwrap_or_else(|| self.tool_dispatcher.prompt_instructions(tools_slice)); + // Adapt the owned Box slice into the shared PromptTool + // shape that every prompt-building call-site uses. Temporary vec + // borrows from `tools_slice` and lives for the duration of the + // prompt build. + let prompt_tools = PromptTool::from_tools(tools_slice); + let prompt_visible_tool_names = self.tool_policy_session.visible_tool_names_for_prompt(); + let ctx = PromptContext { + workspace_dir: &self.workspace_dir, + model_name: &self.model_name, + agent_id: &self.agent_definition_name, + tools: &prompt_tools, + skills: &self.skills, + dispatcher_instructions: &instructions, + learned, + visible_tool_names: &prompt_visible_tool_names, + tool_call_format: self.tool_dispatcher.tool_call_format(), + connected_integrations: &self.connected_integrations, + connected_identities_md: crate::openhuman::agent::prompts::render_connected_identities( + ), + include_profile: !self.omit_profile, + include_memory_md: !self.omit_memory_md, + curated_snapshot: None, + user_identity: crate::openhuman::app_state::peek_cached_current_user_identity(), + // TODO(phase-2): Wire personality context into the live agent turn. + // Currently personalities only take effect during delegate_to_personality sub-agent runs. + // To activate: load the active profile via AgentProfileStore::resolve(), build + // PersonalityContext::from_profile(), and populate these fields. 
+ personality_soul_md: None, // TODO: personality_ctx.soul_md_override + personality_memory_md: None, // TODO: personality_ctx.memory_md_override + personality_roster: vec![], // TODO: build_personality_roster(&workspace_dir) + }; + // Route through the global context manager so every + // prompt-building call-site — main agent, sub-agent runner, + // channel runtimes — shares one builder configuration. + let mut prompt = self.context.build_system_prompt(&ctx)?; + if let Some(boundary) = render_tool_policy_boundary(&self.tool_policy_session, 2048) { + prompt = format!("{boundary}\n\n{prompt}"); + } + Ok(prompt) + } +} diff --git a/src/openhuman/agent/harness/session/turn/core.rs b/src/openhuman/agent/harness/session/turn/core.rs new file mode 100644 index 0000000000..b17e9166c2 --- /dev/null +++ b/src/openhuman/agent/harness/session/turn/core.rs @@ -0,0 +1,615 @@ +//! Core turn execution: the main `turn()` method and `inject_agent_experience_context()`. + +use super::super::transcript; +use super::super::turn_engine_adapter::{AgentCheckpoint, AgentObserver, AgentToolSource}; +use super::super::types::Agent; +use super::{integration_announcement_note, normalize_tool_call}; +use crate::openhuman::agent::harness; +use crate::openhuman::agent::hooks::{self, TurnContext}; +use crate::openhuman::agent::memory_loader::collect_recall_citations; +use crate::openhuman::agent::progress::AgentProgress; +use crate::openhuman::agent_experience::{ + prepend_experience_block, render_experience_hits, AgentExperienceStore, ExperienceQuery, +}; +use crate::openhuman::inference::provider::{ChatMessage, ConversationMessage}; +use crate::openhuman::memory::MemoryCategory; +use crate::openhuman::util::truncate_with_ellipsis; + +use anyhow::Result; +use std::hash::{Hash, Hasher}; +use std::sync::Arc; + +impl Agent { + /// Executes a single interaction "turn" with the agent. + /// + /// This function is the primary driver of the agent's behavior. 
It manages the + /// end-to-end lifecycle of a user request: + /// + /// 1. **Initialization**: Resumes from a session transcript if this is a new turn + /// to preserve KV-cache stability. + /// 2. **Prompt Construction**: Builds the system prompt (only on the first turn) + /// incorporating learned context and tool instructions. + /// 3. **Context Injection**: Enriches the user message with relevant memories + /// fetched via the [`MemoryLoader`]. + /// 4. **Execution Loop**: Enters a loop (up to `max_tool_iterations`) where it: + /// - Manages the context window (reduction/summarization). + /// - Calls the LLM provider. + /// - Parses and executes tool calls. + /// - Accumulates results into history. + /// 5. **Synthesis**: Returns the final assistant response after all tools have + /// finished or the iteration budget is exhausted. + /// 6. **Background Tasks**: Triggers episodic memory indexing and facts + /// extraction asynchronously. + pub async fn turn(&mut self, user_message: &str) -> Result { + let turn_started = std::time::Instant::now(); + self.emit_progress(AgentProgress::TurnStarted).await; + log::info!("[agent] turn started — awaiting user message processing"); + log::info!( + "[agent_loop] turn start message_chars={} history_len={} max_tool_iterations={}", + user_message.chars().count(), + self.history.len(), + self.config.max_tool_iterations + ); + self.ensure_composio_integrations_listener(); + // ── Session transcript resume ───────────────────────────────── + // On a fresh session (empty history), look for a previous + // transcript to pre-populate the exact provider messages for + // KV cache prefix reuse. 
+ if self.history.is_empty() && self.cached_transcript_messages.is_none() { + self.try_load_session_transcript(); + } + + if self.history.is_empty() { + // Learned context is only baked into the system prompt on the + // very first turn — once the history is non-empty we reuse the + // stored prompt verbatim to preserve the KV-cache prefix the + // inference backend has already tokenised. Fetching it later + // would just burn memory-store reads on data we throw away. + if !self.connected_integrations_initialized { + self.fetch_connected_integrations().await; + // Sessions born without a cached Composio view still need + // a one-shot delegation-surface reconcile before the system + // prompt is frozen. The shared-Arc failure path returns + // `false`, but on turn 1 the Arc should still be uniquely + // owned; a `false` return here indicates a programmer error + // and the warn-level log inside the helper already surfaces + // it, so we keep the existing best-effort contract. + let _ = self.refresh_delegation_tools(); + } + let learned = self.fetch_learned_context().await; + let rendered_prompt = self.build_system_prompt(learned)?; + log::info!("[agent] system prompt built — initialising conversation history"); + log::info!( + "[agent_loop] system prompt built chars={}", + rendered_prompt.chars().count() + ); + // User-file injection (PROFILE.md, MEMORY.md) puts + // potentially-sensitive content (LinkedIn scrape output, + // archivist-curated memories) into the system prompt. Avoid + // leaking that to debug logs — log a length + content hash + // instead. Narrow specialists (both flags off) keep the + // full-body log so prompt-engineering iteration on + // tools/safety sections stays easy. 
+ if self.omit_profile && self.omit_memory_md { + log::debug!("[agent_loop] system prompt body:\n{}", rendered_prompt); + } else { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + rendered_prompt.hash(&mut hasher); + log::debug!( + "[agent_loop] system prompt body redacted (contains PROFILE/MEMORY): chars={} hash={:016x}", + rendered_prompt.chars().count(), + hasher.finish() + ); + } + self.history + .push(ConversationMessage::Chat(ChatMessage::system( + rendered_prompt, + ))); + // Seed the per-turn mid-session refresh baseline with the + // hash of whatever Composio actually returned just now. + // Subsequent turns short-circuit unless this hash changes. + self.last_seen_integrations_hash = + crate::openhuman::composio::connected_set_hash(&self.connected_integrations); + // Seed the announced set with the startup connected toolkits so + // only genuinely-new mid-session connects get announced later. + self.announced_integrations = self + .connected_integrations + .iter() + .map(|i| i.toolkit.clone()) + .collect(); + } else { + // Deliberately do NOT rebuild the system prompt on subsequent + // turns. The rendered prompt is the KV-cache prefix the inference + // backend has already tokenised; replacing its bytes (even + // cosmetically) forces the backend to re-prefill from scratch. + // + // Dynamic turn-to-turn context (memory recall, learned snippets) + // rides on the user message via `memory_loader.load_context()` + // — that's where the caller should inject anything that varies + // between turns. + // + // *** Mid-session schema-only refresh *** + // + // The system prompt stays frozen, but the function-calling + // schema (the `tools` field in the provider request) is sent + // fresh on every API call — it's not part of the KV-cache + // prefix. 
So we *can* react to Composio connect/disconnect + // events mid-session by re-synthesising the `delegate_` + // surface on `self.tools` / `self.tool_specs` and letting + // the next provider call carry the new schema. KV cache stays + // intact; the system prompt's `## Connected Integrations` + // block goes mildly stale until the next session, but the + // schema is the source of truth the model actually routes + // against. + // + // The signal we react to is the process-wide + // [`crate::openhuman::composio::INTEGRATIONS_CACHE`], kept + // current by (a) the desktop UI's 5 s + // `composio_list_connections` poll, (b) the post-OAuth + // `ComposioConnectionCreatedSubscriber` invalidation, and + // (c) the 60 s TTL fallback. We read it via the read-only + // [`crate::openhuman::composio::cached_active_integrations`] + // helper — never trigger a backend fetch ourselves, never + // block on a writer. + // Session agents built through `from_config_*` carry their + // runtime `Config` snapshot directly, so this read avoids the + // old `Config::load_or_init()` round-trip on every turn. + // + let _ = self.refresh_delegation_tools_from_cached_integrations("turn-boundary"); + // Cache empty/expired or config unavailable => no signal. + // We leave the current tool surface alone and pick up any + // real change on the next turn after the UI's 5 s poll has + // repopulated [`INTEGRATIONS_CACHE`]. 
+ + log::trace!( + "[agent_loop] system prompt reused (history_len={}) — KV cache prefix preserved", + self.history.len() + ); + } + + if self.auto_save { + let _ = self + .memory + .store( + "", + "user_msg", + user_message, + MemoryCategory::Conversation, + None, + ) + .await; + } + + log::info!("[agent] loading memory context for user message"); + const MEMORY_CITATION_LIMIT: usize = 5; + const MEMORY_CITATION_MIN_RELEVANCE: f64 = 0.4; + match collect_recall_citations( + self.memory.as_ref(), + user_message, + MEMORY_CITATION_LIMIT, + MEMORY_CITATION_MIN_RELEVANCE, + ) + .await + { + Ok(citations) => { + log::debug!( + "[agent_loop] memory citations collected count={}", + citations.len() + ); + self.last_turn_citations = citations; + } + Err(err) => { + log::warn!("[agent_loop] memory citation collection failed: {err}"); + self.last_turn_citations.clear(); + } + } + let context = self + .memory_loader + .load_context(self.memory.as_ref(), user_message) + .await + .unwrap_or_default(); + + // ── Phase 3 STM preemptive recall ──────────────────────────── + // On the very first turn only, assemble a bounded cross-thread + // context block from the FTS5 episodic arm (keyword match) and the + let mut context = context; + + // ── Lane B: situational preferences (every turn) ───────────────────── + // Recall topic-scoped preferences semantically relevant to THIS message + // (model-aware embeddings, gated by vector similarity) and inject them + // under a banner. Runs every turn — unlike the first-turn-gated tree/STM + // blocks above — because the query changes per message; it rides the + // per-turn context that's prepended to the user message (no KV-cache + // cost). An unrelated message clears the similarity gate to nothing, so + // no block is injected. 
+ { + let situational = + crate::openhuman::memory::preferences::recall_situational_preferences( + &self.memory, + user_message, + ) + .await; + if !situational.is_empty() { + log::info!( + "[pref_recall] situational block injected: {} item(s)", + situational.len() + ); + context.push_str("## Relevant preferences for this message\n\n"); + for pref in &situational { + context.push_str("- "); + context.push_str(pref.trim()); + context.push('\n'); + } + context.push('\n'); + } else { + log::debug!("[pref_recall] no situational preference relevant to this message"); + } + } + + let enriched = if context.is_empty() { + log::info!("[agent] no memory context found — using raw user message"); + self.last_memory_context = None; + user_message.to_string() + } else { + log::info!( + "[agent] memory context loaded — enriching user message context_chars={}", + context.chars().count() + ); + self.last_memory_context = Some(context.clone()); + format!("{context}{user_message}") + }; + + let enriched = self + .inject_agent_experience_context(user_message, enriched) + .await; + + // ── SKILL.md body injection (#781) ─────────────────────────── + // Match installed SKILL.md skills against the user message and + // prepend their bodies ahead of the memory-context block so the + // LLM sees them at the top of the user turn. See the module + // docs on [`crate::openhuman::workflows::inject`] for the matching + // heuristic and size cap rationale. 
+ let enriched = { + use crate::openhuman::workflows::inject; + let matches = inject::match_workflows(&self.skills, user_message); + if matches.is_empty() { + log::debug!( + "[skills:inject] no skill matches for user message (skill_catalog_len={})", + self.skills.len() + ); + enriched + } else { + let injection = inject::render_injection( + &matches, + inject::DEFAULT_MAX_INJECTION_BYTES, + |skill| skill.read_body(), + ); + let matched_count = injection.decisions.iter().filter(|d| d.matched).count(); + log::info!( + "[skills:inject] summary candidates={} matched={} injected_bytes={} truncated_any={}", + injection.decisions.len(), + matched_count, + injection.injected_bytes, + injection.truncated + ); + if injection.rendered.is_empty() { + enriched + } else { + format!("{}\n{}", injection.rendered, enriched) + } + } + }; + + // Consume any one-shot mid-session connect announcement parked by + // `refresh_delegation_tools_from_cached_integrations`. It rides on the + // user turn (NOT a system message — `trim_history` hoists system + // messages to the front and would bust the KV-cache prefix) and + // `.take()` clears it so it fires exactly once. + let pending_slugs = std::mem::take(&mut self.pending_integration_announcement); + let enriched = match integration_announcement_note(&pending_slugs) { + Some(note) => format!("{note}\n\n{enriched}"), + None => enriched, + }; + + self.history + .push(ConversationMessage::Chat(ChatMessage::user(enriched))); + + // Pin the main agent to its configured model for the lifetime of + // the session. Per-turn classification used to run here, but it + // would flip `effective_model` mid-conversation (e.g. reasoning → + // coding based on a single keyword). Every flip invalidates the + // backend's KV cache namespace for this session, costing full + // re-prefill on the very next turn. The main agent's job is to + // decide *which sub-agent* to spawn — that routing lives in the + // model prompt, not in the Rust-side classifier. 
Sub-agents pick + // their own tier via `ModelSpec::Hint(...)` in their definition. + let effective_model = self.model_name.clone(); + log::info!( + "[agent_loop] model pinned model={} (per-turn classification disabled for KV cache stability)", + effective_model + ); + + // Snapshot the parent's runtime once per turn so any + // `spawn_subagent` invocation that fires inside this turn can + // read it via the PARENT_CONTEXT task-local. We override the + // model field with the post-classification effective model. + let mut parent_context = self.build_parent_execution_context(); + parent_context.model_name = effective_model.clone(); + + // Bump the session-memory turn counter. Used later by + // `should_extract_session_memory` to decide whether to spawn a + // background archivist fork at end-of-turn. + self.context.tick_turn(); + + let turn_body = async { + // Capture everything the engine seams need as locals/clones *before* + // the observer takes `&mut self`, so the borrow checker is happy: + // the tool source + parser + checkpoint hold clones disjoint from + // the `Agent`, and the observer alone borrows it mutably. + let dispatcher = self.tool_dispatcher.clone(); + let provider = self.provider.clone(); + let provider_name = self.event_channel().to_string(); + let temperature = self.temperature; + let max_iterations = self.config.max_tool_iterations; + // Source multimodal limits from the session's runtime config when + // present so [IMAGE:…] / [FILE:…] markers in user messages are + // resolved with the operator-configured caps (max files, max size, + // max extracted text). Without this, agents fall back to the + // crate-default caps and `MultimodalFileConfig::default()` + // disables file expansion entirely. 
+ let multimodal = self + .integration_runtime_config + .as_ref() + .map(|c| c.multimodal.clone()) + .unwrap_or_default(); + let multimodal_files = self + .integration_runtime_config + .as_ref() + .map(|c| c.multimodal_files.clone()) + .unwrap_or_default(); + let artifact_store = Some( + crate::openhuman::agent::harness::tool_result_artifacts::ToolResultArtifactStore::new( + self.action_dir.clone(), + self.session_key.clone(), + ), + ); + let mut tool_source = AgentToolSource { + tools: self.tools.clone(), + visible_tool_names: self.visible_tool_names.clone(), + tool_policy_session: self.tool_policy_session.clone(), + tool_policy: self.tool_policy.clone(), + payload_summarizer: self.payload_summarizer.clone(), + event_session_id: self.event_session_id().to_string(), + event_channel: self.event_channel().to_string(), + agent_definition_id: self.agent_definition_id.clone(), + prefer_markdown: self.context.prefer_markdown_tool_output(), + budget_bytes: self.context.tool_result_budget_bytes(), + artifact_store: artifact_store.clone(), + should_send_specs: self.tool_dispatcher.should_send_tool_specs(), + advertised_specs: self.visible_tool_specs.as_ref().clone(), + records: Vec::new(), + }; + let progress = super::super::super::engine::TurnProgress::new(self.on_progress.clone()); + let parser = super::super::super::engine::DispatcherParser { + dispatcher: dispatcher.as_ref(), + }; + let checkpoint = AgentCheckpoint { + provider: self.provider.clone(), + dispatcher: self.tool_dispatcher.clone(), + model: effective_model.clone(), + temperature, + on_progress: self.on_progress.clone(), + user_message: user_message.to_string(), + max_iterations, + }; + let turn_run_queue = self.run_queue.clone(); + let cached_prefix = self.cached_transcript_messages.take(); + let mut observer = AgentObserver { + agent: self, + artifact_store, + effective_model: effective_model.clone(), + cumulative_input: 0, + cumulative_output: 0, + cumulative_cached: 0, + cumulative_charged: 0.0, + 
last_turn_usage: None, + cached_prefix, + pending_results: Vec::new(), + did_push_final: false, + }; + let mut buf: Vec = Vec::new(); + + // Box-pin the parent agent's engine call so its ~600-line + // generator state lives on the heap. Tools that delegate to + // sub-agents (orchestrator → researcher / personality / + // archetype / skill) recurse back into another + // `run_turn_engine` via `run_subagent`; without the box, + // both engines' state machines pile up on the same tokio + // worker stack and overflow the 2 MiB default. The inner + // boxes inside `run_typed_mode` aren't reached if the + // overflow happens during the parent's poll on the way in + // — verified against the `chat-harness-subagent` Playwright + // lane crash on PR #3151. + let outcome = Box::pin(super::super::super::engine::run_turn_engine( + provider.as_ref(), + &mut buf, + &mut tool_source, + &progress, + &mut observer, + &checkpoint, + &parser, + &provider_name, + &effective_model, + temperature, + true, // silent — the channel/UI renders via progress + the return value + &multimodal, + &multimodal_files, + max_iterations, + None, // the web bridge streams via on_progress deltas, not on_delta + &[], + turn_run_queue, + )) + .await?; + + // Pull the observer's accounting out, then drop it to release the + // `&mut self` borrow so the epilogue can use `self`. + let did_push_final = observer.did_push_final; + let cumulative_input = observer.cumulative_input; + let cumulative_output = observer.cumulative_output; + let cumulative_cached = observer.cumulative_cached; + let cumulative_charged = observer.cumulative_charged; + let last_turn_usage = observer.last_turn_usage.take(); + drop(observer); + let records = std::mem::take(&mut tool_source.records); + + self.context.record_tool_calls(records.len()); + + // For a clean final response the observer already pushed the + // assistant message + persisted. 
For a max-iteration checkpoint or + // circuit-breaker halt the engine returned the text without pushing + // it, so finish the history + transcript here (mirrors the old + // final/max-iter branches). + if !did_push_final { + self.history + .push(ConversationMessage::Chat(ChatMessage::assistant( + outcome.text.clone(), + ))); + self.trim_history(); + // Note: the engine already emits `TurnCompleted` on the + // checkpoint exit (and every other terminal path), so we don't + // re-emit it here — doing so would double-fire for the UI. + let messages = self.tool_dispatcher.to_provider_messages(&self.history); + self.persist_session_transcript( + &messages, + cumulative_input, + cumulative_output, + cumulative_cached, + cumulative_charged, + last_turn_usage.as_ref(), + ); + } + + // Auto-save a short memory of the final reply (not on a capped turn, + // matching the prior behavior). + if self.auto_save && !outcome.hit_cap { + let summary = truncate_with_ellipsis(&outcome.text, 100); + let _ = self + .memory + .store("", "assistant_resp", &summary, MemoryCategory::Daily, None) + .await; + } + + // Fire post-turn hooks (non-blocking). 
+ if !self.post_turn_hooks.is_empty() { + let ctx = TurnContext { + user_message: user_message.to_string(), + assistant_response: outcome.text.clone(), + tool_calls: records, + turn_duration_ms: turn_started.elapsed().as_millis() as u64, + session_id: Some(self.event_session_id.clone()) + .filter(|session_id| !session_id.trim().is_empty()), + agent_id: Some(self.agent_definition_id.clone()) + .filter(|agent_id| !agent_id.trim().is_empty()), + entrypoint: Some(self.event_channel.clone()) + .filter(|entrypoint| !entrypoint.trim().is_empty()), + iteration_count: outcome.iterations as usize, + }; + hooks::fire_hooks(&self.post_turn_hooks, ctx); + } + + Ok(outcome.text) + }; // end of `turn_body` async block + + // Run the turn body inside the parent-execution-context scope so + // that any `spawn_subagent` tool call fired during the loop can + // read the parent's provider, tools, model, and workspace via + // the PARENT_CONTEXT task-local. + let result = harness::with_parent_context(parent_context, turn_body).await; + + // Session transcript persistence lives INSIDE the turn body — + // one write per provider response, fired right after the + // response lands (see the tool-call and terminal branches in + // `turn_body`). A crash during tool execution no longer drops + // the assistant's reply because it was already flushed to + // disk before tool dispatch started. No outer-loop save is + // needed here. + + // ── Session-memory extraction (stage 5) ─────────────────────── + // + // If the pipeline's deltas have crossed all three thresholds + // (token growth, tool calls, turn count), spawn a *background* + // archivist sub-agent that will distil durable facts into the + // workspace MEMORY.md file via the `update_memory_md` tool. + // + // The spawn is fire-and-forget: the main turn returns the + // user-visible response immediately, and the archivist runs + // asynchronously on the `agentic` tier. 
We optimistically mark + // the extraction complete right away — if it actually fails, + // we'll just retry on the next threshold window (a few turns + // later), which is the right amount of retry behaviour for a + // librarian task that's idempotent across reruns. + if result.is_ok() && self.context.should_extract_session_memory() { + self.spawn_session_memory_extraction().await; + // Sibling pipeline (#1399): heuristic transcript ingestion + // turns the just-written transcript into durable + // conversational memory + reflections so a brand-new chat + // can recover continuity. Background-only, never blocks the + // user-facing turn return. + self.spawn_transcript_ingestion(); + } + + result + } + + pub(super) async fn inject_agent_experience_context( + &self, + user_message: &str, + enriched: String, + ) -> String { + const MAX_EXPERIENCE_HITS: usize = 3; + const MAX_EXPERIENCE_BLOCK_BYTES: usize = 2048; + + if !self.learning_enabled { + return enriched; + } + + let tools = self + .visible_tool_specs + .iter() + .map(|spec| spec.name.clone()) + .collect(); + let store = AgentExperienceStore::new(self.memory.clone()); + let query = ExperienceQuery { + query: user_message.to_string(), + tools, + tags: Vec::new(), + agent_id: Some(self.agent_definition_id.clone()).filter(|id| !id.trim().is_empty()), + entrypoint: Some(self.event_channel.clone()) + .filter(|entrypoint| !entrypoint.trim().is_empty()), + max_hits: MAX_EXPERIENCE_HITS, + }; + + match store.retrieve(query).await { + Ok(hits) => { + let matched_hits: Vec<_> = hits + .into_iter() + .filter(|hit| !hit.match_reasons.is_empty()) + .collect(); + let block = render_experience_hits(&matched_hits, MAX_EXPERIENCE_BLOCK_BYTES); + if block.is_empty() { + return enriched; + } + log::debug!( + "[agent-experience] injected {} experience hit(s) bytes={}", + matched_hits.len(), + block.len() + ); + prepend_experience_block(&enriched, &block) + } + Err(err) => { + log::warn!("[agent-experience] retrieval failed 
(non-fatal): {err}"); + enriched + } + } + } +} diff --git a/src/openhuman/agent/harness/session/turn/mod.rs b/src/openhuman/agent/harness/session/turn/mod.rs new file mode 100644 index 0000000000..bfd5d9827b --- /dev/null +++ b/src/openhuman/agent/harness/session/turn/mod.rs @@ -0,0 +1,166 @@ +//! Turn lifecycle: running a single interaction, executing tools, and +//! wiring the context pipeline + sub-agent harness around them. + +mod context; +mod core; +mod session_io; +mod tools; + +pub(crate) use core::*; + +use crate::openhuman::agent::dispatcher::ParsedToolCall; + +use std::borrow::Cow; + +/// Built-in direct tools that the orchestrator should call by name, not +/// wrapped in `run_workflow`. +const DIRECT_TOOL_NAMES: &[&str] = &[ + "cron_add", + "cron_list", + "cron_remove", + "cron_update", + "cron_run", + "cron_runs", + "current_time", +]; + +/// Recovery shim for legacy/wrong-model calls of the form: +/// `run_workflow({workflow_id: "", inputs: {...}})` (or the +/// pre-rename `run_skill({skill_id: ...})`). +/// +/// When this pattern appears, rewrite it into a direct tool call so the turn +/// can proceed without a manual retry. +pub(super) fn normalize_tool_call<'a>(call: &'a ParsedToolCall) -> Cow<'a, ParsedToolCall> { + if call.name != "run_workflow" && call.name != "run_skill" { + return Cow::Borrowed(call); + } + // Accept either the current `workflow_id` arg or the legacy `skill_id`. 
+ let Some(target) = call + .arguments + .get("workflow_id") + .or_else(|| call.arguments.get("skill_id")) + .and_then(|v| v.as_str()) + else { + return Cow::Borrowed(call); + }; + if !DIRECT_TOOL_NAMES.contains(&target) { + return Cow::Borrowed(call); + } + let Some(inputs) = call.arguments.get("inputs").and_then(|v| v.as_object()) else { + return Cow::Borrowed(call); + }; + + log::warn!( + "[agent_loop] rewrote legacy {}->{} call into direct tool invocation", + call.name, + target + ); + let skill_id = target; + Cow::Owned(ParsedToolCall { + name: skill_id.to_string(), + arguments: serde_json::Value::Object(inputs.clone()), + tool_call_id: call.tool_call_id.clone(), + }) +} + +/// Compute the one-shot mid-session connect announcement. +/// +/// Given the toolkit slugs currently connected and the set of slugs already +/// announced to the model this session, returns a natural-language note for +/// any genuinely-new slugs (and records them in `announced` so they are never +/// re-announced). Returns `None` when nothing new connected. +/// +/// Kept as a free function (no `&self`) so the delta logic is unit-testable +/// without standing up a full `Agent` — see `turn_tests.rs`. +/// Returns the toolkit slugs in `connected` that have not yet been announced +/// this session, marking them announced. Empty when nothing is new. +pub(super) fn newly_connected_slugs( + connected: &[String], + announced: &mut std::collections::HashSet, +) -> Vec { + let newly: Vec = connected + .iter() + .filter(|slug| !announced.contains(*slug)) + .cloned() + .collect(); + for slug in &newly { + announced.insert(slug.clone()); + } + newly +} + +/// Render the one-shot user-turn note for a set of freshly-connected slugs. +/// Empty input yields `None`. 
+pub(super) fn integration_announcement_note(slugs: &[String]) -> Option { + if slugs.is_empty() { + return None; + } + Some(format!( + "[integration update] These integration(s) connected during this conversation and are available right now: {}. \ +Use delegate_to_integrations_agent with the matching toolkit slug to act on them immediately — do not tell the user to reconnect or restart.", + slugs.join(", ") + )) +} + +/// Wrapper around +/// [`crate::openhuman::memory_tree::tree_runtime::store::collect_root_summaries_with_caps`] +/// that takes user-resolved per-namespace and total caps. The actual +/// limits are derived from the active +/// [`crate::openhuman::config::schema::agent::MemoryContextWindow`] +/// preset by [`crate::openhuman::config::schema::agent::AgentConfig::resolved_memory_limits`]. +pub(super) fn collect_tree_root_summaries( + workspace_dir: &std::path::Path, + per_namespace_cap: usize, + total_cap: usize, +) -> Vec { + crate::openhuman::memory_tree::tree_runtime::store::collect_root_summaries_with_caps( + workspace_dir, + per_namespace_cap, + total_cap, + ) + .into_iter() + .map( + |(namespace, body, updated_at)| crate::openhuman::context::prompt::NamespaceSummary { + namespace, + body, + updated_at, + }, + ) + .collect() +} + +/// Sanitize a learned memory entry before injecting into the system prompt. +/// Strips raw data, limits length, and removes potential secrets. 
+pub(super) fn sanitize_learned_entry(content: &str) -> String { + let trimmed = content.trim(); + if trimmed.is_empty() { + return String::new(); + } + // Truncate to a safe length + let max_len = 200; + let sanitized: String = trimmed.chars().take(max_len).collect(); + // Strip anything that looks like a secret/token + if sanitized.contains("Bearer ") + || sanitized.contains("sk-") + || sanitized.contains("ghp_") + || sanitized.contains("-----BEGIN") + { + return "[redacted: potential secret]".to_string(); + } + sanitized +} + +#[cfg(test)] +pub(crate) use super::transcript; +#[cfg(test)] +pub(crate) use super::turn_checkpoint::assistant_message_has_tool_calls; +#[cfg(test)] +pub(crate) use super::types::Agent; +#[cfg(test)] +pub(crate) use crate::openhuman::context::prompt::LearnedContextData; +#[cfg(test)] +pub(crate) use anyhow::Result; + +#[cfg(test)] +#[path = "../turn_tests.rs"] +mod tests; diff --git a/src/openhuman/agent/harness/session/turn/session_io.rs b/src/openhuman/agent/harness/session/turn/session_io.rs new file mode 100644 index 0000000000..63c09d64e1 --- /dev/null +++ b/src/openhuman/agent/harness/session/turn/session_io.rs @@ -0,0 +1,378 @@ +//! Session persistence: transcript loading, checkpointing, and background tasks. + +use super::super::transcript; +use super::super::turn_checkpoint::MAX_ITER_CHECKPOINT_INSTRUCTION; +use super::super::types::Agent; +use crate::openhuman::agent::harness; +use crate::openhuman::agent::progress::AgentProgress; +use crate::openhuman::context::ARCHIVIST_EXTRACTION_PROMPT; +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, ProviderDelta, UsageInfo}; + +impl Agent { + // ───────────────────────────────────────────────────────────────── + // Session transcript helpers + // ───────────────────────────────────────────────────────────────── + + /// Try to load a previous session transcript for KV cache resume. + /// + /// Best-effort: failures are logged and silently ignored. 
+ pub(in super::super) fn try_load_session_transcript(&mut self) { + match transcript::find_latest_transcript(&self.workspace_dir, &self.agent_definition_name) { + Some(path) => { + log::info!( + "[transcript] found previous transcript path={}", + path.display() + ); + match transcript::read_transcript(&path) { + Ok(session) => { + if session.messages.is_empty() { + log::debug!( + "[transcript] previous transcript is empty — skipping resume" + ); + return; + } + let loaded_count = session.messages.len(); + log::info!("[transcript] loaded {} messages for resume", loaded_count); + let bounded = self.bound_cached_transcript_messages(session.messages); + if bounded.len() < loaded_count { + log::warn!( + "[transcript] resume prefix trimmed from {} to {} messages (max_history_messages={})", + loaded_count, + bounded.len(), + self.config.max_history_messages + ); + } + self.cached_transcript_messages = Some(bounded); + } + Err(err) => { + log::warn!( + "[transcript] failed to parse previous transcript {}: {err}", + path.display() + ); + } + } + } + None => { + log::debug!( + "[transcript] no previous transcript found for agent={}", + self.agent_definition_name + ); + } + } + } + + /// Ask the provider for a resumable checkpoint summary when a turn + /// hits the tool-call iteration cap, with native tools **disabled** so + /// the model returns prose rather than another tool call. Streams text + /// deltas to the progress sink (when attached) so the checkpoint + /// appears in the UI like any other reply. + /// + /// Returns the summary text (empty when the provider call fails or + /// yields nothing — the caller then falls back to + /// [`build_deterministic_checkpoint`] so the thread is never left on an + /// unterminated tool cycle, bug-report-2026-05-26 A1) **paired with the + /// provider usage** for this extra call, so the caller can fold it into + /// the turn's cumulative token/cost accounting instead of silently + /// dropping it. 
+ pub(super) async fn summarize_iteration_checkpoint( + &self, + base_messages: &[ChatMessage], + effective_model: &str, + iteration_for_stream: u32, + ) -> (String, Option) { + let mut messages = base_messages.to_vec(); + messages.push(ChatMessage::user(MAX_ITER_CHECKPOINT_INSTRUCTION)); + + // Mirror the main loop's streaming sink so the checkpoint renders + // incrementally. Only text deltas are relevant here (tools are + // disabled for this call). + let (delta_tx_opt, delta_forwarder) = if self.on_progress.is_some() { + let (tx, mut rx) = tokio::sync::mpsc::channel::(128); + let progress_tx = self.on_progress.clone(); + let forwarder = tokio::spawn(async move { + while let Some(event) = rx.recv().await { + let Some(ref sink) = progress_tx else { + continue; + }; + if let ProviderDelta::TextDelta { delta } = event { + if sink + .send(AgentProgress::TextDelta { + delta, + iteration: iteration_for_stream, + }) + .await + .is_err() + { + break; + } + } + } + }); + (Some(tx), Some(forwarder)) + } else { + (None, None) + }; + + let result = self + .provider + .chat( + ChatRequest { + messages: &messages, + tools: None, + stream: delta_tx_opt.as_ref(), + }, + effective_model, + self.temperature, + ) + .await; + drop(delta_tx_opt); + if let Some(handle) = delta_forwarder { + let _ = handle.await; + } + + match result { + Ok(resp) => { + let usage = resp.usage.clone(); + // Strip any stray tool-call XML a text-mode model may have + // emitted; keep only the prose. + let (text, calls) = self.tool_dispatcher.parse_response(&resp); + let checkpoint = if !text.trim().is_empty() { + text + } else if calls.is_empty() { + // No tool-call markup was present, so the raw text (if + // any) is genuine prose — safe to use. + resp.text.unwrap_or_default() + } else { + // `parse_response` stripped tool-call markup and left no + // prose. Do NOT re-emit `resp.text` here: it would persist + // the raw `…` markup verbatim as the checkpoint. 
+ // Return empty so the caller uses the deterministic + // fallback instead (bug-report-2026-05-26 A1). + String::new() + }; + (checkpoint, usage) + } + Err(e) => { + log::warn!("[agent_loop] checkpoint summary call failed: {e:#}"); + (String::new(), None) + } + } + } + + /// Persist the exact provider messages as a session transcript. + /// + /// Writes JSONL as source of truth and re-renders the companion `.md` + /// for human readability. Best-effort: failures are logged and silently + /// ignored. The JSONL conversation store remains the authoritative + /// persistence layer; session transcripts are an optimization for KV + /// cache stability. + /// + /// `turn_usage` — when `Some`, attributes per-message token/cost figures + /// to the last assistant message in the written transcript. + pub(in super::super) fn persist_session_transcript( + &mut self, + messages: &[ChatMessage], + input_tokens: u64, + output_tokens: u64, + cached_input_tokens: u64, + charged_amount_usd: f64, + turn_usage: Option<&transcript::TurnUsage>, + ) { + // Resolve the transcript path on first write. The stem is + // `{parent_prefix}__{session_key}` for sub-agents (producing a + // flat hierarchical filename) or just `{session_key}` for a + // root session. Prefix chaining is already done by the + // sub-agent runner when it populates `session_parent_prefix`. 
+ if self.session_transcript_path.is_none() { + let stem = match &self.session_parent_prefix { + Some(prefix) => format!("{}__{}", prefix, self.session_key), + None => self.session_key.clone(), + }; + match transcript::resolve_keyed_transcript_path(&self.workspace_dir, &stem) { + Ok(path) => { + log::info!( + "[transcript] new session transcript path={}", + path.display() + ); + self.session_transcript_path = Some(path); + } + Err(err) => { + log::warn!("[transcript] failed to resolve transcript path: {err}"); + return; + } + } + } + + let path = self.session_transcript_path.as_ref().unwrap(); + let now = chrono::Utc::now().to_rfc3339(); + + let meta = transcript::TranscriptMeta { + agent_name: self.agent_definition_name.clone(), + dispatcher: if self.tool_dispatcher.should_send_tool_specs() { + "native".into() + } else { + "xml".into() + }, + created: now.clone(), + updated: now, + turn_count: self.context.stats().session_memory_current_turn as usize, + input_tokens, + output_tokens, + cached_input_tokens, + charged_amount_usd, + thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(), + }; + + if let Err(err) = transcript::write_transcript(path, messages, &meta, turn_usage) { + log::warn!( + "[transcript] failed to write transcript {}: {err}", + path.display() + ); + } + } + + // ───────────────────────────────────────────────────────────────── + // Session-memory extraction (stage 5 of the context pipeline) + // ───────────────────────────────────────────────────────────────── + + /// Spawn a background archivist sub-agent to extract durable facts + /// from the recent conversation into `MEMORY.md`. Fire-and-forget. + /// + /// Gated by [`context_pipeline::SessionMemoryState::should_extract`] + /// — see its docs for the threshold invariants. Safe to call from + /// inside `turn()` after the turn body has settled. 
+ pub(in super::super) async fn spawn_session_memory_extraction(&mut self) { + // ── Flush the trailing open segment before the session winds down ── + // + // The ArchivistHook manages per-turn segment lifecycle but cannot + // force-close the *last* open segment because there is no explicit + // "session end" event in the turn loop. `spawn_session_memory_extraction` + // is the closest available signal: it fires when the context manager + // decides the session has accumulated enough material to archive. + // + // GUARANTEE: the flush is *awaited* here (not fire-and-forget) so + // the trailing segment always receives its recap + embedding + tree + // ingest before the function returns, even during runtime wind-down. + // This honours the doc-comment guarantee on `flush_open_segment` in + // `archivist.rs`. No deadlock risk: no mutex guard is held across + // this await point. + if let Some(ref archivist) = self.archivist_hook { + let session_id = self.event_session_id.clone(); + log::debug!( + "[archivist] awaiting flush_open_segment for session={session_id} at session wind-down" + ); + archivist.flush_open_segment(&session_id).await; + } + + let Some(registry) = harness::AgentDefinitionRegistry::global() else { + log::debug!("[session_memory] registry not initialised — skipping extraction spawn"); + return; + }; + let Some(definition) = registry.get("archivist").cloned() else { + log::debug!( + "[session_memory] archivist definition not found — skipping extraction spawn" + ); + return; + }; + + // Build a dedicated ParentExecutionContext for the background + // task. The in-progress turn's context has already been + // consumed by the `with_parent_context` scope above, so this is + // a fresh snapshot. + let parent_ctx = self.build_parent_execution_context(); + let extraction_prompt = ARCHIVIST_EXTRACTION_PROMPT.to_string(); + + // Flip the extraction state to "in-progress" so future + // should_extract checks return false until the archivist + // finishes. 
We then hand a shared handle to the spawned task + // so it can mark the extraction complete (resets deltas) on + // success, or failed (keeps deltas intact for retry) on error. + // This replaces the old optimistic `mark_complete` that + // silently dropped the retry window when extractions failed. + let stats_snapshot = self.context.stats(); + self.context.mark_session_memory_started(); + let sm_handle = self.context.session_memory_handle(); + + log::info!( + "[session_memory] spawning background archivist extraction (turn={}, tokens={})", + stats_snapshot.session_memory_current_turn, + stats_snapshot.session_memory_total_tokens + ); + + tokio::spawn(async move { + let options = harness::SubagentRunOptions::default(); + let fut = harness::run_subagent(&definition, &extraction_prompt, options); + let result = harness::with_parent_context(parent_ctx, fut).await; + match result { + Ok(outcome) => { + tracing::info!( + agent_id = %outcome.agent_id, + task_id = %outcome.task_id, + iterations = outcome.iterations, + output_chars = outcome.output.chars().count(), + "[session_memory] archivist extraction completed" + ); + if let Ok(mut sm) = sm_handle.lock() { + sm.mark_extraction_complete(); + } + } + Err(err) => { + tracing::warn!( + error = %err, + "[session_memory] archivist extraction failed — will retry after next threshold crossing" + ); + // Leave the deltas intact so the next threshold + // crossing schedules another attempt. Clearing + // `extraction_in_progress` lets the retry + // actually fire. + if let Ok(mut sm) = sm_handle.lock() { + sm.mark_extraction_failed(); + } + } + } + }); + } + + /// Spawn a background task that ingests the current session + /// transcript into the conversational-memory store. + /// + /// Issue #1399: complements `spawn_session_memory_extraction`. 
The + /// archivist path writes dense bullets into `MEMORY.md`; this path + /// extracts importance-tagged, provenance-bearing memories via the + /// heuristic [`crate::openhuman::learning::transcript_ingest`] + /// pipeline. The two are deliberately independent so the prompt + /// retrieval layer can pull from `conversation_memory` without + /// needing the archivist's extraction to have fired this session. + /// + /// Fire-and-forget: failures are logged, never propagated. + pub(in super::super) fn spawn_transcript_ingestion(&self) { + let Some(path) = self.session_transcript_path.clone() else { + log::debug!("[transcript_ingest] no session transcript path yet — skipping spawn"); + return; + }; + let memory = std::sync::Arc::clone(&self.memory); + + tokio::spawn(async move { + match crate::openhuman::learning::transcript_ingest::ingest_transcript_path( + memory.as_ref(), + &path, + ) + .await + { + Ok(report) => tracing::info!( + transcript = %path.display(), + extracted = report.extracted, + stored = report.stored, + deduped = report.deduped, + reflections_stored = report.reflections_stored, + "[transcript_ingest] background ingest complete" + ), + Err(err) => tracing::warn!( + transcript = %path.display(), + error = %err, + "[transcript_ingest] background ingest failed — will retry next threshold window" + ), + } + }); + } +} diff --git a/src/openhuman/agent/harness/session/turn/tools.rs b/src/openhuman/agent/harness/session/turn/tools.rs new file mode 100644 index 0000000000..4a4d13f504 --- /dev/null +++ b/src/openhuman/agent/harness/session/turn/tools.rs @@ -0,0 +1,482 @@ +//! Tool execution and Composio delegation refresh. 
+ +use super::super::agent_tool_exec; +use super::super::types::Agent; +use super::newly_connected_slugs; +use crate::openhuman::agent::dispatcher::ParsedToolCall; +use crate::openhuman::agent::harness; +use crate::openhuman::agent::hooks::ToolCallRecord; +use crate::openhuman::agent::progress::AgentProgress; + +use std::sync::Arc; + +impl Agent { + // ───────────────────────────────────────────────────────────────── + // Per-call tool execution + // ───────────────────────────────────────────────────────────────── + + /// Executes a single tool call and returns the result and execution record. + /// + /// This method: + /// 1. Emits telemetry events for the start of execution. + /// 2. Handles the special `spawn_subagent` tool with `fork` context. + /// 3. Validates tool visibility and availability. + /// 4. Dispatches to the underlying tool implementation. + /// 5. Applies per-result byte budgets to prevent context window bloat. + /// 6. Sanitizes and records the outcome for post-turn hooks. + pub(in super::super) async fn execute_tool_call( + &self, + call: &ParsedToolCall, + iteration: usize, + ) -> ( + crate::openhuman::agent::dispatcher::ToolExecutionResult, + ToolCallRecord, + ) { + let normalized_call = super::normalize_tool_call(call); + let call: &ParsedToolCall = &normalized_call; + // The per-call execution path lives in the shared + // [`super::agent_tool_exec::run_agent_tool_call`] so `Agent::turn` + // (when migrated to the turn engine, via `AgentToolSource`) and any + // direct caller run the identical logic. Progress is emitted through a + // `TurnProgress` over this agent's sink. Legacy `run_skill`-wrapped + // built-in cron tool calls are normalized to direct calls first. 
+ let progress = super::super::super::engine::TurnProgress::new(self.on_progress.clone()); + let artifact_store = + crate::openhuman::agent::harness::tool_result_artifacts::ToolResultArtifactStore::new( + self.action_dir.clone(), + self.session_key.clone(), + ); + let ctx = agent_tool_exec::AgentToolExecCtx { + tools: &self.tools, + visible_tool_names: &self.visible_tool_names, + tool_policy_session: &self.tool_policy_session, + tool_policy: self.tool_policy.as_ref(), + payload_summarizer: self.payload_summarizer.as_deref(), + event_session_id: self.event_session_id(), + event_channel: self.event_channel(), + agent_definition_id: &self.agent_definition_id, + prefer_markdown: self.context.prefer_markdown_tool_output(), + budget_bytes: self.context.tool_result_budget_bytes(), + artifact_store: Some(&artifact_store), + }; + agent_tool_exec::run_agent_tool_call(&ctx, &progress, call, iteration).await + } + + /// Executes multiple tool calls in sequence. + /// + /// Collects results and execution records for all requested tools in a single batch. + pub(in super::super) async fn execute_tools( + &self, + calls: &[ParsedToolCall], + iteration: usize, + ) -> ( + Vec, + Vec, + ) { + let mut results = Vec::with_capacity(calls.len()); + let mut records = Vec::with_capacity(calls.len()); + for call in calls { + let (exec_result, record) = self.execute_tool_call(call, iteration).await; + results.push(exec_result); + records.push(record); + } + (results, records) + } + + // ───────────────────────────────────────────────────────────────── + // Sub-agent context snapshots + // ───────────────────────────────────────────────────────────────── + + /// Snapshot the parent's runtime so spawned sub-agents can read + /// it via the [`harness::PARENT_CONTEXT`] task-local. 
+ pub(in super::super) fn build_parent_execution_context( + &self, + ) -> harness::ParentExecutionContext { + let allowed_subagent_ids = crate::openhuman::agent::harness::definition::AgentDefinitionRegistry::global() + .and_then(|registry| registry.get(&self.agent_definition_id)) + .map(|definition| { + definition + .subagents + .iter() + .filter_map(|entry| match entry { + crate::openhuman::agent::harness::definition::SubagentEntry::AgentId(id) => { + Some(id.clone()) + } + crate::openhuman::agent::harness::definition::SubagentEntry::Skills(wildcard) + if wildcard.matches_all() => + { + Some("integrations_agent".to_string()) + } + crate::openhuman::agent::harness::definition::SubagentEntry::Skills(_) => None, + }) + .collect() + }) + .unwrap_or_default(); + + harness::ParentExecutionContext { + agent_definition_id: self.agent_definition_id.clone(), + allowed_subagent_ids, + provider: Arc::clone(&self.provider), + all_tools: Arc::clone(&self.tools), + all_tool_specs: Arc::clone(&self.tool_specs), + model_name: self.model_name.clone(), + temperature: self.temperature, + workspace_dir: self.workspace_dir.clone(), + memory: Arc::clone(&self.memory), + agent_config: self.config.clone(), + skills: Arc::new(self.skills.clone()), + memory_context: Arc::new(self.last_memory_context.clone()), + session_id: self.event_session_id().to_string(), + channel: self.event_channel().to_string(), + connected_integrations: self.connected_integrations.clone(), + tool_call_format: self.tool_dispatcher.tool_call_format(), + session_key: self.session_key.clone(), + session_parent_prefix: self.session_parent_prefix.clone(), + on_progress: self.on_progress.clone(), + run_queue: self.run_queue.clone(), + } + } + + /// Emit a lifecycle progress event. 
Uses `send().await` so control + /// events (turn/iteration boundaries, tool_call_started/completed, + /// turn_completed) survive downstream backpressure from the + /// higher-frequency streamed deltas that share the same `on_progress` + /// channel — dropping one of these would desync the web-channel + /// progress bridge (e.g. a tool row stuck in `running` forever). + /// A closed sink is logged and ignored; no progress subscriber is + /// equivalent to success. + pub(in super::super) async fn emit_progress(&self, event: AgentProgress) { + if let Some(ref tx) = self.on_progress { + if let Err(e) = tx.send(event).await { + log::warn!("[agent] progress sink closed while emitting lifecycle event: {e}"); + } + } + } + + /// Fetches the user's active Composio connections and populates + /// `self.connected_integrations` so the system prompt can surface them. + /// + /// Delegates to the shared [`crate::openhuman::composio::fetch_connected_integrations`] + /// which is the single source of truth for integration discovery. + /// + /// **No session-scoped Composio client is cached on the agent any + /// more (#1710 Wave 2)**. Every downstream caller that needs to + /// dispatch a Composio action now resolves a fresh client via + /// [`crate::openhuman::composio::client::create_composio_client`] + /// at call time so the live `composio.mode` toggle is honoured + /// without rebuilding the session — see `ComposioActionTool`, + /// `ProviderContext::execute`, the 5 migrated agent tools in + /// `composio/tools.rs`, and the spawn-time per-action tool build + /// path in `subagent_runner/ops.rs`. 
+ pub async fn fetch_connected_integrations(&mut self) { + let config = match self.integration_runtime_config.clone() { + Some(config) => config, + None => match crate::openhuman::config::Config::load_or_init().await { + Ok(config) => config, + Err(e) => { + log::debug!( + "[agent] skipping connected integrations fetch: config load failed: {e}" + ); + return; + } + }, + }; + self.connected_integrations = + crate::openhuman::composio::fetch_connected_integrations(&config).await; + self.connected_integrations_initialized = true; + } + + /// Lazily attach this session to the global event bus so it can + /// observe `ComposioIntegrationsChanged` notifications. + pub(in super::super) fn ensure_composio_integrations_listener(&mut self) { + if self.composio_integrations_rx.is_some() { + return; + } + if let Some(bus) = crate::core::event_bus::global() { + self.composio_integrations_rx = Some(bus.raw_receiver()); + log::debug!( + "[agent_loop] armed composio integrations listener for session='{}'", + self.event_session_id + ); + } + } + + /// Drain pending `ComposioIntegrationsChanged` events. + /// + /// Returns `true` when we observed at least one relevant event (or lag) and + /// should re-check cached integrations before the next provider call. 
+ pub(in super::super) fn drain_composio_integrations_changed_events(&mut self) -> bool { + self.ensure_composio_integrations_listener(); + let Some(rx) = self.composio_integrations_rx.as_mut() else { + return false; + }; + use tokio::sync::broadcast::error::TryRecvError; + + let mut saw_signal = false; + let mut closed = false; + loop { + match rx.try_recv() { + Ok(crate::core::event_bus::DomainEvent::ComposioIntegrationsChanged { + toolkits, + }) => { + saw_signal = true; + log::info!( + "[agent_loop] received composio integrations changed event (active_toolkits={:?})", + toolkits + ); + } + Ok(_) => {} + Err(TryRecvError::Empty) => break, + Err(TryRecvError::Lagged(skipped)) => { + saw_signal = true; + log::warn!( + "[agent_loop] composio integrations listener lagged by {} event(s); forcing cache re-check", + skipped + ); + } + Err(TryRecvError::Closed) => { + closed = true; + break; + } + } + } + if closed { + self.composio_integrations_rx = None; + } + saw_signal + } + + /// Reconcile the session's delegation schema against the latest cached + /// integrations snapshot. Returns `true` only when a refresh applied. 
+ pub(in super::super) fn refresh_delegation_tools_from_cached_integrations( + &mut self, + trigger: &str, + ) -> bool { + let Some(cfg) = self.integration_runtime_config.as_ref() else { + return false; + }; + let Some(cache_view) = crate::openhuman::composio::cached_active_integrations(cfg) else { + return false; + }; + + let new_hash = crate::openhuman::composio::connected_set_hash(&cache_view); + if new_hash == self.last_seen_integrations_hash { + return false; + } + + log::info!( + "[agent_loop] composio set changed ({trigger}) hash {:x} -> {:x}; refreshing delegation schema (system prompt unchanged for KV cache)", + self.last_seen_integrations_hash, + new_hash + ); + + let prev_integrations = std::mem::replace(&mut self.connected_integrations, cache_view); + if self.refresh_delegation_tools() { + self.last_seen_integrations_hash = new_hash; + self.connected_integrations_initialized = true; + // Surface newly-connected toolkits onto the next user message so + // the model acts on them on the FIRST post-connect ask instead of + // refusing from stale chat context. Schema-only refresh already + // updated the enum; this closes the prose/decision gap. + let connected_slugs: Vec = self + .connected_integrations + .iter() + .map(|i| i.toolkit.clone()) + .collect(); + // Append (don't overwrite) so a second connect before the next + // user turn doesn't drop the first one's announcement. Slugs are + // already de-duped against `announced_integrations`, but guard the + // pending list too in case the same slug is re-queued. 
+ for slug in newly_connected_slugs(&connected_slugs, &mut self.announced_integrations) { + if !self.pending_integration_announcement.contains(&slug) { + self.pending_integration_announcement.push(slug); + } + } + true + } else { + self.connected_integrations = prev_integrations; + false + } + } + + /// Re-synthesise `delegate_*` tools for the orchestrator's `subagents` + /// declaration using the live `connected_integrations` slice, and + /// reconcile the resulting set into `self.tools` / `self.tool_specs` / + /// `self.visible_tool_specs` / `self.visible_tool_names`. + /// + /// **Reconciliation strategy** — full rebuild of the synthesised + /// subset: + /// + /// 1. Drop every tool whose name was in [`Self::synthesized_tool_names`] + /// from the previous synthesis. Direct tools (`query_memory`, + /// `cron_add`, …) are untouched because their names are not in + /// that set. + /// 2. Append the freshly collected synthesis output verbatim. + /// 3. Replace `synthesized_tool_names` with the new set so the + /// next refresh has a clean mask to undo. + /// + /// This is safer than appending-only or strict-diff reconcile: + /// + /// * Stale tools after a revoke can never leak — anything from the + /// previous synthesis is unconditionally dropped, the new set is + /// authoritative. + /// * Direct tools can never be accidentally removed — only names + /// in `synthesized_tool_names` are touched. + /// * Duplicate registration is impossible — retain+extend + /// guarantees every final entry is either a non-synthesised + /// direct tool or a member of the fresh `synthed` set. + /// + /// **When to call**: on turn 1 only when the session was built + /// without a prewarmed Composio cache snapshot, and on any + /// subsequent turn where the connection set has changed since the + /// last reconcile (detected via + /// [`Self::last_seen_integrations_hash`] vs. + /// [`crate::openhuman::composio::cached_active_integrations`]). 
+ /// + /// **Shared-Arc behavior**: when `self.tools` is currently shared + /// (e.g. an in-flight turn cloned the Arc into its tool source), we + /// still refresh `self.tool_specs` / `self.visible_tool_specs` so the + /// provider-facing schema updates immediately. The executable tool + /// registry is refreshed only when `self.tools` has unique ownership. + /// This keeps same-turn routing unblocked while preserving ownership + /// safety for non-cloneable `Box` values. + /// + /// **Return value** — `true` when schema reconciliation succeeded (or + /// no reconcile was needed). Returns `false` only when a non-shared + /// reconcile path failed unexpectedly. + pub fn refresh_delegation_tools(&mut self) -> bool { + use crate::openhuman::agent::harness::definition::AgentDefinitionRegistry; + use crate::openhuman::tools::orchestrator_tools::collect_orchestrator_tools; + + let Some(reg) = AgentDefinitionRegistry::global() else { + // No registry — there's nothing we can do until the + // registry is initialised. The agent's surface stays at + // whatever the builder produced; callers can safely treat + // this as "no reconcile needed right now". + return true; + }; + let Some(def) = reg.get(&self.agent_definition_id) else { + log::debug!( + "[agent] refresh_delegation_tools: definition '{}' not in registry — skipping", + self.agent_definition_id + ); + return true; + }; + if def.subagents.is_empty() { + return true; + } + + let synthed = collect_orchestrator_tools(def, reg, &self.connected_integrations); + let synthed_names: std::collections::HashSet = + synthed.iter().map(|t| t.name().to_string()).collect(); + let synthed_specs: Vec = + synthed.iter().map(|t| t.spec()).collect(); + + // Skip mutation when neither the previous nor the next synthesis + // produced any names — saves work on agents without dynamic + // delegation. 
+ if self.synthesized_tool_names.is_empty() && synthed_names.is_empty() { + return true; + } + + // Mask of the previous synthesis — the names whose `tool_specs` are + // currently live (this set is kept in lock-step with `tool_specs`). + let old_synth = std::mem::take(&mut self.synthesized_tool_names); + + // `tool_specs` are plain data and therefore cloneable; we can always + // reconcile schema even when the Arc is shared. Drop exactly the + // previous synthesised spec set, then append the fresh one. + { + let specs_vec = Arc::make_mut(&mut self.tool_specs); + specs_vec.retain(|s| !old_synth.contains(&s.name)); + specs_vec.extend(synthed_specs); + } + + // `tools` contains non-cloneable trait objects. Reconcile it only when + // uniquely owned. The set of stale synthesised *instances* to drop is + // the previous synthesis (`old_synth`) plus any instances a prior + // shared-Arc refresh couldn't remove (`pending_synthesized_tools_mask`). + let tools_remove_mask: std::collections::HashSet = old_synth + .iter() + .chain(self.pending_synthesized_tools_mask.iter()) + .cloned() + .collect(); + let tools_reconciled = if let Some(tools_vec) = Arc::get_mut(&mut self.tools) { + tools_vec.retain(|t| !tools_remove_mask.contains(t.name())); + tools_vec.extend(synthed); + // `tools` now matches `tool_specs` exactly — nothing pending. + self.pending_synthesized_tools_mask.clear(); + true + } else { + // Schema (`tool_specs`) was updated to the new set, but the stale + // tool *instances* still sit in `self.tools`. Record their names + // so the next unique-owner refresh removes them. Crucially we do + // NOT roll `synthesized_tool_names` back to `old_synth` here — that + // would desync it from `tool_specs` and cause duplicate specs on + // the following refresh (#3044). 
+ self.pending_synthesized_tools_mask = tools_remove_mask; + log::warn!( + "[agent] refresh_delegation_tools: tools Arc is shared — refreshed schema only \ + ({} synthesised tool name(s)); {} stale tool instance(s) pending removal on the next unique-owner refresh", + synthed_names.len(), + self.pending_synthesized_tools_mask.len() + ); + false + }; + + // `visible_tool_names` carries an explicit allowlist for + // [`ToolScope::Named`] agents. Drop the previously-synthesised + // names and add the new ones so the visible set tracks the + // tool list. Wildcard-scope agents keep this empty ("no + // filter") and never need touching. + if !self.visible_tool_names.is_empty() { + for name in &old_synth { + self.visible_tool_names.remove(name); + } + for name in &synthed_names { + self.visible_tool_names.insert(name.clone()); + } + } + + // Rebuild the visible-spec cache from the new tool_specs so the + // next provider call carries the reconciled schema. Dedup + // afterward so a delegate synthesised here (e.g. + // `delegate_name = "research"`) doesn't collide with a + // same-named skill tool on the wire — Anthropic 400s on dup + // tool names where OpenHuman's backend silently accepts. + self.rebuild_tool_policy_session(); + + // Compute add/remove deltas for the log line — useful when + // diagnosing a Composio connect/revoke that should have rebuilt + // the surface but didn't. Materialise to owned `Vec` + // so we can move `synthed_names` into `self.synthesized_tool_names` + // below without the log-statement reborrow blocking the move. + let added: Vec = synthed_names + .iter() + .filter(|n| !old_synth.contains(n.as_str())) + .cloned() + .collect(); + let removed: Vec = old_synth + .iter() + .filter(|n| !synthed_names.contains(n.as_str())) + .cloned() + .collect(); + + // `tool_specs` always reconciled to the new set, so the name mask must + // track that set unconditionally — whether or not `tools` (the + // executable instances) could be reconciled this pass. 
+ self.synthesized_tool_names = synthed_names.clone(); + + log::info!( + "[agent] refresh_delegation_tools: reconciled delegation schema for agent '{}' (display='{}'); now {} synthesised tool name(s); added={:?} removed={:?} tools_reconciled={} pending_tool_instances={}", + self.agent_definition_id, + self.agent_definition_name, + synthed_names.len(), + added, + removed, + tools_reconciled, + self.pending_synthesized_tools_mask.len() + ); + true + } +} diff --git a/src/openhuman/agent/harness/session/turn_checkpoint.rs b/src/openhuman/agent/harness/session/turn_checkpoint.rs index 576658d35b..110fa71939 100644 --- a/src/openhuman/agent/harness/session/turn_checkpoint.rs +++ b/src/openhuman/agent/harness/session/turn_checkpoint.rs @@ -1,7 +1,7 @@ use crate::openhuman::agent::hooks::ToolCallRecord; use crate::openhuman::inference::provider::ChatMessage; -pub(super) fn assistant_message_has_tool_calls(msg: &ChatMessage) -> bool { +pub(crate) fn assistant_message_has_tool_calls(msg: &ChatMessage) -> bool { if msg.role != "assistant" { return false; } diff --git a/src/openhuman/agent/harness/session/turn_tests.rs b/src/openhuman/agent/harness/session/turn_tests.rs index 44090b41c6..81a47632c6 100644 --- a/src/openhuman/agent/harness/session/turn_tests.rs +++ b/src/openhuman/agent/harness/session/turn_tests.rs @@ -7,7 +7,10 @@ use crate::openhuman::agent::tool_policy::{ GeneratedToolRuntimeContext, GeneratedToolRuntimeRisk, ToolPolicy, ToolPolicyDecision, ToolPolicyRequest, }; -use crate::openhuman::inference::provider::{ChatRequest, ChatResponse, Provider, UsageInfo}; +use crate::openhuman::inference::provider::{ + ChatMessage, ChatRequest, ChatResponse, ConversationMessage, Provider, ToolResultMessage, + UsageInfo, +}; use crate::openhuman::memory::Memory; use crate::openhuman::tools::ToolResult; use crate::openhuman::tools::{PermissionLevel, Tool}; diff --git a/src/openhuman/agent/harness/subagent_runner/ops.rs b/src/openhuman/agent/harness/subagent_runner/ops.rs 
deleted file mode 100644 index 033d0cae14..0000000000 --- a/src/openhuman/agent/harness/subagent_runner/ops.rs +++ /dev/null @@ -1,1947 +0,0 @@ -//! Sub-agent execution entry points and the inner tool-call loop. -//! -//! The public runner lives in [`run_subagent`]. It dispatches to -//! [`run_typed_mode`] (narrow prompt + filtered tools) which builds a -//! brand-new system prompt and a filtered tool list for the requested -//! archetype, then drives provider calls and tool execution until the -//! model returns without further tool calls (or the iteration budget -//! is exhausted). - -use std::collections::HashSet; -use std::sync::Arc; -use std::time::Instant; - -use super::super::fork_context::{current_parent, ParentExecutionContext}; -use super::super::session::transcript; -use super::extract_tool::ExtractFromResultTool; -use super::handoff::{ - build_handoff_placeholder, clean_tool_output, ResultHandoffCache, - HANDOFF_OVERSIZE_THRESHOLD_TOKENS, -}; -use super::tool_prep::{ - build_text_mode_tool_instructions, filter_tool_indices, is_subagent_spawn_tool, - load_prompt_source, top_k_for_toolkit, -}; -use super::types::{SubagentMode, SubagentRunError, SubagentRunOptions, SubagentRunOutcome}; -use crate::openhuman::agent::harness::definition::{ - AgentDefinition, IterationPolicy, PromptSource, -}; -use crate::openhuman::agent::harness::{ - current_spawn_depth, with_current_sandbox_mode, with_spawn_depth, MAX_SPAWN_DEPTH, -}; -use crate::openhuman::context::prompt::{ - render_subagent_system_prompt, PromptContext, PromptTool, SubagentRenderOptions, -}; -use crate::openhuman::file_state::with_file_state_agent_id; -use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, Provider}; -use crate::openhuman::memory_conversations::ConversationMessage; -use crate::openhuman::tools::{Tool, ToolCategory, ToolSpec}; - -/// Prompt suffix injected into every typed sub-agent run. 
-/// -/// Purpose: -/// - make the child explicitly aware it is acting as a sub-agent -/// - keep delegated outputs concise so parent-context growth stays bounded -/// - discourage verbose restatement of the delegated task/context -const SUBAGENT_ROLE_CONTRACT_SUFFIX: &str = "## Sub-agent Role Contract\n\n\ -You are a sub-agent working for a parent OpenHuman agent, not a direct end-user assistant.\n\ -- Stay tightly scoped to the delegated task.\n\ -- Keep tool arguments and follow-up prompts compact, include only required fields/context.\n\ -- Keep your final response concise and synthesis-ready for the parent, prefer short bullets or short paragraphs.\n\ -- Do not restate the full task/context unless strictly required for correctness.\n\ -\n\ -## Sub-agent Result Contract\n\n\ -Return a compact result with these headings:\n\ -- Answer\n\ -- Evidence used\n\ -- Actions taken\n\ -- Open uncertainties\n\ -- Failed tool calls\n\ -- Recommended next step\n\ -\n\ -Do not include facts in Answer that are not supported by Evidence used or Actions taken.\n\ -If a tool result was truncated, partial, or too large to inspect fully, say so under Open uncertainties and do not treat it as complete.\n"; - -fn append_subagent_role_contract(base_prompt: String, agent_id: &str) -> String { - if base_prompt.contains(SUBAGENT_ROLE_CONTRACT_SUFFIX.trim()) { - tracing::debug!( - agent_id = %agent_id, - base_chars = base_prompt.chars().count(), - "[subagent_runner] sub-agent role contract already present in system prompt" - ); - return base_prompt; - } - - let mut prompt = base_prompt; - if !prompt.ends_with('\n') { - prompt.push('\n'); - } - prompt.push('\n'); - prompt.push_str(SUBAGENT_ROLE_CONTRACT_SUFFIX); - - tracing::debug!( - agent_id = %agent_id, - suffix_chars = SUBAGENT_ROLE_CONTRACT_SUFFIX.chars().count(), - final_chars = prompt.chars().count(), - "[subagent_runner] appended sub-agent role contract to system prompt" - ); - - prompt -} - -/// Resolve a sub-agent's `(provider, 
model)` based on its declarative -/// `[model]` spec. -/// -/// - inline `model` override — highest precedence for one call. -/// - config-level pin — `[orchestrator] model` or `[teams.*]` -/// `lead_model` / `agent_model`, when present. -/// - `Inherit` — use the parent's provider AND model. Literally -/// "do what the parent does". -/// - `Hint(workload)` — build a fresh provider via the per-workload -/// factory (e.g. `integrations_agent`'s `[model] hint = "agentic"` -/// resolves to whatever `agentic_provider` is routed to in -/// AI Settings). The factory returns the *exact* model id for that -/// workload — the OpenHuman backend and every third-party provider -/// accept exact model names, so there's no `{hint}-v1` synthesis -/// anywhere on this path. -/// - `Exact(name)` — escape hatch: use the parent's provider with -/// this model name overriding the parent's. Callers are expected -/// to know the model is valid for the parent's provider; the enum -/// is the wrong place to encode provider switching, which belongs -/// to `Hint` + AI-settings routing. -/// -/// `config` is `None` when the live `Config::load_or_init()` failed -/// (rare — transient I/O). Both `None` config and factory build errors -/// fall back to `(parent_provider, parent_model)` so a config glitch -/// can't sink sub-agent execution entirely. -/// -/// The async part (config load) is hoisted out of the caller so this -/// helper stays sync and can be exercised by a focused unit test -/// without spinning up a `tokio::test` runtime per case. 
-pub(super) fn resolve_subagent_provider( - spec: &crate::openhuman::agent::harness::definition::ModelSpec, - agent_id: &str, - config: Option<&crate::openhuman::config::Config>, - parent_provider: std::sync::Arc, - parent_model: String, - is_team_lead: bool, - model_override: Option<&str>, -) -> (std::sync::Arc, String) { - use crate::openhuman::agent::harness::definition::ModelSpec; - if let Some(model) = model_override - .map(str::trim) - .filter(|model| !model.is_empty()) - { - log::debug!( - "[subagent_runner] agent_id={} using inline model override model={}", - agent_id, - model - ); - return (parent_provider, model.to_string()); - } - - if let Some(model) = config.and_then(|cfg| cfg.configured_agent_model(agent_id, is_team_lead)) { - log::debug!( - "[subagent_runner] agent_id={} using config-level model pin model={}", - agent_id, - model - ); - return (parent_provider, model.to_string()); - } - - match spec { - ModelSpec::Hint(workload) => match config { - Some(cfg) => { - match crate::openhuman::inference::provider::create_chat_provider(workload, cfg) { - Ok((p, m)) => { - log::info!( - "[subagent_runner] role={} agent_id={} resolved via workload factory model={}", - workload, agent_id, m - ); - (std::sync::Arc::from(p), m) - } - Err(e) => { - let suggested_key = match workload.as_str() { - "summarization" | "memory" => "memory_provider".to_string(), - _ => format!("{workload}_provider"), - }; - log::warn!( - "[subagent_runner] workload='{}' provider build failed for agent_id={} error='{}' \ - falling back to parent provider (parent_model='{}'). 
\ - Consider setting {} in config.", - workload, - agent_id, - e, - parent_model, - suggested_key - ); - (parent_provider, parent_model) - } - } - } - None => { - log::warn!( - "[subagent_runner] config load failed for workload '{}' (agent_id={}) — \ - falling back to parent provider + parent model '{}'", - workload, - agent_id, - parent_model - ); - (parent_provider, parent_model) - } - }, - ModelSpec::Inherit => (parent_provider, parent_model), - ModelSpec::Exact(name) => (parent_provider, name.clone()), - } -} - -/// Lazy resolver that lets `integrations_agent` recover when the model -/// calls a Composio action slug that exists in the bound toolkit's full -/// catalogue but was filtered out of the up-front fuzzy top-K. On a -/// match we build the [`ComposioActionTool`] on demand so the call -/// dispatches normally instead of dead-ending in -/// `Error: tool '...' is not available`. -/// -/// Holds an [`Arc`] rather than a pre-baked -/// [`crate::openhuman::composio::ComposioClient`] so the live -/// `composio.mode` toggle is honoured per execute — see -/// [`crate::openhuman::composio::ComposioActionTool`] and issue #1710. -struct LazyToolkitResolver { - config: std::sync::Arc, - actions: Vec, -} - -impl LazyToolkitResolver { - fn resolve(&self, name: &str) -> Option> { - let action = self.find_action(name)?; - Some(Box::new( - crate::openhuman::composio::ComposioActionTool::new( - self.config.clone(), - action.name.clone(), - action.description.clone(), - action.parameters.clone(), - ), - )) - } - - /// Match a model-supplied tool name to a real toolkit action, tolerant - /// of the near-miss slugs models routinely emit — case differences and - /// separator/prefix drift (bug-report-2026-05-26 A2). 
Tries, in order: - /// exact, case-insensitive, then a normalized alphanumeric match - /// (accepted only when **unique**, so a fabricated slug can't silently - /// resolve to the wrong action — those still fall through to the - /// "tool not available" error, which lists `known_slugs` for the model - /// to self-correct). - fn find_action( - &self, - name: &str, - ) -> Option<&crate::openhuman::context::prompt::ConnectedIntegrationTool> { - if let Some(action) = self.actions.iter().find(|a| a.name == name) { - return Some(action); - } - if let Some(action) = self - .actions - .iter() - .find(|a| a.name.eq_ignore_ascii_case(name)) - { - tracing::debug!( - requested = %name, - matched = %action.name, - "[subagent_runner] resolved tool by case-insensitive match" - ); - return Some(action); - } - let norm = normalize_slug(name); - if !norm.is_empty() { - let mut matches = self - .actions - .iter() - .filter(|a| normalize_slug(&a.name) == norm); - if let Some(action) = matches.next() { - if matches.next().is_none() { - tracing::info!( - requested = %name, - matched = %action.name, - "[subagent_runner] resolved tool by normalized-slug match" - ); - return Some(action); - } - // Ambiguous: 2+ actions normalize to the same slug (e.g. - // `read_file` and `ReadFile` → `readfile`). We deliberately - // refuse to guess. Warn (not debug): a slug collision is a - // toolkit configuration anomaly that should surface in normal - // operator logs, not stay hidden behind debug filtering. - tracing::warn!( - requested = %name, - norm = %norm, - "[subagent_runner] ambiguous normalized-slug match — multiple actions resolve to the same slug; not resolving" - ); - } - } - None - } - - /// Slugs from the bound toolkit, for inclusion in unknown-tool - /// errors so the model can self-correct without burning a turn. 
- fn known_slugs(&self) -> Vec<&str> { - self.actions.iter().map(|a| a.name.as_str()).collect() - } -} - -/// Lowercased, non-alphanumerics stripped — collapses separator/prefix -/// drift (`GOOGLESLIDES_BATCH_UPDATE` vs `googleslides_batch_update`) so -/// near-miss tool slugs still resolve, while genuinely different slugs -/// (e.g. a hallucinated `GMAIL_GET_LAST_3_MESSAGES`) stay distinct. -fn normalize_slug(s: &str) -> String { - s.chars() - .filter(|c| c.is_ascii_alphanumeric()) - .map(|c| c.to_ascii_lowercase()) - .collect() -} - -/// Run a sub-agent based on its definition and a task prompt. -/// -/// This is the primary entry point for agent delegation. It performs the following: -/// 1. Resolves the [`ParentExecutionContext`] task-local. -/// 2. Generates a unique `task_id` if one wasn't provided. -/// 3. Dispatches to `run_typed_mode`. -/// -/// On success returns a [`SubagentRunOutcome`] whose `output` is the -/// final assistant text. On failure the error is suitable for stringifying -/// into a `tool_result` block. -pub async fn run_subagent( - definition: &AgentDefinition, - task_prompt: &str, - options: SubagentRunOptions, -) -> Result { - // Unconditionally heap-allocate the entire run_subagent body so - // every caller — `dispatch_subagent`, `delegate_to_personality`, - // `spawn_subagent`, `spawn_parallel_agents`, `spawn_worker_thread`, - // `continue_subagent`, `escalation`, `payload_summarizer`, - // `session/turn.rs` extraction path, `agent_orchestration::ops`, and - // the recursive case from a sub-agent's own tool — doesn't have to - // carry this future's state inline. 
Tools that delegate run inside - // the parent agent's already-deep `run_turn_engine` poll, so the - // parent's stack would otherwise pile (parent engine state + - // dispatch_subagent state + run_subagent's wrapper state + - // run_typed_mode state + child engine state) onto tokio's 2 MiB - // worker stack and abort with "thread 'tokio-rt-worker' has - // overflowed its stack, fatal runtime error: stack overflow" - // — observed at `[subagent_runner] dispatching agent_id=researcher - // ...` in the `chat-harness-subagent` Playwright lane crash. The - // inner `Box::pin`s around `run_typed_mode` / `run_inner_loop` / - // child `run_turn_engine` further chunk the child's state so a - // single sub-agent run can't blow the stack either. - Box::pin(async move { - let parent = current_parent().ok_or(SubagentRunError::NoParentContext)?; - let task_id = options - .task_id - .clone() - .unwrap_or_else(|| format!("sub-{}", uuid::Uuid::new_v4())); - let started = Instant::now(); - let current_depth = current_spawn_depth(); - let attempted_depth = current_depth.saturating_add(1); - - if attempted_depth > MAX_SPAWN_DEPTH { - tracing::warn!( - agent_id = %definition.id, - task_id = %task_id, - current_depth, - attempted_depth, - max_depth = MAX_SPAWN_DEPTH, - "[subagent_runner] spawn depth exceeded" - ); - return Err(SubagentRunError::SpawnDepthExceeded { - attempted_depth, - max_depth: MAX_SPAWN_DEPTH, - }); - } - - tracing::info!( - agent_id = %definition.id, - task_id = %task_id, - spawn_depth = attempted_depth, - max_spawn_depth = MAX_SPAWN_DEPTH, - prompt_chars = task_prompt.chars().count(), - skill_filter = ?options.skill_filter_override.as_deref().or(definition.skill_filter.as_deref()), - "[subagent_runner] dispatching" - ); - - // Install the sub-agent's declared `sandbox_mode` as the active - // task-local for every tool invocation inside this run. Tools - // that want to gate on it (e.g. 
`composio_execute` rejecting - // Write/Admin slugs under `ReadOnly`) read it via - // `current_sandbox_mode()`; tools that don't care just ignore - // it. Box-pin the inner future so the large `run_typed_mode` - // state machine lives on the heap (#2234 CI failure under - // `cargo-llvm-cov`). - let mut outcome = with_spawn_depth(attempted_depth, async { - with_file_state_agent_id(task_id.clone(), async { - with_current_sandbox_mode(definition.sandbox_mode, async { - Box::pin(run_typed_mode( - definition, - task_prompt, - &options, - &parent, - &task_id, - )) - .await - }) - .await - }) - .await - }) - .await?; - - // Truncate result to the definition's cap if set. - // Use char-count (not byte-length) to avoid panicking on - // multi-byte UTF-8 sequences at the truncation boundary. - if let Some(cap) = definition.max_result_chars { - let original_chars = outcome.output.chars().count(); - if original_chars > cap { - tracing::debug!( - agent_id = %definition.id, - original_chars, - cap, - "[subagent_runner] truncating oversized result to max_result_chars cap" - ); - // Find the byte offset of the cap-th character boundary - // so `truncate` never lands mid-codepoint. 
- let byte_offset = outcome - .output - .char_indices() - .nth(cap) - .map(|(i, _)| i) - .unwrap_or(outcome.output.len()); - outcome.output.truncate(byte_offset); - outcome.output.push_str("\n[...truncated]"); - } - } - - tracing::info!( - agent_id = %definition.id, - task_id = %task_id, - spawn_depth = attempted_depth, - elapsed_ms = outcome.elapsed.as_millis() as u64, - iterations = outcome.iterations, - output_chars = outcome.output.chars().count(), - "[subagent_runner] completed" - ); - - let _ = started; // silence unused-warning if logging is compiled out - Ok(outcome) - }) - .await -} - -// ───────────────────────────────────────────────────────────────────────────── -// Typed mode — narrow prompt, filtered tools, cheaper model -// ───────────────────────────────────────────────────────────────────────────── - -/// Deduplicate assembled tool specs by name, keeping the first occurrence. -/// -/// The sub-agent's `filtered_specs` is a `Vec` assembled from -/// `parent.all_tool_specs` indices plus dynamic tools, so a delegation tool can -/// shadow a same-named skill/integration tool (common for the wide-set -/// `tools_agent`), leaving two specs with the same name. Strict providers reject -/// such a request with `400 "Tool names must be unique."` The main-agent path -/// dedups via [`session::builder::dedup_visible_tool_specs`]; this separate -/// sub-agent assembly must do the same. -/// -/// First occurrence wins so registration-order semantics are preserved (tool -/// dispatch still resolves by name). Dropped duplicates are logged at `debug` -/// (diagnostic instrumentation, per the repo Rust logging guideline). -/// -/// Extracted as a free function so the regression suite can exercise the dedup -/// without standing up the full `run_typed_mode` plumbing. 
-fn dedup_tool_specs_by_name(agent_id: &str, specs: Vec) -> Vec { - let mut seen: HashSet = HashSet::with_capacity(specs.len()); - let mut deduped: Vec = Vec::with_capacity(specs.len()); - let mut dropped: Vec = Vec::new(); - for spec in specs { - if seen.insert(spec.name.clone()) { - deduped.push(spec); - } else { - dropped.push(spec.name); - } - } - if !dropped.is_empty() { - tracing::debug!( - agent_id = %agent_id, - "[subagent_runner] dropped {} duplicate tool spec(s) before sending to provider: {:?}", - dropped.len(), - dropped - ); - } - deduped -} - -/// Execute a sub-agent in "Typed" mode. -/// -/// This mode builds a brand-new, minimized system prompt specifically for the -/// agent's archetype. It filters the parent's tools down to only those allowed -/// by the definition and per-spawn overrides. -async fn run_typed_mode( - definition: &AgentDefinition, - task_prompt: &str, - options: &SubagentRunOptions, - parent: &ParentExecutionContext, - task_id: &str, -) -> Result { - let started = Instant::now(); - - // Resolve provider + model. See `resolve_subagent_provider` for the - // semantics of each ModelSpec variant. `Config::load_or_init()` is - // async so the load is hoisted out of the helper — the helper itself - // is sync and unit-tested. - let config_loaded = crate::openhuman::config::Config::load_or_init().await; - let (subagent_provider, model) = resolve_subagent_provider( - &definition.model, - &definition.id, - config_loaded.as_ref().ok(), - parent.provider.clone(), - parent.model_name.clone(), - !definition.subagents.is_empty(), - options.model_override.as_deref(), - ); - let temperature = definition.temperature; - - // Archetype prompt loading is deferred until AFTER tool filtering so - // dynamic builders receive the final, filtered tool list (rather - // than the parent's full registry). The actual - // `load_prompt_source(...)` call lives just above - // `render_subagent_system_prompt` below. 
- - // ── Refresh connected-integrations at spawn time ─────────────────── - // - // The parent session's `connected_integrations` Vec is frozen at - // session-start (see `session/turn.rs::fetch_connected_integrations`, - // which only runs while `history.is_empty()` to preserve the - // KV-cache prefix). That means a toolkit the user authorised mid- - // thread — e.g. Calendly — is missing from `parent.connected_integrations`, - // and the spawn-time toolkit lookup further down rejects it as - // "not allowlisted / not connected" until the user starts a new - // thread or restarts the app. - // - // Re-fetch from the global integrations cache here. The cache is - // invalidated by `ComposioConnectionCreatedSubscriber` once the - // OAuth handshake reaches ACTIVE/CONNECTED, so this call returns - // the fresh list almost for free on the warm path. Fall back to - // the parent's frozen list when the live fetch returns empty (no - // signed-in user, backend unreachable, …) so offline / not-signed- - // in behaviour is unchanged. - let live_integrations: Vec = { - // Mode-aware "is the user able to call composio at all?" probe. - // `create_composio_client` returns `Ok(_)` whenever the user has - // EITHER a backend session token (backend mode) OR a stored - // direct-mode API key — so a direct-mode user with only a key - // in the keychain is now correctly recognised as "signed in" - // for the spawn-time refresh path (#1710 Wave 2). Pre-fix this - // gate read `parent.composio_client.is_none()`, which was only - // ever populated in backend mode and silently skipped the live - // refresh for direct-mode users. - // - // We resolve here purely as a probe — the client itself is - // dropped immediately. Per-action dispatch below (and inside - // `ComposioActionTool::execute`) re-resolves through the - // factory so the live `composio.mode` toggle keeps winning. 
- let probe_config = crate::openhuman::config::Config::load_or_init().await.ok(); - let signed_in = probe_config - .as_ref() - .map(user_is_signed_in_to_composio) - .unwrap_or(false); - if !signed_in { - parent.connected_integrations.clone() - } else { - match crate::openhuman::config::Config::load_or_init().await { - Ok(config) => { - use crate::openhuman::composio::FetchConnectedIntegrationsStatus; - // `fetch_connected_integrations_status` distinguishes - // an authoritative empty list (user disconnected - // their last integration mid-thread) from - // backend-unavailable (no client / transient error). - // Adopt the authoritative case as truth — even when - // empty — so a revoked toolkit really disappears - // from the spawn pre-flight; only fall back to the - // parent's frozen list when the backend explicitly - // can't answer. - match crate::openhuman::composio::fetch_connected_integrations_status(&config) - .await - { - FetchConnectedIntegrationsStatus::Authoritative(fresh) => { - tracing::debug!( - count = fresh.len(), - parent_count = parent.connected_integrations.len(), - "[subagent_runner] refreshed connected_integrations at spawn time" - ); - fresh - } - FetchConnectedIntegrationsStatus::Unavailable => { - tracing::debug!( - "[subagent_runner] integrations backend unavailable; falling back to parent's frozen list" - ); - parent.connected_integrations.clone() - } - } - } - Err(e) => { - // Real failure — config couldn't be read, so the - // backend client can't be built either. Use the - // parent's frozen list as a best-effort fallback so - // the spawn can still proceed for sessions that - // were established when config was healthy. 
- tracing::debug!( - error = %e, - "[subagent_runner] config load failed; falling back to parent's frozen integrations list" - ); - parent.connected_integrations.clone() - } - } - } - }; - - // ── Filter tools per definition + per-spawn override ─────────────── - let toolkit_filter = options.toolkit_override.as_deref(); - let mut allowed_indices = filter_tool_indices( - &parent.all_tools, - &definition.tools, - &definition.disallowed_tools, - options - .skill_filter_override - .as_deref() - .or(definition.skill_filter.as_deref()), - ); - - // Sub-agents must never spawn their own sub-agents. Nested spawns - // create a recursion tree the harness doesn't budget, observe, or - // cost-attribute — and historically produced runaway dispatch loops - // (e.g. summarizer → summarizer → …). The orchestrator is the only - // node that delegates; every archetype running here is, by - // definition, a sub-agent. Strip `spawn_subagent` and every - // synthesised `delegate_*` tool regardless of the archetype's - // declared scope. This is belt-and-braces: archetype definitions - // should not list these tools either, but we enforce it here so a - // misconfigured TOML can't bypass the rule. - let before = allowed_indices.len(); - allowed_indices.retain(|&i| { - let name = parent.all_tools[i].name(); - !is_subagent_spawn_tool(name) && name != "spawn_worker_thread" - }); - let stripped = before - allowed_indices.len(); - if stripped > 0 { - tracing::debug!( - agent_id = %definition.id, - stripped, - "[subagent_runner] removed sub-agent spawn tools from sub-agent's tool surface" - ); - } - - // ── Force-include extra_tools ────────────────────────────────────── - // - // `extra_tools` is a simple "also include these" hook that bypasses - // [`ToolScope`] / [`AgentDefinition::skill_filter`] but still honours - // `disallowed_tools`. 
Historically this was the bypass list for the - // now-removed `category_filter`; it remains useful for custom - // definitions that want to add a couple of named tools on top of a - // narrow scope. - if !definition.extra_tools.is_empty() { - let disallow_set: std::collections::HashSet<&str> = definition - .disallowed_tools - .iter() - .map(|s| s.as_str()) - .collect(); - for (i, tool) in parent.all_tools.iter().enumerate() { - let name = tool.name(); - if definition.extra_tools.iter().any(|n| n == name) - && !allowed_indices.contains(&i) - && !disallow_set.contains(name) - // `extra_tools` cannot be used to bypass the sub-agent - // spawn guard above — a stray TOML entry listing - // `spawn_subagent` there must still be dropped. - && !is_subagent_spawn_tool(name) - { - allowed_indices.push(i); - } - } - } - - // ── Dynamic per-action toolkit tools (integrations_agent + toolkit) ────── - // - // When `integrations_agent` is spawned with a `toolkit` argument (e.g. - // `toolkit="gmail"`), build one [`ComposioActionTool`] per action - // in that toolkit and inject them into the sub-agent's tool list. - // Each carries the action's real JSON schema, so the LLM's native - // tool-calling path validates arguments before they hit the wire - // — no more "guess parameters from prose then dispatch through - // composio_execute" round-trips. - // - // Generic dispatchers (`composio_execute`, `composio_list_tools`) - // are stripped from the parent-filtered indices in this path so - // the model only sees one way to call each action. - let mut dynamic_tools: Vec> = Vec::new(); - let mut lazy_resolver: Option = None; - let is_integrations_agent_with_toolkit = - definition.id == "integrations_agent" && toolkit_filter.is_some(); - - // `tools_agent` is the Composio-free counterpart to - // `integrations_agent`: it inherits the orchestrator's wildcard - // scope but must never see Workflow-category tools. 
Stripping them - // here (before any dynamic additions) keeps the parent-fed - // `allowed_indices` clean of composio_* meta-tools and - // toolkit-specific action tools. Delegation to integrations_agent - // is the orchestrator's job, not this agent's. - if definition.id == "tools_agent" { - allowed_indices.retain(|&i| parent.all_tools[i].category() != ToolCategory::Workflow); - } - - if is_integrations_agent_with_toolkit { - // Tool visibility is fully governed by the TOML scope - // (`agent.tools.named = [...]` on the integrations_agent - // definition) plus the dynamic per-action ComposioActionTools - // injected below. Anything the agent author explicitly named - // in the TOML is kept as-is — no extra stripping here. - // Previously we dropped every Workflow-category tool at this - // point, which also dropped `composio_list_tools` / - // `composio_execute` whenever they were declared in the TOML, - // making the TOML changes look like no-ops. - - if let Some(tk) = toolkit_filter { - // Load a fresh `Arc` for the dynamic - // `ComposioActionTool`s registered below. Pre-Wave-2 this - // path was gated on `parent.composio_client.as_ref()` — - // backend-only by construction, so direct-mode users were - // silently dropped here even after they'd connected the - // toolkit on `app.composio.dev`. Resolving the client - // through the mode-aware factory closes that gap and keeps - // the registration in lockstep with `ComposioActionTool`'s - // per-call dispatch (#1710). 
- let arc_config = match crate::openhuman::config::Config::load_or_init().await { - Ok(c) => std::sync::Arc::new(c), - Err(e) => { - tracing::warn!( - agent_id = %definition.id, - toolkit = %tk, - error = %e, - "[subagent_runner:typed] config load failed; dynamic composio tools won't be registered" - ); - return Err(SubagentRunError::Provider(anyhow::anyhow!( - "subagent_runner: config load failed building integrations_agent for toolkit `{tk}`: {e}" - ))); - } - }; - - // Resolve the live client kind for the catalogue refresh - // path. Backend mode keeps the existing - // `fetch_toolkit_actions` round-trip. Direct mode mirrors - // the `ComposioListToolsTool` short-circuit — the backend - // toolkit allowlist isn't authoritative for a personal - // Composio tenant, so we fall back to the parent's cached - // catalogue rather than emit a misleading "couldn't fetch" - // surface (#1710 Wave 2). - use crate::openhuman::composio::client::{create_composio_client, ComposioClientKind}; - let client_kind = match create_composio_client(arc_config.as_ref()) { - Ok(k) => Some(k), - Err(e) => { - tracing::warn!( - agent_id = %definition.id, - toolkit = %tk, - error = %e, - "[subagent_runner:typed] composio factory failed; dynamic per-action tools fall back to cached catalogue" - ); - None - } - }; - - // The spawn_subagent pre-flight already verified the - // toolkit is in the allowlist AND has an active - // connection, so the matching entry must be present and - // marked connected. Defensive lookup anyway. Reads from - // `live_integrations` (refreshed above) rather than the - // session-frozen `parent.connected_integrations` so a - // mid-thread `composio_authorize` is visible without a - // new thread / restart. - if let Some(cached_integration) = live_integrations - .iter() - .find(|ci| ci.connected && ci.toolkit.eq_ignore_ascii_case(tk)) - { - // Refresh the toolkit's action catalogue at spawn time - // by calling `composio_list_tools` for the bound toolkit. 
- // The cached list on `parent.connected_integrations` - // comes from the session-start bulk fetch, which can - // return zero actions for some toolkits even when the - // per-toolkit endpoint returns a full catalogue. Falling - // back to the cached list preserves the previous - // behaviour on network failure. - let fresh_actions = match &client_kind { - Some(ComposioClientKind::Backend(client)) => { - match crate::openhuman::composio::fetch_toolkit_actions(client, tk, None) - .await - { - Ok(actions) if !actions.is_empty() => actions, - Ok(_) => { - tracing::debug!( - agent_id = %definition.id, - toolkit = %tk, - "[subagent_runner:typed] fresh list_tools returned empty; falling back to cached catalogue" - ); - cached_integration.tools.clone() - } - Err(e) => { - tracing::warn!( - agent_id = %definition.id, - toolkit = %tk, - error = %e, - "[subagent_runner:typed] fresh list_tools failed; falling back to cached catalogue" - ); - cached_integration.tools.clone() - } - } - } - Some(ComposioClientKind::Direct(_)) => { - // Direct mode has no backend-allowlist catalogue - // refresh path — the personal Composio tenant - // governs availability. Mirror the - // `ComposioListToolsTool` direct-mode short- - // circuit and fall back to the cached catalogue - // bulk-fetched at session start (#1710 Wave 2). 
- tracing::info!( - agent_id = %definition.id, - toolkit = %tk, - cached_actions = cached_integration.tools.len(), - "[composio-direct] subagent_runner:typed: direct mode active — using cached catalogue, skipping backend list_tools refresh" - ); - cached_integration.tools.clone() - } - None => { - tracing::debug!( - agent_id = %definition.id, - toolkit = %tk, - cached_actions = cached_integration.tools.len(), - "[subagent_runner:typed] composio client unavailable; using cached catalogue" - ); - cached_integration.tools.clone() - } - }; - let integration = crate::openhuman::context::prompt::ConnectedIntegration { - toolkit: cached_integration.toolkit.clone(), - description: cached_integration.description.clone(), - tools: fresh_actions, - // Inherit the cached gated set: this spawn path only - // refreshes the *visible* (callable) actions from the - // backend; the gated/unlock-hint surface is computed - // by `fetch_connected_integrations_uncached` against - // the user pref and doesn't change per-spawn. - gated_tools: cached_integration.gated_tools.clone(), - connected: cached_integration.connected, - // Inherit the cached non-active status — this spawn - // path only fires on connected toolkits, but keep the - // field consistent with the source row for #2365. - non_active_status: cached_integration.non_active_status.clone(), - }; - let integration = &integration; - // Fuzzy-filter the toolkit's actions against the task prompt - // so large catalogues (e.g. github ~500 actions) are narrowed - // to the handful actually relevant to this delegation. The - // orchestrator's `SkillDelegationTool` schema forces the - // prompt to be a clear, context-rich instruction, so it's a - // reliable matching target. 
- // - // Heavy-schema toolkits (Gmail, Notion, GitHub, Salesforce, - // HubSpot, Google Workspace, Microsoft Teams) ship per-action - // JSON schemas so dense that even a moderate top-K blows the - // request past Fireworks' 65 535-rule grammar cap in native - // mode and the 196 607-token context cap in text mode. Tight - // top-K of 12 keeps those toolkits inside both ceilings while - // still giving the fuzzy scorer room for adjacent matches. - // Lighter toolkits (reddit, slack, linear, telegram, …) keep - // the looser top-K of 25. - // - // Fallback: if the filter yields fewer than - // `MIN_CONFIDENT_HITS` results, register every action. A - // too-narrow filter is worse than none — it starves the - // sub-agent and forces it to guess. - let top_k = top_k_for_toolkit(tk); - let filter_hits = super::super::tool_filter::filter_actions_by_prompt( - task_prompt, - &integration.tools, - top_k, - ); - let selected: Vec<&crate::openhuman::context::prompt::ConnectedIntegrationTool> = - if filter_hits.len() >= super::super::tool_filter::MIN_CONFIDENT_HITS { - tracing::info!( - agent_id = %definition.id, - toolkit = %tk, - total = integration.tools.len(), - kept = filter_hits.len(), - top_k = top_k, - "[subagent_runner:typed] fuzzy tool filter narrowed toolkit" - ); - filter_hits.iter().map(|&i| &integration.tools[i]).collect() - } else { - tracing::info!( - agent_id = %definition.id, - toolkit = %tk, - total = integration.tools.len(), - filter_hits = filter_hits.len(), - "[subagent_runner:typed] fuzzy filter thin; falling back to full toolkit" - ); - integration.tools.iter().collect() - }; - - for action in selected { - dynamic_tools.push(Box::new( - crate::openhuman::composio::ComposioActionTool::new( - arc_config.clone(), - action.name.clone(), - action.description.clone(), - action.parameters.clone(), - ), - )); - } - tracing::debug!( - agent_id = %definition.id, - toolkit = %tk, - action_count = dynamic_tools.len(), - "[subagent_runner:typed] dynamically 
registered per-action composio tools" - ); - // Stash the full catalogue so the inner loop can lazily - // register actions that the fuzzy top-K dropped — the - // model often picks the right slug anyway and the - // existing fuzzy filter exists only to keep schemas out - // of the system prompt, not to gate execution. - lazy_resolver = Some(LazyToolkitResolver { - config: arc_config.clone(), - actions: integration.tools.clone(), - }); - } else { - tracing::warn!( - agent_id = %definition.id, - toolkit = %tk, - "[subagent_runner:typed] toolkit not found among parent's connected integrations; sub-agent will have no callable actions (spawn_subagent pre-flight should have caught this)" - ); - } - } - } - - // ── Progressive-disclosure handoff cache ─────────────────────────── - // - // Built only for integrations_agent-with-toolkit because that's the only - // typed sub-agent that regularly calls external tools capable of - // returning megabyte-scale payloads (Composio actions). Every other - // typed sub-agent gets `None` and its tool results stay inline. - // - // When enabled, oversized tool results get stashed into this cache - // and their place in history is taken by a short placeholder (see - // `build_handoff_placeholder`). The sub-agent can then call the - // companion `extract_from_result` tool below to run a direct - // provider call against the cached payload with a targeted query. - // Lazy / pay-per-question, so trivial asks answerable from the - // preview don't pay any extra LLM cost. - let handoff_cache: Option> = if is_integrations_agent_with_toolkit { - let cache = Arc::new(ResultHandoffCache::new()); - - // `extract_from_result` is now a pure tool — it takes the - // parent's provider and calls `chat_with_system` directly - // against the extraction model, instead of spawning the - // `summarizer` sub-agent. 
Removes an entire layer of harness - // scaffolding (system prompt assembly, tool-loop, recursion - // guards) that this workload never needed. - // - // Transcript plumbing: the extraction LLM still costs tokens, - // so each call writes a self-contained transcript under - // `session_raw/DDMMYYYY/` (and its companion `.md`) keyed by - // the parent chain, to match the rest of the session tree. - let parent_chain = match parent.session_parent_prefix.as_deref() { - Some(prefix) => format!("{}__{}", prefix, parent.session_key), - None => parent.session_key.clone(), - }; - dynamic_tools.push(Box::new(ExtractFromResultTool::new( - cache.clone(), - parent.provider.clone(), - parent.workspace_dir.clone(), - parent_chain, - definition.id.clone(), - ))); - tracing::debug!( - agent_id = %definition.id, - "[subagent_runner:typed] registered extract_from_result tool + handoff cache" - ); - - Some(cache) - } else { - None - }; - - // Build provider-visible tool schemas in EXECUTION-PRECEDENCE order: - // `dynamic_tools` (extra_tools at runtime) before parent specs, because - // the inner loop's name lookup (see end of this fn) resolves - // `extra_tools` first and only falls back to `parent_tools`. Aligning - // the dedup order with the runtime lookup order guarantees the schema - // the model sees and the tool that actually executes describe the same - // behaviour. (CodeRabbit review on PR #2446.) - let mut filtered_specs: Vec = dynamic_tools.iter().map(|t| t.spec()).collect(); - filtered_specs.extend( - allowed_indices - .iter() - .map(|&i| parent.all_tool_specs[i].clone()), - ); - let mut allowed_names: HashSet = allowed_indices - .iter() - .map(|&i| parent.all_tools[i].name().to_string()) - .collect(); - // Dynamic tool names must also be in the allowlist so the inner loop - // accepts model tool_calls that reference them. - for tool in &dynamic_tools { - allowed_names.insert(tool.name().to_string()); - } - // Dedup by name: first occurrence wins. 
Dynamic Composio action tools - // can share a name with an inherited parent-registry spec when the - // agent's AllowedAll scope includes a same-named skill tool. Some - // providers (Anthropic, OpenHuman cloud after the uniqueness-enforcement - // rollout) 400 on duplicate tool names — see TAURI-RUST-4. Because - // `filtered_specs` is in execution order (dynamic first), the kept - // schema matches what the runtime will actually dispatch. - let filtered_specs = - crate::openhuman::agent::harness::session::dedup_visible_tool_specs(filtered_specs); - - // Dedup by tool name before the specs reach the provider (see - // `dedup_tool_specs_by_name` for why duplicates appear here). - let filtered_specs = dedup_tool_specs_by_name(&definition.id, filtered_specs); - - tracing::debug!( - agent_id = %definition.id, - model = %model, - tool_count = allowed_names.len(), - max_iterations = definition.effective_max_iterations(), - iteration_policy = ?definition.iteration_policy, - "[subagent_runner:typed] resolved configuration" - ); - - // ── Build the narrow system prompt ───────────────────────────────── - // - // The renderer lives in `context::prompt` alongside the rest of - // the system-prompt code so all prompt assembly has one home. - // We still use the purpose-built narrow renderer rather than the - // general `SystemPromptBuilder::for_subagent` because the builder - // requires a slice of `Box` and we only have indices - // into the parent's vec (Box isn't Clone, so we can't build an - // owning filtered slice cheaply). - // - // Per-definition omit_* flags are threaded through via - // `SubagentRenderOptions` — previously the narrow renderer - // hard-coded all three as "omit", which silently downgraded - // definitions like `code_executor` / `tool_maker` / `integrations_agent` - // that set `omit_safety_preamble = false`. 
- let render_options = SubagentRenderOptions::from_definition_flags( - definition.omit_identity, - definition.omit_safety_preamble, - definition.omit_skills_catalog, - definition.omit_profile, - definition.omit_memory_md, - ); - - // Sub-agent prompt rendering: only ever surface CONNECTED - // integrations. When narrowed to a specific toolkit, we further - // restrict to that one entry. Not-connected entries belong only - // in the orchestrator's Delegation Guide; they have no place in - // a sub-agent that's actually executing work. - let narrowed_integrations: Vec = - match toolkit_filter { - Some(tk) => live_integrations - .iter() - .filter(|ci| ci.connected && ci.toolkit.eq_ignore_ascii_case(tk)) - .cloned() - .collect(), - None => live_integrations - .iter() - .filter(|ci| ci.connected) - .cloned() - .collect(), - }; - // ── Resolve archetype prompt body (post-filter) ──────────────────── - // - // Build a live [`PromptContext`] — same shape the main agent uses - // on every turn — so `Dynamic` builders can compose the full - // system prompt via the section helpers in - // [`crate::openhuman::context::prompt`]. `Inline` / `File` sources - // continue to use the legacy `render_subagent_system_prompt` - // wrapper. - let prompt_tools: Vec> = allowed_indices - .iter() - .map(|&i| { - let t = parent.all_tools[i].as_ref(); - PromptTool { - name: t.name(), - description: t.description(), - parameters_schema: Some(t.parameters_schema().to_string()), - } - }) - .chain(dynamic_tools.iter().map(|t| PromptTool { - name: t.name(), - description: t.description(), - parameters_schema: Some(t.parameters_schema().to_string()), - })) - .collect(); - // Derive the visible-tool set from the prompt tool list so prompt - // sections that gate on `visible_tool_names` (e.g. tool-protocol - // notes) see exactly what the model sees, rather than an empty set. 
- let visible_tool_names: std::collections::HashSet = - prompt_tools.iter().map(|t| t.name.to_string()).collect(); - // Match the main-agent turn (`session/turn.rs::build_system_prompt`) - // by supplying the dispatcher's protocol instructions here. Dynamic - // prompt builders route tools through `render_tools(ctx)`, which - // appends `ctx.dispatcher_instructions` after the tool catalogue — - // passing an empty string drops the `## Tool Use Protocol` block and - // leaves PFormat/Json sub-agents with no call-format guidance. - let dispatcher_instructions = { - use crate::openhuman::agent::dispatcher::{ - NativeToolDispatcher, PFormatToolDispatcher, ToolDispatcher, XmlToolDispatcher, - }; - use crate::openhuman::agent::pformat::PFormatRegistry; - use crate::openhuman::context::prompt::ToolCallFormat; - let empty_tools: Vec> = Vec::new(); - match parent.tool_call_format { - ToolCallFormat::PFormat => { - PFormatToolDispatcher::new(PFormatRegistry::new()).prompt_instructions(&empty_tools) - } - ToolCallFormat::Native => NativeToolDispatcher.prompt_instructions(&empty_tools), - ToolCallFormat::Json => XmlToolDispatcher.prompt_instructions(&empty_tools), - } - }; - let prompt_ctx = PromptContext { - workspace_dir: &parent.workspace_dir, - model_name: &model, - agent_id: &definition.id, - tools: &prompt_tools, - skills: &parent.skills, - dispatcher_instructions: &dispatcher_instructions, - learned: crate::openhuman::context::prompt::LearnedContextData::default(), - visible_tool_names: &visible_tool_names, - tool_call_format: parent.tool_call_format, - connected_integrations: &narrowed_integrations, - connected_identities_md: crate::openhuman::agent::prompts::render_connected_identities(), - include_profile: !definition.omit_profile, - include_memory_md: !definition.omit_memory_md, - curated_snapshot: None, - user_identity: crate::openhuman::app_state::peek_cached_current_user_identity(), - personality_soul_md: None, - personality_memory_md: None, - personality_roster: 
vec![], - }; - - let system_prompt = match &definition.system_prompt { - PromptSource::Dynamic(build) => { - // Function-driven builder returns the final prompt text. - build(&prompt_ctx).map_err(|e| SubagentRunError::PromptLoad { - path: format!("", definition.id), - source: std::io::Error::other(e.to_string()), - })? - } - PromptSource::Inline(_) | PromptSource::File { .. } => { - // Legacy path for TOML-authored agents: load the raw body, - // then wrap it with the canonical section layout. - let archetype_prompt_body = load_prompt_source(&definition.system_prompt, &prompt_ctx)?; - render_subagent_system_prompt( - &parent.workspace_dir, - &model, - &allowed_indices, - &parent.all_tools, - &dynamic_tools, - &archetype_prompt_body, - render_options, - parent.tool_call_format, - &narrowed_integrations, - ) - } - }; - - let system_prompt = append_subagent_role_contract(system_prompt, &definition.id); - - // ── Build the user message (with optional context prefix) ────────── - // Merge explicit orchestrator context with the parent's auto-loaded - // memory context, but only when the definition opts into memory - // inheritance. - let now = chrono::Local::now(); - let now_str = format!( - "Current Date & Time: {} ({})", - now.format("%Y-%m-%d %H:%M:%S"), - now.format("%Z") - ); - - let mut context_parts: Vec<&str> = Vec::new(); - if !definition.omit_memory_context { - if let Some(ref mem_ctx) = *parent.memory_context { - context_parts.push(mem_ctx); - } - } - - // Always include temporal context for typed sub-agents. System prompts - // for sub-agents are byte-stable for KV cache reuse, so "now" must - // ride in the user message. 
- context_parts.push(&now_str); - - if let Some(ref ctx) = options.context { - context_parts.push(ctx); - } - let mut history: Vec = if let Some(ref initial) = options.initial_history { - tracing::info!( - agent_id = %definition.id, - task_id = %task_id, - history_len = initial.len(), - "[subagent_runner] resuming with initial_history (checkpoint replay)" - ); - initial.clone() - } else { - let user_message = if context_parts.is_empty() { - task_prompt.to_string() - } else { - format!("[Context]\n{}\n\n{task_prompt}", context_parts.join("\n\n")) - }; - vec![ - ChatMessage::system(system_prompt), - ChatMessage::user(user_message), - ] - }; - - // ── Run the inner tool-call loop ─────────────────────────────────── - // Transcript persistence lives INSIDE the loop (one write per - // provider response), mirroring the main-agent turn loop in - // `session/turn.rs`. No post-loop write needed here. - // Box-pin so `run_inner_loop`'s state machine (which itself wraps - // the engine call below) is heap-allocated independently of - // `run_typed_mode`. Belt-and-braces with the inner engine box at - // the recursion boundary inside `run_inner_loop`. - let (output, iterations, _agg_usage, early_exit_tool) = Box::pin(run_inner_loop( - subagent_provider.as_ref(), - &mut history, - &parent.all_tools, - dynamic_tools, - &filtered_specs, - allowed_names, - lazy_resolver, - &model, - temperature, - definition.effective_max_iterations(), - task_id, - &definition.id, - options.worker_thread_id.clone(), - handoff_cache.as_deref(), - parent, - definition.iteration_policy == IterationPolicy::Extended, - )) - .await?; - - // Determine status: if the turn engine exited early because of - // ask_user_clarification, checkpoint the history and return - // AwaitingUser so the orchestrator can relay the user's answer. 
- let status = if early_exit_tool.as_deref() == Some("ask_user_clarification") { - let question = output.clone(); - let options_vec: Option> = None; - - // Persist checkpoint so `continue_subagent` can resume later. - let checkpoint_dir = options - .checkpoint_dir - .clone() - .unwrap_or_else(|| parent.workspace_dir.join(".openhuman/subagent_checkpoints")); - if let Err(e) = std::fs::create_dir_all(&checkpoint_dir) { - tracing::warn!( - task_id = %task_id, - error = %e, - "[subagent_runner] failed to create checkpoint directory" - ); - } else { - let checkpoint_data = super::types::SubagentCheckpointData { - task_id: task_id.to_string(), - agent_id: definition.id.clone(), - worker_thread_id: options.worker_thread_id.clone(), - history: history.clone(), - question: question.clone(), - options: options_vec.clone(), - toolkit_override: options.toolkit_override.clone(), - skill_filter_override: options.skill_filter_override.clone(), - model_override: options.model_override.clone(), - created_at: chrono::Utc::now().to_rfc3339(), - }; - let checkpoint_path = checkpoint_dir.join(format!("{task_id}.json")); - match serde_json::to_string_pretty(&checkpoint_data) { - Ok(json) => { - if let Err(e) = std::fs::write(&checkpoint_path, json) { - tracing::warn!( - task_id = %task_id, - path = %checkpoint_path.display(), - error = %e, - "[subagent_runner] failed to write checkpoint" - ); - } else { - tracing::info!( - task_id = %task_id, - path = %checkpoint_path.display(), - history_len = history.len(), - "[subagent_runner] checkpoint written for awaiting_user" - ); - } - } - Err(e) => { - tracing::warn!( - task_id = %task_id, - error = %e, - "[subagent_runner] failed to serialize checkpoint" - ); - } - } - } - - super::types::SubagentRunStatus::AwaitingUser { - question, - options: options_vec, - } - } else { - super::types::SubagentRunStatus::Completed - }; - - Ok(SubagentRunOutcome { - task_id: task_id.to_string(), - agent_id: definition.id.clone(), - output, - iterations, - 
elapsed: started.elapsed(), - mode: SubagentMode::Typed, - status, - }) -} - -// ───────────────────────────────────────────────────────────────────────────── -// Inner tool-call loop (slim version of agent::loop_::tool_loop) -// ───────────────────────────────────────────────────────────────────────────── - -/// Cumulative usage stats gathered across all provider calls in the loop. -#[derive(Debug, Clone, Default)] -struct AggregatedUsage { - input_tokens: u64, - output_tokens: u64, - cached_input_tokens: u64, - charged_amount_usd: f64, -} - -/// The sub-agent's private tool-execution engine. -/// -/// This function drives the iterative cycle of: -/// 1. Sending messages to the provider. -/// 2. Parsing the provider's response for tool calls. -/// 3. Executing tools (with sandboxing and timeouts). -/// 4. Appending results to history and looping until a final response is found. -/// -/// Unlike the main agent loop, this is isolated and returns only the final text -/// to be synthesized by the parent. -#[allow(clippy::too_many_arguments)] -async fn run_inner_loop( - provider: &dyn Provider, - history: &mut Vec, - parent_tools: &[Box], - extra_tools: Vec>, - tool_specs: &[ToolSpec], - allowed_names: HashSet, - lazy_resolver: Option, - model: &str, - temperature: f64, - max_iterations: usize, - task_id: &str, - agent_id: &str, - worker_thread_id: Option, - handoff_cache: Option<&ResultHandoffCache>, - parent: &ParentExecutionContext, - extended_policy: bool, -) -> Result<(String, usize, AggregatedUsage, Option), SubagentRunError> { - // An autonomous skill run (set via `with_autonomous_iter_cap`) lifts the - // per-agent cap so sub-agents run until done / the circuit breaker trips. 
- let max_iterations = super::autonomous::autonomous_iter_cap() - .map(|cap| cap.max(max_iterations)) - .unwrap_or(max_iterations) - .max(1); - - // Sub-agent transcript stem — computed once up front so every iteration's - // persist resolves to the same file: `{parent_chain}__{unix_ts}_{agent_id}`. - let child_session_key = { - let now = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default(); - let unix_ts = now.as_secs(); - let nanos = now.subsec_nanos(); - let sanitized: String = agent_id - .chars() - .map(|c| { - if c.is_ascii_alphanumeric() || c == '_' || c == '-' { - c - } else { - '_' - } - }) - .collect(); - let task_suffix: String = task_id - .chars() - .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-') - .take(12) - .collect(); - if task_suffix.is_empty() { - format!("{unix_ts}_{nanos:09}_{sanitized}") - } else { - format!("{unix_ts}_{nanos:09}_{sanitized}_{task_suffix}") - } - }; - let transcript_stem = { - let parent_chain = match parent.session_parent_prefix.as_deref() { - Some(prefix) => format!("{}__{}", prefix, parent.session_key), - None => parent.session_key.clone(), - }; - format!("{parent_chain}__{child_session_key}") - }; - - // ── Text-mode override for integrations_agent ── - // Large Composio toolkits compile into provider grammars that blow the - // 65 535-rule ceiling, so for `integrations_agent` we omit `tools: [...]` - // and describe them in the system prompt as prose, parsing `` - // tags out of the model's response. Forcing `request_specs() == &[]` makes - // the engine skip native tools and fall back to its XML parse + batched - // `[Tool results]` path — exactly what text mode needs. 
- let force_text_mode = agent_id == "integrations_agent" && !tool_specs.is_empty(); - if force_text_mode { - if let Some(sys) = history.iter_mut().find(|m| m.role == "system") { - sys.content.push_str("\n\n"); - sys.content - .push_str(&build_text_mode_tool_instructions(tool_specs)); - } - tracing::info!( - task_id = %task_id, - agent_id = %agent_id, - tool_count = tool_specs.len(), - "[subagent_runner:text-mode] omitting tools from API request, injected XML tool protocol into system prompt" - ); - } - - let advertised_specs: Vec = if force_text_mode { - Vec::new() - } else { - tool_specs.to_vec() - }; - - let mut tool_source = SubagentToolSource { - parent_tools, - extra_tools, - allowed_names, - lazy_resolver, - advertised_specs, - handoff_cache, - policy: crate::openhuman::tools::policy::DefaultToolPolicy, - agent_id: agent_id.to_string(), - }; - let mut observer = SubagentObserver { - worker_thread_id, - workspace_dir: parent.workspace_dir.clone(), - transcript_stem, - agent_id: agent_id.to_string(), - task_id: task_id.to_string(), - force_text_mode, - usage: AggregatedUsage::default(), - }; - let checkpoint = SubagentCheckpoint { - provider, - model: model.to_string(), - temperature, - agent_id: agent_id.to_string(), - }; - let progress = super::super::engine::SubagentProgress { - sink: parent.on_progress.clone(), - agent_id: agent_id.to_string(), - task_id: task_id.to_string(), - extended_policy, - }; - - let parser = super::super::engine::DefaultParser; - // Heap-allocate the child `run_turn_engine` state machine. Sub-agents - // run as nested polls inside the *parent* agent's `run_turn_engine` - // (the orchestrator → tool exec → `dispatch_subagent` → `run_subagent` - // chain), so without the box the parent's tokio worker poll stack - // also has to carry the child engine's ~600-line generator. 
That - // crosses the 2 MiB tokio worker default and aborts with - // "thread 'tokio-rt-worker' has overflowed its stack" — see the - // `chat-harness-subagent` Playwright lane crash logged here: - // `[subagent_runner] dispatching agent_id=researcher ... → fatal - // runtime error: stack overflow`. Boxing here breaks the stack - // accumulation at the recursion boundary. Smoke-tested in - // `nested_subagent_dispatch_runs_on_a_constrained_worker_stack`; - // the deep end-to-end catcher is the `chat-harness-subagent` - // Playwright spec. - let outcome = Box::pin(super::super::engine::run_turn_engine( - provider, - history, - &mut tool_source, - &progress, - &mut observer, - &checkpoint, - &parser, - "subagent", - model, - temperature, - true, // silent — sub-agents never echo to stdout - &crate::openhuman::config::MultimodalConfig::default(), - &crate::openhuman::config::MultimodalFileConfig::default(), - max_iterations, - None, // sub-agents don't stream a draft - &["ask_user_clarification"], - None, // sub-agents don't support run-queue steering - )) - .await?; - - Ok(( - outcome.text, - outcome.iterations as usize, - observer.usage, - outcome.early_exit_tool, - )) -} - -/// Apply the progressive-disclosure handoff to a tool result. If a cache is -/// present and the (cleaned) result is large and not an error / not from the -/// extractor tool, stash the raw payload and substitute a short placeholder the -/// sub-agent can drill into with `extract_from_result`. Errors and -/// already-extracted output pass through unchanged. 
-fn apply_handoff( - cache: &ResultHandoffCache, - tool_name: &str, - task_id: &str, - agent_id: &str, - result_text: String, -) -> String { - let skip_cleaning = tool_name == "extract_from_result" || result_text.starts_with("Error"); - let cleaned = if skip_cleaning { - result_text - } else { - let pre_len = result_text.len(); - let cleaned = clean_tool_output(&result_text); - if cleaned.len() < pre_len { - tracing::debug!( - tool = %tool_name, - before_bytes = pre_len, - after_bytes = cleaned.len(), - saved_pct = ((pre_len - cleaned.len()) * 100) / pre_len.max(1), - "[subagent_runner:handoff] cleaned tool output (stripped markup/data-uris/whitespace)" - ); - } - cleaned - }; - let tokens = cleaned.len().div_ceil(4); - // Allow test harnesses (lib tests AND integration test binaries) to lower - // the threshold so the handoff path can be exercised on payloads that - // survive tokenjuice's compaction cap. Never consulted in production - // (the env var is absent) so there is zero runtime cost. 
- let effective_threshold = std::env::var("OPENHUMAN_TEST_HANDOFF_THRESHOLD_TOKENS") - .ok() - .and_then(|v| v.parse::().ok()) - .unwrap_or(HANDOFF_OVERSIZE_THRESHOLD_TOKENS); - if !skip_cleaning && tokens > effective_threshold { - let id = cache.store(tool_name.to_string(), cleaned.clone()); - let placeholder = build_handoff_placeholder(tool_name, &id, &cleaned); - tracing::info!( - task_id = %task_id, - agent_id = %agent_id, - tool = %tool_name, - raw_tokens = tokens, - raw_bytes = cleaned.len(), - threshold_tokens = effective_threshold, - result_id = %id, - "[subagent_runner:handoff] stashed oversized tool output; substituted placeholder into history" - ); - placeholder - } else { - cleaned - } -} - -/// Sub-agent [`ToolSource`]: looks up tools in `extra_tools` then the parent -/// registry, lazily registers toolkit actions the fuzzy filter omitted, rejects -/// names outside the allowlist, and routes execution through the shared -/// [`run_one_tool`] (so sub-agents now get the same approval gate, audit, -/// credential scrub, tokenjuice and timeout as the channel loop), then applies -/// the progressive-disclosure handoff. 
-struct SubagentToolSource<'a> { - parent_tools: &'a [Box], - extra_tools: Vec>, - allowed_names: HashSet, - lazy_resolver: Option, - advertised_specs: Vec, - handoff_cache: Option<&'a ResultHandoffCache>, - policy: crate::openhuman::tools::policy::DefaultToolPolicy, - agent_id: String, -} - -#[async_trait::async_trait] -impl super::super::engine::ToolSource for SubagentToolSource<'_> { - fn request_specs(&self) -> &[ToolSpec] { - &self.advertised_specs - } - - async fn execute_call( - &mut self, - call: &super::super::parse::ParsedToolCall, - iteration: usize, - progress: &dyn super::super::engine::ProgressReporter, - progress_call_id: &str, - ) -> super::super::engine::ToolRunResult { - // Lazy registration: a call for an unknown tool that matches a real - // action slug in the bound toolkit gets built on the spot and admitted - // to the allowlist. The fuzzy top-K filter keeps schemas out of the - // prompt, not out of execution. - if !self.allowed_names.contains(&call.name) { - if let Some(resolver) = self.lazy_resolver.as_ref() { - if let Some(tool) = resolver.resolve(&call.name) { - tracing::info!( - agent_id = %self.agent_id, - tool = %call.name, - "[subagent_runner] lazily registered toolkit action outside fuzzy top-K" - ); - self.allowed_names.insert(tool.name().to_string()); - self.extra_tools.push(tool); - } - } - } - - if !self.allowed_names.contains(&call.name) { - tracing::warn!( - agent_id = %self.agent_id, - tool = %call.name, - "[subagent_runner] tool not in allowlist for this sub-agent" - ); - let iteration_u32 = (iteration + 1) as u32; - progress - .tool_started(progress_call_id, &call.name, &call.arguments, iteration_u32) - .await; - let mut available: Vec<&str> = self.allowed_names.iter().map(|s| s.as_str()).collect(); - if let Some(resolver) = self.lazy_resolver.as_ref() { - available.extend(resolver.known_slugs()); - } - available.sort_unstable(); - available.dedup(); - let text = format!( - "Error: tool '{}' is not available to the {} 
sub-agent. Available tools: {}", - call.name, - self.agent_id, - available.join(", ") - ); - progress - .tool_completed( - progress_call_id, - &call.name, - false, - text.chars().count(), - 0, - iteration_u32, - ) - .await; - return super::super::engine::ToolRunResult { - text, - success: false, - }; - } - - let tool_opt: Option<&dyn Tool> = self - .extra_tools - .iter() - .find(|t| t.name() == call.name) - .or_else(|| self.parent_tools.iter().find(|t| t.name() == call.name)) - .map(|b| b.as_ref()); - let outcome = super::super::engine::run_one_tool( - tool_opt, - call, - iteration, - progress, - &self.policy, - None, - progress_call_id, - ) - .await; - - let text = match self.handoff_cache { - Some(cache) => apply_handoff(cache, &call.name, "", &self.agent_id, outcome.text), - None => outcome.text, - }; - super::super::engine::ToolRunResult { - text, - success: outcome.success, - } - } -} - -/// Sub-agent [`TurnObserver`]: accumulates usage, persists the per-iteration -/// transcript, and mirrors assistant intents / tool results / final responses -/// to the spawn's worker thread (when one is attached). 
-struct SubagentObserver { - worker_thread_id: Option, - workspace_dir: std::path::PathBuf, - transcript_stem: String, - agent_id: String, - task_id: String, - force_text_mode: bool, - usage: AggregatedUsage, -} - -impl SubagentObserver { - fn append_worker_message( - &self, - content: String, - sender: String, - extra_metadata: serde_json::Value, - ) { - let Some(ref thread_id) = self.worker_thread_id else { - return; - }; - let message = ConversationMessage { - id: format!("{}:{}", sender, uuid::Uuid::new_v4()), - content, - message_type: "text".to_string(), - extra_metadata, - sender, - created_at: chrono::Utc::now().to_rfc3339(), - }; - if let Err(err) = crate::openhuman::memory_conversations::append_message( - self.workspace_dir.clone(), - thread_id, - message, - ) { - tracing::debug!( - agent_id = %self.agent_id, - thread_id = %thread_id, - error = %err, - "[subagent_runner] failed to append message to worker thread" - ); - } - } - - fn persist_transcript(&self, history: &[ChatMessage]) { - let path = match transcript::resolve_keyed_transcript_path( - &self.workspace_dir, - &self.transcript_stem, - ) { - Ok(p) => p, - Err(err) => { - tracing::debug!( - agent_id = %self.agent_id, - error = %err, - "[subagent_runner] failed to resolve transcript path" - ); - return; - } - }; - let now = chrono::Utc::now().to_rfc3339(); - let meta = transcript::TranscriptMeta { - agent_name: self.agent_id.clone(), - dispatcher: "native".into(), - created: now.clone(), - updated: now, - turn_count: 1, - input_tokens: self.usage.input_tokens, - output_tokens: self.usage.output_tokens, - cached_input_tokens: self.usage.cached_input_tokens, - charged_amount_usd: self.usage.charged_amount_usd, - thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(), - }; - if let Err(err) = transcript::write_transcript(&path, history, &meta, None) { - tracing::debug!( - agent_id = %self.agent_id, - error = %err, - "[subagent_runner] failed to write transcript" - ); - } 
- } -} - -#[async_trait::async_trait] -impl super::super::engine::TurnObserver for SubagentObserver { - fn record_usage( - &mut self, - _model: &str, - usage: &crate::openhuman::inference::provider::UsageInfo, - ) { - self.usage.input_tokens += usage.input_tokens; - self.usage.output_tokens += usage.output_tokens; - self.usage.cached_input_tokens += usage.cached_input_tokens; - self.usage.charged_amount_usd += usage.charged_amount_usd; - } - - async fn on_assistant( - &mut self, - _display_text: &str, - response_text: &str, - _reasoning_content: Option<&str>, - _native_tool_calls: &[crate::openhuman::inference::provider::ToolCall], - parsed_calls: &[super::super::parse::ParsedToolCall], - iteration: usize, - is_final: bool, - ) { - let tool_calls = parsed_calls.len(); - let extra = if is_final { - serde_json::json!({ - "scope": "worker_thread", - "agent_id": self.agent_id, - "task_id": self.task_id, - "iteration": iteration + 1, - "final": true, - }) - } else { - serde_json::json!({ - "scope": "worker_thread", - "agent_id": self.agent_id, - "task_id": self.task_id, - "iteration": iteration + 1, - "tool_calls": tool_calls, - }) - }; - self.append_worker_message(response_text.to_string(), "agent".to_string(), extra); - } - - fn on_tool_result( - &mut self, - call_id: &str, - tool_name: &str, - result_text: &str, - _success: bool, - iteration: usize, - ) { - // Native mode mirrors each tool result individually; text mode batches - // them in `on_results_batch` instead. 
- if self.force_text_mode { - return; - } - self.append_worker_message( - result_text.to_string(), - "user".to_string(), - serde_json::json!({ - "scope": "worker_thread", - "agent_id": self.agent_id, - "task_id": self.task_id, - "iteration": iteration + 1, - "tool_call_id": call_id, - "tool_name": tool_name, - }), - ); - } - - fn on_results_batch(&mut self, content: &str, iteration: usize) { - self.append_worker_message( - content.to_string(), - "user".to_string(), - serde_json::json!({ - "scope": "worker_thread", - "agent_id": self.agent_id, - "task_id": self.task_id, - "iteration": iteration + 1, - "mode": "text", - }), - ); - } - - fn after_iteration(&mut self, history: &[ChatMessage], _iteration: usize) { - self.persist_transcript(history); - } -} - -/// Sub-agent [`CheckpointStrategy`]: when the iteration cap is hit, summarize -/// the run-so-far into a resumable checkpoint (so the delegating agent can -/// continue from partial progress) instead of erroring. Falls back to a -/// deterministic digest summary if the summarization call fails or returns no -/// prose. -struct SubagentCheckpoint<'a> { - provider: &'a dyn Provider, - model: String, - temperature: f64, - agent_id: String, -} - -#[async_trait::async_trait] -impl super::super::engine::CheckpointStrategy for SubagentCheckpoint<'_> { - async fn on_max_iter( - &self, - digest: &str, - max_iterations: usize, - ) -> anyhow::Result { - let agent_id = &self.agent_id; - let deterministic = format!( - "I reached my tool-call limit ({max_iterations} steps) before finishing this task. \ - Progress so far (tool calls + results):\n{digest}\n\nThe task is incomplete — the above is \ - what I accomplished; continue from here." - ); - let summary_input = vec![ChatMessage::user(format!( - "You are sub-agent `{agent_id}` and reached your tool-call limit before finishing. 
Here are \ - the tool calls you made and their results — compile a brief progress checkpoint (what you \ - accomplished, what still remains) for the agent that delegated to you. Do not call tools.\n\n{digest}" - ))]; - match self - .provider - .chat( - ChatRequest { - messages: &summary_input, - tools: None, - stream: None, - }, - &self.model, - self.temperature, - ) - .await - { - Ok(resp) => { - let usage = resp.usage.clone(); - let raw = resp.text.unwrap_or_default(); - let (prose, _) = super::super::parse::parse_tool_calls(&raw); - let text = if prose.trim().is_empty() { - deterministic - } else { - prose - }; - Ok(super::super::engine::CheckpointOutcome { text, usage }) - } - Err(e) => { - tracing::warn!( - agent_id = %self.agent_id, - error = %e, - "[subagent_runner] checkpoint summary call failed — using deterministic fallback" - ); - Ok(super::super::engine::CheckpointOutcome { - text: deterministic, - usage: None, - }) - } - } - } -} - -fn parse_tool_arguments(arguments: &str) -> serde_json::Value { - serde_json::from_str(arguments) - .unwrap_or_else(|_| serde_json::Value::Object(Default::default())) -} - -/// Probe whether the user can call Composio at all under the current -/// config. Returns `true` when the mode-aware factory can build EITHER -/// a backend-mode client (legacy JWT-driven path) OR a direct-mode -/// client (BYO Composio API key). The resolved client is dropped -/// immediately — this is purely a "signed-in vs not" check used by the -/// spawn-time refresh path. Per-action dispatch resolves a fresh client -/// elsewhere via [`create_composio_client`] so the live `composio.mode` -/// toggle keeps winning. -/// -/// Extracted as a free function so the regression suite can exercise -/// the same probe the runner uses without spinning up the full -/// `run_typed_mode` plumbing. 
-pub(crate) fn user_is_signed_in_to_composio(config: &crate::openhuman::config::Config) -> bool { - crate::openhuman::composio::client::create_composio_client(config).is_ok() -} - -#[cfg(test)] -#[path = "ops_tests.rs"] -mod tests; - -#[cfg(test)] -#[path = "ops_dedup_tests.rs"] -mod dedup_tests; - -#[cfg(test)] -#[path = "ops_truncation_tests.rs"] -mod truncation_tests; diff --git a/src/openhuman/agent/harness/subagent_runner/ops/checkpoint.rs b/src/openhuman/agent/harness/subagent_runner/ops/checkpoint.rs new file mode 100644 index 0000000000..6cab303ee6 --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/checkpoint.rs @@ -0,0 +1,82 @@ +//! Sub-agent [`CheckpointStrategy`] implementation. +//! +//! When the iteration cap is hit, summarize the run-so-far into a resumable +//! checkpoint (so the delegating agent can continue from partial progress) +//! instead of erroring. Falls back to a deterministic digest summary if the +//! summarization call fails or returns no prose. + +use crate::openhuman::inference::provider::{ChatMessage, ChatRequest, Provider}; + +/// Sub-agent [`CheckpointStrategy`]: when the iteration cap is hit, summarize +/// the run-so-far into a resumable checkpoint (so the delegating agent can +/// continue from partial progress) instead of erroring. Falls back to a +/// deterministic digest summary if the summarization call fails or returns no +/// prose. +pub(super) struct SubagentCheckpoint<'a> { + pub(super) provider: &'a dyn Provider, + pub(super) model: String, + pub(super) temperature: f64, + pub(super) agent_id: String, +} + +#[async_trait::async_trait] +impl super::super::super::engine::CheckpointStrategy for SubagentCheckpoint<'_> { + async fn on_max_iter( + &self, + digest: &str, + max_iterations: usize, + ) -> anyhow::Result { + let agent_id = &self.agent_id; + let deterministic = format!( + "I reached my tool-call limit ({max_iterations} steps) before finishing this task. 
\ + Progress so far (tool calls + results):\n{digest}\n\nThe task is incomplete — the above is \ + what I accomplished; continue from here." + ); + let summary_input = vec![ChatMessage::user(format!( + "You are sub-agent `{agent_id}` and reached your tool-call limit before finishing. Here are \ + the tool calls you made and their results — compile a brief progress checkpoint (what you \ + accomplished, what still remains) for the agent that delegated to you. Do not call tools.\n\n{digest}" + ))]; + match self + .provider + .chat( + ChatRequest { + messages: &summary_input, + tools: None, + stream: None, + }, + &self.model, + self.temperature, + ) + .await + { + Ok(resp) => { + let usage = resp.usage.clone(); + let raw = resp.text.unwrap_or_default(); + let (prose, _) = super::super::super::parse::parse_tool_calls(&raw); + let text = if prose.trim().is_empty() { + deterministic + } else { + prose + }; + Ok(super::super::super::engine::CheckpointOutcome { text, usage }) + } + Err(e) => { + tracing::warn!( + agent_id = %self.agent_id, + error = %e, + "[subagent_runner] checkpoint summary call failed — using deterministic fallback" + ); + Ok(super::super::super::engine::CheckpointOutcome { + text: deterministic, + usage: None, + }) + } + } + } +} + +pub(super) fn parse_tool_arguments(arguments: &str) -> serde_json::Value { + serde_json::from_str(arguments) + .unwrap_or_else(|_| serde_json::Value::Object(Default::default())) +} diff --git a/src/openhuman/agent/harness/subagent_runner/ops/handoff_helper.rs b/src/openhuman/agent/harness/subagent_runner/ops/handoff_helper.rs new file mode 100644 index 0000000000..a6fe89624a --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/handoff_helper.rs @@ -0,0 +1,67 @@ +//! Progressive-disclosure handoff helper for sub-agent tool results. +//! +//! When an oversized tool result is returned, it is stashed in the +//! [`ResultHandoffCache`] and replaced with a short placeholder the sub-agent +//! 
can drill into with `extract_from_result`. + +use crate::openhuman::agent::harness::subagent_runner::handoff::{ + build_handoff_placeholder, clean_tool_output, ResultHandoffCache, + HANDOFF_OVERSIZE_THRESHOLD_TOKENS, +}; + +/// Apply the progressive-disclosure handoff to a tool result. If a cache is +/// present and the (cleaned) result is large and not an error / not from the +/// extractor tool, stash the raw payload and substitute a short placeholder the +/// sub-agent can drill into with `extract_from_result`. Errors and +/// already-extracted output pass through unchanged. +pub(super) fn apply_handoff( + cache: &ResultHandoffCache, + tool_name: &str, + task_id: &str, + agent_id: &str, + result_text: String, +) -> String { + let skip_cleaning = tool_name == "extract_from_result" || result_text.starts_with("Error"); + let cleaned = if skip_cleaning { + result_text + } else { + let pre_len = result_text.len(); + let cleaned = clean_tool_output(&result_text); + if cleaned.len() < pre_len { + tracing::debug!( + tool = %tool_name, + before_bytes = pre_len, + after_bytes = cleaned.len(), + saved_pct = ((pre_len - cleaned.len()) * 100) / pre_len.max(1), + "[subagent_runner:handoff] cleaned tool output (stripped markup/data-uris/whitespace)" + ); + } + cleaned + }; + let tokens = cleaned.len().div_ceil(4); + // Allow test harnesses (lib tests AND integration test binaries) to lower + // the threshold so the handoff path can be exercised on payloads that + // survive tokenjuice's compaction cap. Never consulted in production + // (the env var is absent) so there is zero runtime cost. 
+ let effective_threshold = std::env::var("OPENHUMAN_TEST_HANDOFF_THRESHOLD_TOKENS") + .ok() + .and_then(|v| v.parse::().ok()) + .unwrap_or(HANDOFF_OVERSIZE_THRESHOLD_TOKENS); + if !skip_cleaning && tokens > effective_threshold { + let id = cache.store(tool_name.to_string(), cleaned.clone()); + let placeholder = build_handoff_placeholder(tool_name, &id, &cleaned); + tracing::info!( + task_id = %task_id, + agent_id = %agent_id, + tool = %tool_name, + raw_tokens = tokens, + raw_bytes = cleaned.len(), + threshold_tokens = effective_threshold, + result_id = %id, + "[subagent_runner:handoff] stashed oversized tool output; substituted placeholder into history" + ); + placeholder + } else { + cleaned + } +} diff --git a/src/openhuman/agent/harness/subagent_runner/ops/loop_.rs b/src/openhuman/agent/harness/subagent_runner/ops/loop_.rs new file mode 100644 index 0000000000..944182b7df --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/loop_.rs @@ -0,0 +1,206 @@ +//! Sub-agent inner tool-call loop. +//! +//! Drives the iterative cycle of provider calls and tool execution until the +//! model returns without further tool calls (or the iteration budget is +//! exhausted). Unlike the main agent loop, this is isolated and returns only +//! the final text to be synthesised by the parent. 
+ +use std::collections::HashSet; + +use crate::openhuman::agent::harness::fork_context::ParentExecutionContext; +use crate::openhuman::agent::harness::subagent_runner::handoff::ResultHandoffCache; +use crate::openhuman::agent::harness::subagent_runner::types::SubagentRunError; +use crate::openhuman::inference::provider::Provider; +use crate::openhuman::tools::{Tool, ToolSpec}; + +use super::super::tool_prep::build_text_mode_tool_instructions; +use super::checkpoint::SubagentCheckpoint; +use super::observer::SubagentObserver; +use super::provider::LazyToolkitResolver; +use super::tool_source::SubagentToolSource; + +/// Cumulative usage stats gathered across all provider calls in the loop. +#[derive(Debug, Clone, Default)] +pub(super) struct AggregatedUsage { + pub(super) input_tokens: u64, + pub(super) output_tokens: u64, + pub(super) cached_input_tokens: u64, + pub(super) charged_amount_usd: f64, +} + +/// The sub-agent's private tool-execution engine. +/// +/// This function drives the iterative cycle of: +/// 1. Sending messages to the provider. +/// 2. Parsing the provider's response for tool calls. +/// 3. Executing tools (with sandboxing and timeouts). +/// 4. Appending results to history and looping until a final response is found. +/// +/// Unlike the main agent loop, this is isolated and returns only the final text +/// to be synthesized by the parent. 
+#[allow(clippy::too_many_arguments)] +pub(super) async fn run_inner_loop( + provider: &dyn Provider, + history: &mut Vec, + parent_tools: &[Box], + extra_tools: Vec>, + tool_specs: &[ToolSpec], + allowed_names: HashSet, + lazy_resolver: Option, + model: &str, + temperature: f64, + max_iterations: usize, + task_id: &str, + agent_id: &str, + worker_thread_id: Option, + handoff_cache: Option<&ResultHandoffCache>, + parent: &ParentExecutionContext, + extended_policy: bool, +) -> Result<(String, usize, AggregatedUsage, Option), SubagentRunError> { + // An autonomous skill run (set via `with_autonomous_iter_cap`) lifts the + // per-agent cap so sub-agents run until done / the circuit breaker trips. + let max_iterations = super::super::autonomous::autonomous_iter_cap() + .map(|cap| cap.max(max_iterations)) + .unwrap_or(max_iterations) + .max(1); + + // Sub-agent transcript stem — computed once up front so every iteration's + // persist resolves to the same file: `{parent_chain}__{unix_ts}_{agent_id}`. 
+ let child_session_key = { + let now = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default(); + let unix_ts = now.as_secs(); + let nanos = now.subsec_nanos(); + let sanitized: String = agent_id + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() || c == '_' || c == '-' { + c + } else { + '_' + } + }) + .collect(); + let task_suffix: String = task_id + .chars() + .filter(|c| c.is_ascii_alphanumeric() || *c == '_' || *c == '-') + .take(12) + .collect(); + if task_suffix.is_empty() { + format!("{unix_ts}_{nanos:09}_{sanitized}") + } else { + format!("{unix_ts}_{nanos:09}_{sanitized}_{task_suffix}") + } + }; + let transcript_stem = { + let parent_chain = match parent.session_parent_prefix.as_deref() { + Some(prefix) => format!("{}__{}", prefix, parent.session_key), + None => parent.session_key.clone(), + }; + format!("{parent_chain}__{child_session_key}") + }; + + // ── Text-mode override for integrations_agent ── + // Large Composio toolkits compile into provider grammars that blow the + // 65 535-rule ceiling, so for `integrations_agent` we omit `tools: [...]` + // and describe them in the system prompt as prose, parsing `` + // tags out of the model's response. Forcing `request_specs() == &[]` makes + // the engine skip native tools and fall back to its XML parse + batched + // `[Tool results]` path — exactly what text mode needs. 
+ let force_text_mode = agent_id == "integrations_agent" && !tool_specs.is_empty(); + if force_text_mode { + if let Some(sys) = history.iter_mut().find(|m| m.role == "system") { + sys.content.push_str("\n\n"); + sys.content + .push_str(&build_text_mode_tool_instructions(tool_specs)); + } + tracing::info!( + task_id = %task_id, + agent_id = %agent_id, + tool_count = tool_specs.len(), + "[subagent_runner:text-mode] omitting tools from API request, injected XML tool protocol into system prompt" + ); + } + + let advertised_specs: Vec = if force_text_mode { + Vec::new() + } else { + tool_specs.to_vec() + }; + + let mut tool_source = SubagentToolSource { + parent_tools, + extra_tools, + allowed_names, + lazy_resolver, + advertised_specs, + handoff_cache, + policy: crate::openhuman::tools::policy::DefaultToolPolicy, + agent_id: agent_id.to_string(), + }; + let mut observer = SubagentObserver { + worker_thread_id, + workspace_dir: parent.workspace_dir.clone(), + transcript_stem, + agent_id: agent_id.to_string(), + task_id: task_id.to_string(), + force_text_mode, + usage: AggregatedUsage::default(), + }; + let checkpoint = SubagentCheckpoint { + provider, + model: model.to_string(), + temperature, + agent_id: agent_id.to_string(), + }; + let progress = super::super::super::engine::SubagentProgress { + sink: parent.on_progress.clone(), + agent_id: agent_id.to_string(), + task_id: task_id.to_string(), + extended_policy, + }; + + let parser = super::super::super::engine::DefaultParser; + // Heap-allocate the child `run_turn_engine` state machine. Sub-agents + // run as nested polls inside the *parent* agent's `run_turn_engine` + // (the orchestrator → tool exec → `dispatch_subagent` → `run_subagent` + // chain), so without the box the parent's tokio worker poll stack + // also has to carry the child engine's ~600-line generator. 
That + // crosses the 2 MiB tokio worker default and aborts with + // "thread 'tokio-rt-worker' has overflowed its stack" — see the + // `chat-harness-subagent` Playwright lane crash logged here: + // `[subagent_runner] dispatching agent_id=researcher ... → fatal + // runtime error: stack overflow`. Boxing here breaks the stack + // accumulation at the recursion boundary. Smoke-tested in + // `nested_subagent_dispatch_runs_on_a_constrained_worker_stack`; + // the deep end-to-end catcher is the `chat-harness-subagent` + // Playwright spec. + let outcome = Box::pin(super::super::super::engine::run_turn_engine( + provider, + history, + &mut tool_source, + &progress, + &mut observer, + &checkpoint, + &parser, + "subagent", + model, + temperature, + true, // silent — sub-agents never echo to stdout + &crate::openhuman::config::MultimodalConfig::default(), + &crate::openhuman::config::MultimodalFileConfig::default(), + max_iterations, + None, // sub-agents don't stream a draft + &["ask_user_clarification"], + None, // sub-agents don't support run-queue steering + )) + .await?; + + Ok(( + outcome.text, + outcome.iterations as usize, + observer.usage, + outcome.early_exit_tool, + )) +} diff --git a/src/openhuman/agent/harness/subagent_runner/ops/mod.rs b/src/openhuman/agent/harness/subagent_runner/ops/mod.rs new file mode 100644 index 0000000000..f58939db58 --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/mod.rs @@ -0,0 +1,83 @@ +//! Sub-agent execution entry points and the inner tool-call loop. +//! +//! The public runner lives in [`run_subagent`]. It dispatches to +//! [`runner::run_typed_mode`] (narrow prompt + filtered tools) which builds a +//! brand-new system prompt and a filtered tool list for the requested +//! archetype, then drives provider calls and tool execution until the model +//! returns without further tool calls (or the iteration budget is exhausted). +//! +//! ## Layout +//! +//! | File | Contents | +//! 
| ------------------- | -------------------------------------------------------------- | +//! | `provider.rs` | `resolve_subagent_provider`, `user_is_signed_in_to_composio`, `LazyToolkitResolver` | +//! | `prompt.rs` | Role-contract suffix, `append_subagent_role_contract`, `dedup_tool_specs_by_name` | +//! | `runner.rs` | `run_subagent`, `run_typed_mode` | +//! | `loop_.rs` | `run_inner_loop`, `AggregatedUsage` | +//! | `tool_source.rs` | `SubagentToolSource` | +//! | `handoff_helper.rs` | `apply_handoff` | +//! | `observer.rs` | `SubagentObserver` | +//! | `checkpoint.rs` | `SubagentCheckpoint`, `parse_tool_arguments` | + +mod checkpoint; +mod handoff_helper; +mod loop_; +mod observer; +mod prompt; +mod provider; +mod runner; +mod tool_source; + +// Public entry point — the primary API surface consumed by the parent module. +pub use runner::run_subagent; + +// `user_is_signed_in_to_composio` is the mode-aware "can the user call +// composio at all?" probe added in Wave 2 (#1710). Re-exported here so +// non-composio probe sites (registration gates, heartbeat telemetry) +// can call it as +// `crate::openhuman::agent::harness::subagent_runner::user_is_signed_in_to_composio` +// without reaching into a private sibling module. +pub(crate) use provider::user_is_signed_in_to_composio; + +// `resolve_subagent_provider` is called from tests via +// `super::resolve_subagent_provider`. Keep it accessible at the ops +// module boundary. +pub(crate) use provider::resolve_subagent_provider; + +// Re-exports for test companion modules that use `use super::*`. +// These provide the same flat namespace the original ops.rs had. +#[cfg(test)] +pub(super) use prompt::{append_subagent_role_contract, dedup_tool_specs_by_name}; +#[cfg(test)] +pub(super) use provider::{normalize_slug, LazyToolkitResolver}; +// filter_tool_indices lives in tool_prep (sibling of ops). 
+#[cfg(test)] +pub(super) use super::tool_prep::filter_tool_indices; +// Types used by tests that were previously in scope via the flat ops.rs imports. +#[cfg(test)] +pub(super) use super::types::{ + SubagentMode, SubagentRunError, SubagentRunOptions, SubagentRunOutcome, +}; +#[cfg(test)] +pub(super) use crate::openhuman::agent::harness::definition::{AgentDefinition, PromptSource}; +#[cfg(test)] +pub(super) use crate::openhuman::agent::harness::fork_context::ParentExecutionContext; +#[cfg(test)] +pub(super) use crate::openhuman::agent::harness::{ + current_spawn_depth, with_spawn_depth, MAX_SPAWN_DEPTH, +}; +#[cfg(test)] +pub(super) use crate::openhuman::tools::{Tool, ToolSpec}; + +// Test companion modules — path references relative to their original location. +#[cfg(test)] +#[path = "../ops_tests.rs"] +mod tests; + +#[cfg(test)] +#[path = "../ops_dedup_tests.rs"] +mod dedup_tests; + +#[cfg(test)] +#[path = "../ops_truncation_tests.rs"] +mod truncation_tests; diff --git a/src/openhuman/agent/harness/subagent_runner/ops/observer.rs b/src/openhuman/agent/harness/subagent_runner/ops/observer.rs new file mode 100644 index 0000000000..432adf0dfa --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/observer.rs @@ -0,0 +1,181 @@ +//! Sub-agent [`TurnObserver`] implementation. +//! +//! Accumulates usage stats, persists per-iteration transcripts, and +//! mirrors assistant intents / tool results / final responses to the +//! spawn's worker thread (when one is attached). 
+ +use crate::openhuman::inference::provider::ChatMessage; +use crate::openhuman::memory_conversations::ConversationMessage; + +use super::super::super::session::transcript; +use super::loop_::AggregatedUsage; + +pub(super) struct SubagentObserver { + pub(super) worker_thread_id: Option, + pub(super) workspace_dir: std::path::PathBuf, + pub(super) transcript_stem: String, + pub(super) agent_id: String, + pub(super) task_id: String, + pub(super) force_text_mode: bool, + pub(super) usage: AggregatedUsage, +} + +impl SubagentObserver { + pub(super) fn append_worker_message( + &self, + content: String, + sender: String, + extra_metadata: serde_json::Value, + ) { + let Some(ref thread_id) = self.worker_thread_id else { + return; + }; + let message = ConversationMessage { + id: format!("{}:{}", sender, uuid::Uuid::new_v4()), + content, + message_type: "text".to_string(), + extra_metadata, + sender, + created_at: chrono::Utc::now().to_rfc3339(), + }; + if let Err(err) = crate::openhuman::memory_conversations::append_message( + self.workspace_dir.clone(), + thread_id, + message, + ) { + tracing::debug!( + agent_id = %self.agent_id, + thread_id = %thread_id, + error = %err, + "[subagent_runner] failed to append message to worker thread" + ); + } + } + + pub(super) fn persist_transcript(&self, history: &[ChatMessage]) { + let path = match transcript::resolve_keyed_transcript_path( + &self.workspace_dir, + &self.transcript_stem, + ) { + Ok(p) => p, + Err(err) => { + tracing::debug!( + agent_id = %self.agent_id, + error = %err, + "[subagent_runner] failed to resolve transcript path" + ); + return; + } + }; + let now = chrono::Utc::now().to_rfc3339(); + let meta = transcript::TranscriptMeta { + agent_name: self.agent_id.clone(), + dispatcher: "native".into(), + created: now.clone(), + updated: now, + turn_count: 1, + input_tokens: self.usage.input_tokens, + output_tokens: self.usage.output_tokens, + cached_input_tokens: self.usage.cached_input_tokens, + charged_amount_usd: 
self.usage.charged_amount_usd, + thread_id: crate::openhuman::inference::provider::thread_context::current_thread_id(), + }; + if let Err(err) = transcript::write_transcript(&path, history, &meta, None) { + tracing::debug!( + agent_id = %self.agent_id, + error = %err, + "[subagent_runner] failed to write transcript" + ); + } + } +} + +#[async_trait::async_trait] +impl super::super::super::engine::TurnObserver for SubagentObserver { + fn record_usage( + &mut self, + _model: &str, + usage: &crate::openhuman::inference::provider::UsageInfo, + ) { + self.usage.input_tokens += usage.input_tokens; + self.usage.output_tokens += usage.output_tokens; + self.usage.cached_input_tokens += usage.cached_input_tokens; + self.usage.charged_amount_usd += usage.charged_amount_usd; + } + + async fn on_assistant( + &mut self, + _display_text: &str, + response_text: &str, + _reasoning_content: Option<&str>, + _native_tool_calls: &[crate::openhuman::inference::provider::ToolCall], + parsed_calls: &[super::super::super::parse::ParsedToolCall], + iteration: usize, + is_final: bool, + ) { + let tool_calls = parsed_calls.len(); + let extra = if is_final { + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "final": true, + }) + } else { + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "tool_calls": tool_calls, + }) + }; + self.append_worker_message(response_text.to_string(), "agent".to_string(), extra); + } + + fn on_tool_result( + &mut self, + call_id: &str, + tool_name: &str, + result_text: &str, + _success: bool, + iteration: usize, + ) { + // Native mode mirrors each tool result individually; text mode batches + // them in `on_results_batch` instead. 
+ if self.force_text_mode { + return; + } + self.append_worker_message( + result_text.to_string(), + "user".to_string(), + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "tool_call_id": call_id, + "tool_name": tool_name, + }), + ); + } + + fn on_results_batch(&mut self, content: &str, iteration: usize) { + self.append_worker_message( + content.to_string(), + "user".to_string(), + serde_json::json!({ + "scope": "worker_thread", + "agent_id": self.agent_id, + "task_id": self.task_id, + "iteration": iteration + 1, + "mode": "text", + }), + ); + } + + fn after_iteration(&mut self, history: &[ChatMessage], _iteration: usize) { + self.persist_transcript(history); + } +} diff --git a/src/openhuman/agent/harness/subagent_runner/ops/prompt.rs b/src/openhuman/agent/harness/subagent_runner/ops/prompt.rs new file mode 100644 index 0000000000..1c9117b151 --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/prompt.rs @@ -0,0 +1,106 @@ +//! System-prompt helpers for sub-agent typed mode. +//! +//! Includes the role-contract suffix, its injector, and the tool-spec +//! deduplication helper used before sending specs to the provider. + +use crate::openhuman::tools::ToolSpec; +use std::collections::HashSet; + +// ───────────────────────────────────────────────────────────────────────────── +// Sub-agent role contract +// ───────────────────────────────────────────────────────────────────────────── + +/// Prompt suffix injected into every typed sub-agent run. 
+/// +/// Purpose: +/// - make the child explicitly aware it is acting as a sub-agent +/// - keep delegated outputs concise so parent-context growth stays bounded +/// - discourage verbose restatement of the delegated task/context +pub(super) const SUBAGENT_ROLE_CONTRACT_SUFFIX: &str = + "## Sub-agent Role Contract\n\n\ +You are a sub-agent working for a parent OpenHuman agent, not a direct end-user assistant.\n\ +- Stay tightly scoped to the delegated task.\n\ +- Keep tool arguments and follow-up prompts compact, include only required fields/context.\n\ +- Keep your final response concise and synthesis-ready for the parent, prefer short bullets or short paragraphs.\n\ +- Do not restate the full task/context unless strictly required for correctness.\n\ +\n\ +## Sub-agent Result Contract\n\n\ +Return a compact result with these headings:\n\ +- Answer\n\ +- Evidence used\n\ +- Actions taken\n\ +- Open uncertainties\n\ +- Failed tool calls\n\ +- Recommended next step\n\ +\n\ +Do not include facts in Answer that are not supported by Evidence used or Actions taken.\n\ +If a tool result was truncated, partial, or too large to inspect fully, say so under Open uncertainties and do not treat it as complete.\n"; + +pub(crate) fn append_subagent_role_contract(base_prompt: String, agent_id: &str) -> String { + if base_prompt.contains(SUBAGENT_ROLE_CONTRACT_SUFFIX.trim()) { + tracing::debug!( + agent_id = %agent_id, + base_chars = base_prompt.chars().count(), + "[subagent_runner] sub-agent role contract already present in system prompt" + ); + return base_prompt; + } + + let mut prompt = base_prompt; + if !prompt.ends_with('\n') { + prompt.push('\n'); + } + prompt.push('\n'); + prompt.push_str(SUBAGENT_ROLE_CONTRACT_SUFFIX); + + tracing::debug!( + agent_id = %agent_id, + suffix_chars = SUBAGENT_ROLE_CONTRACT_SUFFIX.chars().count(), + final_chars = prompt.chars().count(), + "[subagent_runner] appended sub-agent role contract to system prompt" + ); + + prompt +} + +// 
───────────────────────────────────────────────────────────────────────────── +// Tool-spec deduplication +// ───────────────────────────────────────────────────────────────────────────── + +/// Deduplicate assembled tool specs by name, keeping the first occurrence. +/// +/// The sub-agent's `filtered_specs` is a `Vec` assembled from +/// `parent.all_tool_specs` indices plus dynamic tools, so a delegation tool can +/// shadow a same-named skill/integration tool (common for the wide-set +/// `tools_agent`), leaving two specs with the same name. Strict providers reject +/// such a request with `400 "Tool names must be unique."` The main-agent path +/// dedups via [`session::builder::dedup_visible_tool_specs`]; this separate +/// sub-agent assembly must do the same. +/// +/// First occurrence wins so registration-order semantics are preserved (tool +/// dispatch still resolves by name). Dropped duplicates are logged at `debug` +/// (diagnostic instrumentation, per the repo Rust logging guideline). +/// +/// Extracted as a free function so the regression suite can exercise the dedup +/// without standing up the full `run_typed_mode` plumbing. 
+pub(crate) fn dedup_tool_specs_by_name(agent_id: &str, specs: Vec) -> Vec { + let mut seen: HashSet = HashSet::with_capacity(specs.len()); + let mut deduped: Vec = Vec::with_capacity(specs.len()); + let mut dropped: Vec = Vec::new(); + for spec in specs { + if seen.insert(spec.name.clone()) { + deduped.push(spec); + } else { + dropped.push(spec.name); + } + } + if !dropped.is_empty() { + tracing::debug!( + agent_id = %agent_id, + "[subagent_runner] dropped {} duplicate tool spec(s) before sending to provider: {:?}", + dropped.len(), + dropped + ); + } + deduped +} diff --git a/src/openhuman/agent/harness/subagent_runner/ops/provider.rs b/src/openhuman/agent/harness/subagent_runner/ops/provider.rs new file mode 100644 index 0000000000..2d70d8abeb --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/provider.rs @@ -0,0 +1,246 @@ +//! Sub-agent provider and model resolution. +//! +//! Resolves `(provider, model)` from a declarative [`ModelSpec`], plus +//! Composio sign-in probe and the lazy toolkit action resolver. + +use std::sync::Arc; + +use crate::openhuman::inference::provider::Provider; + +// ───────────────────────────────────────────────────────────────────────────── +// Provider / model resolution +// ───────────────────────────────────────────────────────────────────────────── + +/// Resolve a sub-agent's `(provider, model)` based on its declarative +/// `[model]` spec. +/// +/// - inline `model` override — highest precedence for one call. +/// - config-level pin — `[orchestrator] model` or `[teams.*]` +/// `lead_model` / `agent_model`, when present. +/// - `Inherit` — use the parent's provider AND model. Literally +/// "do what the parent does". +/// - `Hint(workload)` — build a fresh provider via the per-workload +/// factory (e.g. `integrations_agent`'s `[model] hint = "agentic"` +/// resolves to whatever `agentic_provider` is routed to in +/// AI Settings). 
The factory returns the *exact* model id for that +/// workload — the OpenHuman backend and every third-party provider +/// accept exact model names, so there's no `{hint}-v1` synthesis +/// anywhere on this path. +/// - `Exact(name)` — escape hatch: use the parent's provider with +/// this model name overriding the parent's. Callers are expected +/// to know the model is valid for the parent's provider; the enum +/// is the wrong place to encode provider switching, which belongs +/// to `Hint` + AI-settings routing. +/// +/// `config` is `None` when the live `Config::load_or_init()` failed +/// (rare — transient I/O). Both `None` config and factory build errors +/// fall back to `(parent_provider, parent_model)` so a config glitch +/// can't sink sub-agent execution entirely. +/// +/// The async part (config load) is hoisted out of the caller so this +/// helper stays sync and can be exercised by a focused unit test +/// without spinning up a `tokio::test` runtime per case. +pub(crate) fn resolve_subagent_provider( + spec: &crate::openhuman::agent::harness::definition::ModelSpec, + agent_id: &str, + config: Option<&crate::openhuman::config::Config>, + parent_provider: Arc, + parent_model: String, + is_team_lead: bool, + model_override: Option<&str>, +) -> (Arc, String) { + use crate::openhuman::agent::harness::definition::ModelSpec; + if let Some(model) = model_override + .map(str::trim) + .filter(|model| !model.is_empty()) + { + log::debug!( + "[subagent_runner] agent_id={} using inline model override model={}", + agent_id, + model + ); + return (parent_provider, model.to_string()); + } + + if let Some(model) = config.and_then(|cfg| cfg.configured_agent_model(agent_id, is_team_lead)) { + log::debug!( + "[subagent_runner] agent_id={} using config-level model pin model={}", + agent_id, + model + ); + return (parent_provider, model.to_string()); + } + + match spec { + ModelSpec::Hint(workload) => match config { + Some(cfg) => { + match 
crate::openhuman::inference::provider::create_chat_provider(workload, cfg) { + Ok((p, m)) => { + log::info!( + "[subagent_runner] role={} agent_id={} resolved via workload factory model={}", + workload, agent_id, m + ); + (std::sync::Arc::from(p), m) + } + Err(e) => { + let suggested_key = match workload.as_str() { + "summarization" | "memory" => "memory_provider".to_string(), + _ => format!("{workload}_provider"), + }; + log::warn!( + "[subagent_runner] workload='{}' provider build failed for agent_id={} error='{}' \ + falling back to parent provider (parent_model='{}'). \ + Consider setting {} in config.", + workload, + agent_id, + e, + parent_model, + suggested_key + ); + (parent_provider, parent_model) + } + } + } + None => { + log::warn!( + "[subagent_runner] config load failed for workload '{}' (agent_id={}) — \ + falling back to parent provider + parent model '{}'", + workload, + agent_id, + parent_model + ); + (parent_provider, parent_model) + } + }, + ModelSpec::Inherit => (parent_provider, parent_model), + ModelSpec::Exact(name) => (parent_provider, name.clone()), + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Composio sign-in probe +// ───────────────────────────────────────────────────────────────────────────── + +/// Probe whether the user can call Composio at all under the current +/// config. Returns `true` when the mode-aware factory can build EITHER +/// a backend-mode client (legacy JWT-driven path) OR a direct-mode +/// client (BYO Composio API key). The resolved client is dropped +/// immediately — this is purely a "signed-in vs not" check used by the +/// spawn-time refresh path. Per-action dispatch resolves a fresh client +/// elsewhere via [`create_composio_client`] so the live `composio.mode` +/// toggle keeps winning. +/// +/// Extracted as a free function so the regression suite can exercise +/// the same probe the runner uses without spinning up the full +/// `run_typed_mode` plumbing. 
+pub(crate) fn user_is_signed_in_to_composio(config: &crate::openhuman::config::Config) -> bool { + crate::openhuman::composio::client::create_composio_client(config).is_ok() +} + +// ───────────────────────────────────────────────────────────────────────────── +// Lazy toolkit resolver +// ───────────────────────────────────────────────────────────────────────────── + +/// Lazy resolver that lets `integrations_agent` recover when the model +/// calls a Composio action slug that exists in the bound toolkit's full +/// catalogue but was filtered out of the up-front fuzzy top-K. On a +/// match we build the [`ComposioActionTool`] on demand so the call +/// dispatches normally instead of dead-ending in +/// `Error: tool '...' is not available`. +/// +/// Holds an [`Arc`] rather than a pre-baked +/// [`crate::openhuman::composio::ComposioClient`] so the live +/// `composio.mode` toggle is honoured per execute — see +/// [`crate::openhuman::composio::ComposioActionTool`] and issue #1710. +pub(crate) struct LazyToolkitResolver { + pub(crate) config: std::sync::Arc, + pub(crate) actions: Vec, +} + +impl LazyToolkitResolver { + pub(super) fn resolve(&self, name: &str) -> Option> { + let action = self.find_action(name)?; + Some(Box::new( + crate::openhuman::composio::ComposioActionTool::new( + self.config.clone(), + action.name.clone(), + action.description.clone(), + action.parameters.clone(), + ), + )) + } + + /// Match a model-supplied tool name to a real toolkit action, tolerant + /// of the near-miss slugs models routinely emit — case differences and + /// separator/prefix drift (bug-report-2026-05-26 A2). Tries, in order: + /// exact, case-insensitive, then a normalized alphanumeric match + /// (accepted only when **unique**, so a fabricated slug can't silently + /// resolve to the wrong action — those still fall through to the + /// "tool not available" error, which lists `known_slugs` for the model + /// to self-correct). 
+ fn find_action( + &self, + name: &str, + ) -> Option<&crate::openhuman::context::prompt::ConnectedIntegrationTool> { + if let Some(action) = self.actions.iter().find(|a| a.name == name) { + return Some(action); + } + if let Some(action) = self + .actions + .iter() + .find(|a| a.name.eq_ignore_ascii_case(name)) + { + tracing::debug!( + requested = %name, + matched = %action.name, + "[subagent_runner] resolved tool by case-insensitive match" + ); + return Some(action); + } + let norm = normalize_slug(name); + if !norm.is_empty() { + let mut matches = self + .actions + .iter() + .filter(|a| normalize_slug(&a.name) == norm); + if let Some(action) = matches.next() { + if matches.next().is_none() { + tracing::info!( + requested = %name, + matched = %action.name, + "[subagent_runner] resolved tool by normalized-slug match" + ); + return Some(action); + } + // Ambiguous: 2+ actions normalize to the same slug (e.g. + // `read_file` and `ReadFile` → `readfile`). We deliberately + // refuse to guess. Warn (not debug): a slug collision is a + // toolkit configuration anomaly that should surface in normal + // operator logs, not stay hidden behind debug filtering. + tracing::warn!( + requested = %name, + norm = %norm, + "[subagent_runner] ambiguous normalized-slug match — multiple actions resolve to the same slug; not resolving" + ); + } + } + None + } + + /// Slugs from the bound toolkit, for inclusion in unknown-tool + /// errors so the model can self-correct without burning a turn. + pub(super) fn known_slugs(&self) -> Vec<&str> { + self.actions.iter().map(|a| a.name.as_str()).collect() + } +} + +/// Lowercased, non-alphanumerics stripped — collapses separator/prefix +/// drift (`GOOGLESLIDES_BATCH_UPDATE` vs `googleslides_batch_update`) so +/// near-miss tool slugs still resolve, while genuinely different slugs +/// (e.g. a hallucinated `GMAIL_GET_LAST_3_MESSAGES`) stay distinct. 
pub(crate) fn normalize_slug(s: &str) -> String {
    s.chars()
        .filter(|c| c.is_ascii_alphanumeric())
        .map(|c| c.to_ascii_lowercase())
        .collect()
}
diff --git a/src/openhuman/agent/harness/subagent_runner/ops/runner.rs b/src/openhuman/agent/harness/subagent_runner/ops/runner.rs
new file mode 100644
index 0000000000..f54413e954
--- /dev/null
+++ b/src/openhuman/agent/harness/subagent_runner/ops/runner.rs
@@ -0,0 +1,744 @@
//! Top-level sub-agent run entry points.
//!
//! [`run_subagent`] is the primary entry point for agent delegation and
//! dispatches to [`run_typed_mode`] which builds a brand-new system prompt
//! and a filtered tool list for the requested archetype, then drives provider
//! calls and tool execution until the model returns without further tool calls
//! (or the iteration budget is exhausted).

use std::collections::HashSet;
use std::sync::Arc;
use std::time::Instant;

use crate::openhuman::agent::harness::definition::{
    AgentDefinition, IterationPolicy, PromptSource,
};
use crate::openhuman::agent::harness::fork_context::{current_parent, ParentExecutionContext};
use crate::openhuman::agent::harness::subagent_runner::extract_tool::ExtractFromResultTool;
use crate::openhuman::agent::harness::subagent_runner::handoff::ResultHandoffCache;
use crate::openhuman::agent::harness::subagent_runner::tool_prep::{
    filter_tool_indices, is_subagent_spawn_tool, load_prompt_source, top_k_for_toolkit,
};
use crate::openhuman::agent::harness::subagent_runner::types::{
    SubagentMode, SubagentRunError, SubagentRunOptions, SubagentRunOutcome,
};
use crate::openhuman::agent::harness::{
    current_spawn_depth, with_current_sandbox_mode, with_spawn_depth, MAX_SPAWN_DEPTH,
};
use crate::openhuman::context::prompt::{
    render_subagent_system_prompt, PromptContext, PromptTool, SubagentRenderOptions,
};
use crate::openhuman::file_state::with_file_state_agent_id;
use crate::openhuman::tools::{Tool, ToolCategory, ToolSpec};

use super::loop_::run_inner_loop;
use super::prompt::{append_subagent_role_contract, dedup_tool_specs_by_name};
use super::provider::{
    resolve_subagent_provider, user_is_signed_in_to_composio, LazyToolkitResolver,
};

/// Run a sub-agent based on its definition and a task prompt.
///
/// This is the primary entry point for agent delegation. It performs the following:
/// 1. Resolves the [`ParentExecutionContext`] task-local.
/// 2. Generates a unique `task_id` if one wasn't provided.
/// 3. Rejects the spawn when it would exceed [`MAX_SPAWN_DEPTH`].
/// 4. Dispatches to `run_typed_mode`.
/// 5. Truncates the output to `definition.max_result_chars` when that cap is set.
///
/// On success returns a [`SubagentRunOutcome`] whose `output` is the
/// final assistant text. On failure the error is suitable for stringifying
/// into a `tool_result` block.
///
/// # Errors
///
/// - [`SubagentRunError::NoParentContext`] when no parent context is installed.
/// - [`SubagentRunError::SpawnDepthExceeded`] when the nesting limit is hit.
/// - Any error returned by `run_typed_mode`.
pub async fn run_subagent(
    definition: &AgentDefinition,
    task_prompt: &str,
    options: SubagentRunOptions,
) -> Result<SubagentRunOutcome, SubagentRunError> {
    // Unconditionally heap-allocate the entire run_subagent body so
    // every caller doesn't have to carry this future's state inline.
    // Tools that delegate run inside the parent agent's already-deep
    // `run_turn_engine` poll, so the parent's stack would otherwise pile
    // (parent engine state + dispatch_subagent state + run_subagent's
    // wrapper state + run_typed_mode state + child engine state) onto
    // tokio's 2 MiB worker stack and abort with "thread
    // 'tokio-rt-worker' has overflowed its stack, fatal runtime error:
    // stack overflow" — observed at `[subagent_runner] dispatching
    // agent_id=researcher ...` in the `chat-harness-subagent` Playwright
    // lane crash. The inner `Box::pin`s around `run_typed_mode` /
    // `run_inner_loop` / child `run_turn_engine` further chunk the
    // child's state so a single sub-agent run can't blow the stack either.
    Box::pin(async move {
        let parent = current_parent().ok_or(SubagentRunError::NoParentContext)?;
        let task_id = options
            .task_id
            .clone()
            .unwrap_or_else(|| format!("sub-{}", uuid::Uuid::new_v4()));
        let current_depth = current_spawn_depth();
        let attempted_depth = current_depth.saturating_add(1);

        if attempted_depth > MAX_SPAWN_DEPTH {
            tracing::warn!(
                agent_id = %definition.id,
                task_id = %task_id,
                current_depth,
                attempted_depth,
                max_depth = MAX_SPAWN_DEPTH,
                "[subagent_runner] spawn depth exceeded"
            );
            return Err(SubagentRunError::SpawnDepthExceeded {
                attempted_depth,
                max_depth: MAX_SPAWN_DEPTH,
            });
        }

        tracing::info!(
            agent_id = %definition.id,
            task_id = %task_id,
            spawn_depth = attempted_depth,
            max_spawn_depth = MAX_SPAWN_DEPTH,
            prompt_chars = task_prompt.chars().count(),
            skill_filter = ?options.skill_filter_override.as_deref().or(definition.skill_filter.as_deref()),
            "[subagent_runner] dispatching"
        );

        // Install the sub-agent's declared `sandbox_mode` as the active
        // task-local for every tool invocation inside this run.
        let mut outcome = with_spawn_depth(attempted_depth, async {
            with_file_state_agent_id(task_id.clone(), async {
                with_current_sandbox_mode(definition.sandbox_mode, async {
                    Box::pin(run_typed_mode(
                        definition,
                        task_prompt,
                        &options,
                        &parent,
                        &task_id,
                    ))
                    .await
                })
                .await
            })
            .await
        })
        .await?;

        // Truncate result to the definition's cap if set.
        // Cut on a char boundary (not a raw byte offset) to avoid panicking
        // on multi-byte UTF-8 sequences at the truncation boundary.
        if let Some(cap) = definition.max_result_chars {
            // `nth(cap)` yields the byte offset of the first char past the
            // cap, and is `Some` exactly when the output is longer than
            // `cap` chars — so one pass both detects and locates the cut.
            let cut = outcome
                .output
                .char_indices()
                .nth(cap)
                .map(|(byte_offset, _)| byte_offset);
            if let Some(byte_offset) = cut {
                tracing::debug!(
                    agent_id = %definition.id,
                    original_chars = outcome.output.chars().count(),
                    cap,
                    "[subagent_runner] truncating oversized result to max_result_chars cap"
                );
                outcome.output.truncate(byte_offset);
                outcome.output.push_str("\n[...truncated]");
            }
        }

        tracing::info!(
            agent_id = %definition.id,
            task_id = %task_id,
            spawn_depth = attempted_depth,
            elapsed_ms = outcome.elapsed.as_millis() as u64,
            iterations = outcome.iterations,
            output_chars = outcome.output.chars().count(),
            "[subagent_runner] completed"
        );

        Ok(outcome)
    })
    .await
}

// ─────────────────────────────────────────────────────────────────────────────
// Typed mode — narrow prompt, filtered tools, cheaper model
// ─────────────────────────────────────────────────────────────────────────────

/// Execute a sub-agent in "Typed" mode.
///
/// This mode builds a brand-new, minimized system prompt specifically for the
/// agent's archetype. It filters the parent's tools down to only those allowed
/// by the definition and per-spawn overrides.
async fn run_typed_mode(
    definition: &AgentDefinition,
    task_prompt: &str,
    options: &SubagentRunOptions,
    parent: &ParentExecutionContext,
    task_id: &str,
) -> Result<SubagentRunOutcome, SubagentRunError> {
    let started = Instant::now();

    // Resolve provider + model. See `resolve_subagent_provider` for the
    // semantics of each ModelSpec variant. `Config::load_or_init()` is
    // async so the load is hoisted out of the helper — the helper itself
    // is sync and unit-tested.
+ let config_loaded = crate::openhuman::config::Config::load_or_init().await; + let (subagent_provider, model) = resolve_subagent_provider( + &definition.model, + &definition.id, + config_loaded.as_ref().ok(), + parent.provider.clone(), + parent.model_name.clone(), + !definition.subagents.is_empty(), + options.model_override.as_deref(), + ); + let temperature = definition.temperature; + + // ── Refresh connected-integrations at spawn time ─────────────────── + // + // The parent session's `connected_integrations` Vec is frozen at + // session-start. Re-fetch from the global integrations cache here. + // The cache is invalidated by `ComposioConnectionCreatedSubscriber` + // once the OAuth handshake reaches ACTIVE/CONNECTED, so this call + // returns the fresh list almost for free on the warm path. Fall back + // to the parent's frozen list when the live fetch returns empty. + let live_integrations: Vec = { + let probe_config = crate::openhuman::config::Config::load_or_init().await.ok(); + let signed_in = probe_config + .as_ref() + .map(user_is_signed_in_to_composio) + .unwrap_or(false); + if !signed_in { + parent.connected_integrations.clone() + } else { + match crate::openhuman::config::Config::load_or_init().await { + Ok(config) => { + use crate::openhuman::composio::FetchConnectedIntegrationsStatus; + match crate::openhuman::composio::fetch_connected_integrations_status(&config) + .await + { + FetchConnectedIntegrationsStatus::Authoritative(fresh) => { + tracing::debug!( + count = fresh.len(), + parent_count = parent.connected_integrations.len(), + "[subagent_runner] refreshed connected_integrations at spawn time" + ); + fresh + } + FetchConnectedIntegrationsStatus::Unavailable => { + tracing::debug!( + "[subagent_runner] integrations backend unavailable; falling back to parent's frozen list" + ); + parent.connected_integrations.clone() + } + } + } + Err(e) => { + tracing::debug!( + error = %e, + "[subagent_runner] config load failed; falling back to parent's 
frozen integrations list" + ); + parent.connected_integrations.clone() + } + } + } + }; + + // ── Filter tools per definition + per-spawn override ─────────────── + let toolkit_filter = options.toolkit_override.as_deref(); + let mut allowed_indices = filter_tool_indices( + &parent.all_tools, + &definition.tools, + &definition.disallowed_tools, + options + .skill_filter_override + .as_deref() + .or(definition.skill_filter.as_deref()), + ); + + // Sub-agents must never spawn their own sub-agents. Strip `spawn_subagent` + // and every synthesised `delegate_*` tool regardless of the archetype's + // declared scope. + let before = allowed_indices.len(); + allowed_indices.retain(|&i| { + let name = parent.all_tools[i].name(); + !is_subagent_spawn_tool(name) && name != "spawn_worker_thread" + }); + let stripped = before - allowed_indices.len(); + if stripped > 0 { + tracing::debug!( + agent_id = %definition.id, + stripped, + "[subagent_runner] removed sub-agent spawn tools from sub-agent's tool surface" + ); + } + + // ── Force-include extra_tools ────────────────────────────────────── + if !definition.extra_tools.is_empty() { + let disallow_set: std::collections::HashSet<&str> = definition + .disallowed_tools + .iter() + .map(|s| s.as_str()) + .collect(); + for (i, tool) in parent.all_tools.iter().enumerate() { + let name = tool.name(); + if definition.extra_tools.iter().any(|n| n == name) + && !allowed_indices.contains(&i) + && !disallow_set.contains(name) + && !is_subagent_spawn_tool(name) + { + allowed_indices.push(i); + } + } + } + + // ── Dynamic per-action toolkit tools (integrations_agent + toolkit) ────── + let mut dynamic_tools: Vec> = Vec::new(); + let mut lazy_resolver: Option = None; + let is_integrations_agent_with_toolkit = + definition.id == "integrations_agent" && toolkit_filter.is_some(); + + // `tools_agent` must never see Workflow-category tools. 
+ if definition.id == "tools_agent" { + allowed_indices.retain(|&i| parent.all_tools[i].category() != ToolCategory::Workflow); + } + + if is_integrations_agent_with_toolkit { + if let Some(tk) = toolkit_filter { + let arc_config = match crate::openhuman::config::Config::load_or_init().await { + Ok(c) => std::sync::Arc::new(c), + Err(e) => { + tracing::warn!( + agent_id = %definition.id, + toolkit = %tk, + error = %e, + "[subagent_runner:typed] config load failed; dynamic composio tools won't be registered" + ); + return Err(SubagentRunError::Provider(anyhow::anyhow!( + "subagent_runner: config load failed building integrations_agent for toolkit `{tk}`: {e}" + ))); + } + }; + + use crate::openhuman::composio::client::{create_composio_client, ComposioClientKind}; + let client_kind = match create_composio_client(arc_config.as_ref()) { + Ok(k) => Some(k), + Err(e) => { + tracing::warn!( + agent_id = %definition.id, + toolkit = %tk, + error = %e, + "[subagent_runner:typed] composio factory failed; dynamic per-action tools fall back to cached catalogue" + ); + None + } + }; + + if let Some(cached_integration) = live_integrations + .iter() + .find(|ci| ci.connected && ci.toolkit.eq_ignore_ascii_case(tk)) + { + let fresh_actions = match &client_kind { + Some(ComposioClientKind::Backend(client)) => { + match crate::openhuman::composio::fetch_toolkit_actions(client, tk, None) + .await + { + Ok(actions) if !actions.is_empty() => actions, + Ok(_) => { + tracing::debug!( + agent_id = %definition.id, + toolkit = %tk, + "[subagent_runner:typed] fresh list_tools returned empty; falling back to cached catalogue" + ); + cached_integration.tools.clone() + } + Err(e) => { + tracing::warn!( + agent_id = %definition.id, + toolkit = %tk, + error = %e, + "[subagent_runner:typed] fresh list_tools failed; falling back to cached catalogue" + ); + cached_integration.tools.clone() + } + } + } + Some(ComposioClientKind::Direct(_)) => { + tracing::info!( + agent_id = %definition.id, + toolkit = 
%tk, + cached_actions = cached_integration.tools.len(), + "[composio-direct] subagent_runner:typed: direct mode active — using cached catalogue, skipping backend list_tools refresh" + ); + cached_integration.tools.clone() + } + None => { + tracing::debug!( + agent_id = %definition.id, + toolkit = %tk, + cached_actions = cached_integration.tools.len(), + "[subagent_runner:typed] composio client unavailable; using cached catalogue" + ); + cached_integration.tools.clone() + } + }; + let integration = crate::openhuman::context::prompt::ConnectedIntegration { + toolkit: cached_integration.toolkit.clone(), + description: cached_integration.description.clone(), + tools: fresh_actions, + gated_tools: cached_integration.gated_tools.clone(), + connected: cached_integration.connected, + non_active_status: cached_integration.non_active_status.clone(), + }; + let integration = &integration; + let top_k = top_k_for_toolkit(tk); + let filter_hits = super::super::super::tool_filter::filter_actions_by_prompt( + task_prompt, + &integration.tools, + top_k, + ); + let selected: Vec<&crate::openhuman::context::prompt::ConnectedIntegrationTool> = + if filter_hits.len() >= super::super::super::tool_filter::MIN_CONFIDENT_HITS { + tracing::info!( + agent_id = %definition.id, + toolkit = %tk, + total = integration.tools.len(), + kept = filter_hits.len(), + top_k = top_k, + "[subagent_runner:typed] fuzzy tool filter narrowed toolkit" + ); + filter_hits.iter().map(|&i| &integration.tools[i]).collect() + } else { + tracing::info!( + agent_id = %definition.id, + toolkit = %tk, + total = integration.tools.len(), + filter_hits = filter_hits.len(), + "[subagent_runner:typed] fuzzy filter thin; falling back to full toolkit" + ); + integration.tools.iter().collect() + }; + + for action in selected { + dynamic_tools.push(Box::new( + crate::openhuman::composio::ComposioActionTool::new( + arc_config.clone(), + action.name.clone(), + action.description.clone(), + action.parameters.clone(), + ), + )); + 
} + tracing::debug!( + agent_id = %definition.id, + toolkit = %tk, + action_count = dynamic_tools.len(), + "[subagent_runner:typed] dynamically registered per-action composio tools" + ); + lazy_resolver = Some(LazyToolkitResolver { + config: arc_config.clone(), + actions: integration.tools.clone(), + }); + } else { + tracing::warn!( + agent_id = %definition.id, + toolkit = %tk, + "[subagent_runner:typed] toolkit not found among parent's connected integrations; sub-agent will have no callable actions (spawn_subagent pre-flight should have caught this)" + ); + } + } + } + + // ── Progressive-disclosure handoff cache ─────────────────────────── + let handoff_cache: Option> = if is_integrations_agent_with_toolkit { + let cache = Arc::new(ResultHandoffCache::new()); + let parent_chain = match parent.session_parent_prefix.as_deref() { + Some(prefix) => format!("{}__{}", prefix, parent.session_key), + None => parent.session_key.clone(), + }; + dynamic_tools.push(Box::new(ExtractFromResultTool::new( + cache.clone(), + parent.provider.clone(), + parent.workspace_dir.clone(), + parent_chain, + definition.id.clone(), + ))); + tracing::debug!( + agent_id = %definition.id, + "[subagent_runner:typed] registered extract_from_result tool + handoff cache" + ); + Some(cache) + } else { + None + }; + + // Build provider-visible tool schemas in EXECUTION-PRECEDENCE order: + // `dynamic_tools` (extra_tools at runtime) before parent specs. + let mut filtered_specs: Vec = dynamic_tools.iter().map(|t| t.spec()).collect(); + filtered_specs.extend( + allowed_indices + .iter() + .map(|&i| parent.all_tool_specs[i].clone()), + ); + let mut allowed_names: HashSet = allowed_indices + .iter() + .map(|&i| parent.all_tools[i].name().to_string()) + .collect(); + // Dynamic tool names must also be in the allowlist so the inner loop + // accepts model tool_calls that reference them. 
+ for tool in &dynamic_tools { + allowed_names.insert(tool.name().to_string()); + } + let filtered_specs = + crate::openhuman::agent::harness::session::dedup_visible_tool_specs(filtered_specs); + let filtered_specs = dedup_tool_specs_by_name(&definition.id, filtered_specs); + + tracing::debug!( + agent_id = %definition.id, + model = %model, + tool_count = allowed_names.len(), + max_iterations = definition.effective_max_iterations(), + iteration_policy = ?definition.iteration_policy, + "[subagent_runner:typed] resolved configuration" + ); + + // ── Build the narrow system prompt ───────────────────────────────── + let render_options = SubagentRenderOptions::from_definition_flags( + definition.omit_identity, + definition.omit_safety_preamble, + definition.omit_skills_catalog, + definition.omit_profile, + definition.omit_memory_md, + ); + + let narrowed_integrations: Vec = + match toolkit_filter { + Some(tk) => live_integrations + .iter() + .filter(|ci| ci.connected && ci.toolkit.eq_ignore_ascii_case(tk)) + .cloned() + .collect(), + None => live_integrations + .iter() + .filter(|ci| ci.connected) + .cloned() + .collect(), + }; + + let prompt_tools: Vec> = allowed_indices + .iter() + .map(|&i| { + let t = parent.all_tools[i].as_ref(); + PromptTool { + name: t.name(), + description: t.description(), + parameters_schema: Some(t.parameters_schema().to_string()), + } + }) + .chain(dynamic_tools.iter().map(|t| PromptTool { + name: t.name(), + description: t.description(), + parameters_schema: Some(t.parameters_schema().to_string()), + })) + .collect(); + let visible_tool_names: std::collections::HashSet = + prompt_tools.iter().map(|t| t.name.to_string()).collect(); + let dispatcher_instructions = { + use crate::openhuman::agent::dispatcher::{ + NativeToolDispatcher, PFormatToolDispatcher, ToolDispatcher, XmlToolDispatcher, + }; + use crate::openhuman::agent::pformat::PFormatRegistry; + use crate::openhuman::context::prompt::ToolCallFormat; + let empty_tools: Vec> = 
Vec::new(); + match parent.tool_call_format { + ToolCallFormat::PFormat => { + PFormatToolDispatcher::new(PFormatRegistry::new()).prompt_instructions(&empty_tools) + } + ToolCallFormat::Native => NativeToolDispatcher.prompt_instructions(&empty_tools), + ToolCallFormat::Json => XmlToolDispatcher.prompt_instructions(&empty_tools), + } + }; + let prompt_ctx = PromptContext { + workspace_dir: &parent.workspace_dir, + model_name: &model, + agent_id: &definition.id, + tools: &prompt_tools, + skills: &parent.skills, + dispatcher_instructions: &dispatcher_instructions, + learned: crate::openhuman::context::prompt::LearnedContextData::default(), + visible_tool_names: &visible_tool_names, + tool_call_format: parent.tool_call_format, + connected_integrations: &narrowed_integrations, + connected_identities_md: crate::openhuman::agent::prompts::render_connected_identities(), + include_profile: !definition.omit_profile, + include_memory_md: !definition.omit_memory_md, + curated_snapshot: None, + user_identity: crate::openhuman::app_state::peek_cached_current_user_identity(), + personality_soul_md: None, + personality_memory_md: None, + personality_roster: vec![], + }; + + let system_prompt = match &definition.system_prompt { + PromptSource::Dynamic(build) => { + build(&prompt_ctx).map_err(|e| SubagentRunError::PromptLoad { + path: format!("", definition.id), + source: std::io::Error::other(e.to_string()), + })? + } + PromptSource::Inline(_) | PromptSource::File { .. 
} => { + let archetype_prompt_body = load_prompt_source(&definition.system_prompt, &prompt_ctx)?; + render_subagent_system_prompt( + &parent.workspace_dir, + &model, + &allowed_indices, + &parent.all_tools, + &dynamic_tools, + &archetype_prompt_body, + render_options, + parent.tool_call_format, + &narrowed_integrations, + ) + } + }; + + let system_prompt = append_subagent_role_contract(system_prompt, &definition.id); + + // ── Build the user message (with optional context prefix) ────────── + let now = chrono::Local::now(); + let now_str = format!( + "Current Date & Time: {} ({})", + now.format("%Y-%m-%d %H:%M:%S"), + now.format("%Z") + ); + + let mut context_parts: Vec<&str> = Vec::new(); + if !definition.omit_memory_context { + if let Some(ref mem_ctx) = *parent.memory_context { + context_parts.push(mem_ctx); + } + } + context_parts.push(&now_str); + + if let Some(ref ctx) = options.context { + context_parts.push(ctx); + } + let mut history: Vec = + if let Some(ref initial) = options.initial_history { + tracing::info!( + agent_id = %definition.id, + task_id = %task_id, + history_len = initial.len(), + "[subagent_runner] resuming with initial_history (checkpoint replay)" + ); + initial.clone() + } else { + let user_message = if context_parts.is_empty() { + task_prompt.to_string() + } else { + format!("[Context]\n{}\n\n{task_prompt}", context_parts.join("\n\n")) + }; + vec![ + crate::openhuman::inference::provider::ChatMessage::system(system_prompt), + crate::openhuman::inference::provider::ChatMessage::user(user_message), + ] + }; + + // ── Run the inner tool-call loop ─────────────────────────────────── + let (output, iterations, _agg_usage, early_exit_tool) = Box::pin(run_inner_loop( + subagent_provider.as_ref(), + &mut history, + &parent.all_tools, + dynamic_tools, + &filtered_specs, + allowed_names, + lazy_resolver, + &model, + temperature, + definition.effective_max_iterations(), + task_id, + &definition.id, + options.worker_thread_id.clone(), + 
handoff_cache.as_deref(), + parent, + definition.iteration_policy == IterationPolicy::Extended, + )) + .await?; + + // Determine status: if the turn engine exited early because of + // ask_user_clarification, checkpoint the history and return + // AwaitingUser so the orchestrator can relay the user's answer. + let status = if early_exit_tool.as_deref() == Some("ask_user_clarification") { + let question = output.clone(); + let options_vec: Option> = None; + + let checkpoint_dir = options + .checkpoint_dir + .clone() + .unwrap_or_else(|| parent.workspace_dir.join(".openhuman/subagent_checkpoints")); + if let Err(e) = std::fs::create_dir_all(&checkpoint_dir) { + tracing::warn!( + task_id = %task_id, + error = %e, + "[subagent_runner] failed to create checkpoint directory" + ); + } else { + let checkpoint_data = + crate::openhuman::agent::harness::subagent_runner::types::SubagentCheckpointData { + task_id: task_id.to_string(), + agent_id: definition.id.clone(), + worker_thread_id: options.worker_thread_id.clone(), + history: history.clone(), + question: question.clone(), + options: options_vec.clone(), + toolkit_override: options.toolkit_override.clone(), + skill_filter_override: options.skill_filter_override.clone(), + model_override: options.model_override.clone(), + created_at: chrono::Utc::now().to_rfc3339(), + }; + let checkpoint_path = checkpoint_dir.join(format!("{task_id}.json")); + match serde_json::to_string_pretty(&checkpoint_data) { + Ok(json) => { + if let Err(e) = std::fs::write(&checkpoint_path, json) { + tracing::warn!( + task_id = %task_id, + path = %checkpoint_path.display(), + error = %e, + "[subagent_runner] failed to write checkpoint" + ); + } else { + tracing::info!( + task_id = %task_id, + path = %checkpoint_path.display(), + history_len = history.len(), + "[subagent_runner] checkpoint written for awaiting_user" + ); + } + } + Err(e) => { + tracing::warn!( + task_id = %task_id, + error = %e, + "[subagent_runner] failed to serialize checkpoint" + 
); + } + } + } + + crate::openhuman::agent::harness::subagent_runner::types::SubagentRunStatus::AwaitingUser { + question, + options: options_vec, + } + } else { + crate::openhuman::agent::harness::subagent_runner::types::SubagentRunStatus::Completed + }; + + Ok(SubagentRunOutcome { + task_id: task_id.to_string(), + agent_id: definition.id.clone(), + output, + iterations, + elapsed: started.elapsed(), + mode: SubagentMode::Typed, + status, + }) +} diff --git a/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs b/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs new file mode 100644 index 0000000000..09a5d7565a --- /dev/null +++ b/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs @@ -0,0 +1,130 @@ +//! Sub-agent [`ToolSource`] implementation. +//! +//! Looks up tools in `extra_tools` then the parent registry, lazily registers +//! toolkit actions the fuzzy filter omitted, rejects names outside the +//! allowlist, and routes execution through the shared [`run_one_tool`] (so +//! sub-agents now get the same approval gate, audit, credential scrub, +//! tokenjuice and timeout as the channel loop), then applies the +//! progressive-disclosure handoff. + +use std::collections::HashSet; + +use crate::openhuman::tools::{Tool, ToolSpec}; + +use super::handoff_helper::apply_handoff; +use super::provider::LazyToolkitResolver; +use crate::openhuman::agent::harness::subagent_runner::handoff::ResultHandoffCache; + +/// Sub-agent [`ToolSource`]: looks up tools in `extra_tools` then the parent +/// registry, lazily registers toolkit actions the fuzzy filter omitted, rejects +/// names outside the allowlist, and routes execution through the shared +/// [`run_one_tool`] (so sub-agents now get the same approval gate, audit, +/// credential scrub, tokenjuice and timeout as the channel loop), then applies +/// the progressive-disclosure handoff. 
+pub(super) struct SubagentToolSource<'a> { + pub(super) parent_tools: &'a [Box], + pub(super) extra_tools: Vec>, + pub(super) allowed_names: HashSet, + pub(super) lazy_resolver: Option, + pub(super) advertised_specs: Vec, + pub(super) handoff_cache: Option<&'a ResultHandoffCache>, + pub(super) policy: crate::openhuman::tools::policy::DefaultToolPolicy, + pub(super) agent_id: String, +} + +#[async_trait::async_trait] +impl super::super::super::engine::ToolSource for SubagentToolSource<'_> { + fn request_specs(&self) -> &[ToolSpec] { + &self.advertised_specs + } + + async fn execute_call( + &mut self, + call: &super::super::super::parse::ParsedToolCall, + iteration: usize, + progress: &dyn super::super::super::engine::ProgressReporter, + progress_call_id: &str, + ) -> super::super::super::engine::ToolRunResult { + // Lazy registration: a call for an unknown tool that matches a real + // action slug in the bound toolkit gets built on the spot and admitted + // to the allowlist. The fuzzy top-K filter keeps schemas out of the + // prompt, not out of execution. 
+ if !self.allowed_names.contains(&call.name) { + if let Some(resolver) = self.lazy_resolver.as_ref() { + if let Some(tool) = resolver.resolve(&call.name) { + tracing::info!( + agent_id = %self.agent_id, + tool = %call.name, + "[subagent_runner] lazily registered toolkit action outside fuzzy top-K" + ); + self.allowed_names.insert(tool.name().to_string()); + self.extra_tools.push(tool); + } + } + } + + if !self.allowed_names.contains(&call.name) { + tracing::warn!( + agent_id = %self.agent_id, + tool = %call.name, + "[subagent_runner] tool not in allowlist for this sub-agent" + ); + let iteration_u32 = (iteration + 1) as u32; + progress + .tool_started(progress_call_id, &call.name, &call.arguments, iteration_u32) + .await; + let mut available: Vec<&str> = self.allowed_names.iter().map(|s| s.as_str()).collect(); + if let Some(resolver) = self.lazy_resolver.as_ref() { + available.extend(resolver.known_slugs()); + } + available.sort_unstable(); + available.dedup(); + let text = format!( + "Error: tool '{}' is not available to the {} sub-agent. 
Available tools: {}", + call.name, + self.agent_id, + available.join(", ") + ); + progress + .tool_completed( + progress_call_id, + &call.name, + false, + text.chars().count(), + 0, + iteration_u32, + ) + .await; + return super::super::super::engine::ToolRunResult { + text, + success: false, + }; + } + + let tool_opt: Option<&dyn Tool> = self + .extra_tools + .iter() + .find(|t| t.name() == call.name) + .or_else(|| self.parent_tools.iter().find(|t| t.name() == call.name)) + .map(|b| b.as_ref()); + let outcome = super::super::super::engine::run_one_tool( + tool_opt, + call, + iteration, + progress, + &self.policy, + None, + progress_call_id, + ) + .await; + + let text = match self.handoff_cache { + Some(cache) => apply_handoff(cache, &call.name, "", &self.agent_id, outcome.text), + None => outcome.text, + }; + super::super::super::engine::ToolRunResult { + text, + success: outcome.success, + } + } +} diff --git a/src/openhuman/agent/prompts/builder.rs b/src/openhuman/agent/prompts/builder.rs new file mode 100644 index 0000000000..569216595b --- /dev/null +++ b/src/openhuman/agent/prompts/builder.rs @@ -0,0 +1,255 @@ +//! [`SystemPromptBuilder`] — assembles ordered [`PromptSection`]s into a +//! final system-prompt string. + +use super::sections::*; +use super::types::*; +use anyhow::Result; + +/// Global style rules appended to every assembled system prompt, regardless +/// of which sections the agent opts in/out of. Kept tiny and byte-stable so +/// it doesn't bust the inference backend's prefix cache. +pub const GLOBAL_STYLE_SUFFIX: &str = "## Output style\n\n\ + - Do **not** use em-dashes (`—`). Replace them with commas, colons, \ + parentheses, or two short sentences. 
This applies to every output \ + you produce: chat replies, summaries, tool args, and file contents.\n"; + +#[derive(Default)] +pub struct SystemPromptBuilder { + pub(super) sections: Vec>, +} + +impl SystemPromptBuilder { + pub fn with_defaults() -> Self { + Self { + sections: vec![ + Box::new(IdentitySection), + // User files (PROFILE.md, MEMORY.md) ride right after the + // identity bootstrap so they land in the cache-friendly + // prefix alongside SOUL/IDENTITY. Gated per-agent — see + // `UserFilesSection`. Intentionally separate from + // `IdentitySection` so agents that strip the identity + // preamble via `for_subagent(omit_identity=true)` still + // get their user files (welcome / orchestrator / the + // trigger pair). + Box::new(UserFilesSection), + // User memory sits right after the identity bootstrap so the + // model has rich, persistent context about the user before it + // sees the tool catalogue. Section is empty (and skipped) when + // the tree summarizer has nothing on disk yet. + // + // The privileged `UserReflectionsSection` is appended + // dynamically by `session::builder` when the + // learning subsystem is enabled, alongside + // `LearnedContextSection` / `UserProfileSection` — those + // three are config-gated and intentionally not part of + // the static default chain. + Box::new(UserMemorySection), + Box::new(ToolsSection), + Box::new(SafetySection), + Box::new(WorkspaceSection), + Box::new(DateTimeSection), + Box::new(RuntimeSection), + ], + } + } + + /// Build a narrow prompt for a sub-agent. + /// + /// The sub-agent's archetype prompt is registered as a dedicated + /// section that always renders first. The remaining sections respect + /// the `omit_*` flags from the [`crate::openhuman::agent::harness::definition::AgentDefinition`]: + /// `omit_identity` skips the project-context dump, `omit_safety_preamble` + /// skips the safety rules, and so on. 
The `WorkspaceSection` is always + /// included so the sub-agent knows its working directory. + /// + /// `archetype_prompt_text` is the already-loaded body of the + /// `system_prompt` source on the definition (the runner resolves + /// inline vs file before calling this). + /// + /// # KV cache stability + /// + /// `DateTimeSection` is intentionally **not** included here. + /// Repeat spawns of the same sub-agent definition must produce + /// byte-identical system prompts so the inference backend's + /// automatic prefix cache can reuse the prefill from the previous + /// run. Injecting `Local::now()` into the prompt would defeat that + /// goal — if a sub-agent genuinely needs the current time it + /// should receive it via the user message, not the system prompt. + pub fn for_subagent( + archetype_prompt_text: String, + omit_identity: bool, + omit_safety_preamble: bool, + _omit_skills_catalog: bool, + ) -> Self { + let mut sections: Vec> = + vec![Box::new(ArchetypePromptSection::new(archetype_prompt_text))]; + + if !omit_identity { + sections.push(Box::new(IdentitySection)); + } + // User files (PROFILE.md / MEMORY.md) are gated independently of + // `omit_identity` so agents that drop the identity preamble (e.g. + // welcome's `omit_identity = true`) still surface the user's + // onboarding + archivist context when `omit_profile` / + // `omit_memory_md` are opted in. + sections.push(Box::new(UserFilesSection)); + // Tools section is always included — the sub-agent needs to see + // its own (filtered) tool catalogue. + sections.push(Box::new(ToolsSection)); + if !omit_safety_preamble { + sections.push(Box::new(SafetySection)); + } + // Skills catalogue and connected integrations are rendered by + // the individual agent's `prompt.rs` when that agent needs + // them (integrations_agent for the skill-executor voice, + // orchestrator/welcome for the delegator voice). 
The shared + // builder intentionally does not emit them — keeping + // agent-specific prose scoped to the agent that owns it. + sections.push(Box::new(WorkspaceSection)); + + Self { sections } + } + + /// Build from a fully-assembled prompt string — no section wrapping. + /// + /// Used when the caller has already composed the final prompt (e.g. + /// via a function-driven `PromptSource::Dynamic` builder that calls + /// the `render_*` section helpers itself). The returned builder has + /// a single [`ArchetypePromptSection`] containing the body verbatim. + pub fn from_final_body(body: String) -> Self { + Self { + sections: vec![Box::new(ArchetypePromptSection::new(body))], + } + } + + /// Build from a [`PromptSource::Dynamic`] function pointer. + /// + /// The function is called every time [`Self::build`] runs, with the + /// live [`PromptContext`] the call-site supplies — so late-arriving + /// state like `connected_integrations` (fetched asynchronously at + /// the start of a session) reaches the dynamic renderer instead of + /// being frozen into an empty slice at builder-construction time. + /// + /// KV-cache contract: callers must only invoke `build_system_prompt` + /// once per session (after `fetch_connected_integrations`). The + /// rendered bytes are then frozen for the rest of the session the + /// same way `from_final_body` freezes them — the difference is just + /// *when* the freeze happens. + pub fn from_dynamic( + builder: crate::openhuman::agent::harness::definition::PromptBuilder, + ) -> Self { + Self { + sections: vec![Box::new(DynamicPromptSection::new(builder))], + } + } + + pub fn add_section(mut self, section: Box) -> Self { + self.sections.push(section); + self + } + + /// Insert `section` immediately before the first existing section + /// whose [`PromptSection::name`] matches `target_name`. 
When no + /// matching section is present (most dynamic / sub-agent builders + /// do not include `user_memory`, for example), the new section is + /// appended at the end instead. + /// + /// Used by the session builder to guarantee that the privileged + /// reflection block ranks ahead of broader memory sections like + /// `user_memory`, even when the surrounding builder was assembled + /// via [`Self::with_defaults`] which already contains them. + pub fn insert_section_before( + mut self, + target_name: &str, + section: Box, + ) -> Self { + let position = self.sections.iter().position(|s| s.name() == target_name); + match position { + Some(idx) => self.sections.insert(idx, section), + None => self.sections.push(section), + } + self + } + + /// Append a [`ToolMemoryRulesSection`] carrying a pre-fetched + /// snapshot of Critical / High priority tool-scoped rules (#1400). + /// + /// Snapshot semantics — the rules are baked into the section at + /// construction so the rendered system prompt stays byte-identical + /// for the lifetime of the session. The session builder is + /// responsible for pre-fetching via + /// [`crate::openhuman::memory_tools::ToolMemoryStore::rules_for_prompt`] + /// (or the `memory_tool_rules_for_prompt` RPC) before invoking + /// this method. + /// + /// No-op when `rules` is empty. + pub fn with_tool_memory_rules( + mut self, + rules: Vec, + ) -> Self { + if rules.is_empty() { + return self; + } + // Insert before the tool-catalogue section so these rules appear + // adjacent to the tool listings and survive tail-biased trimming. + // Falls back to push when no tools section is present. 
+ let section: Box = Box::new( + crate::openhuman::memory_tools::ToolMemoryRulesSection::new(rules), + ); + let tools_idx = self + .sections + .iter() + .position(|s| s.name() == "tools" || s.name() == "tool_catalogue"); + match tools_idx { + Some(idx) => self.sections.insert(idx, section), + None => self.sections.push(section), + } + self + } + + /// Append a "Memory context" section carrying the resolved chunks the + /// subconscious LLM cited when it produced the reflection that + /// spawned this thread (#623). + /// + /// Snapshot semantics — chunks are baked at construction so the + /// rendered system prompt remains byte-identical for the lifetime of + /// the session, preserving the inference backend's prefix cache hit. + /// The session builder calls this when it detects a thread with a + /// `subconscious_reflection`-origin seed message. + /// + /// No-op when `chunks` is empty. + pub fn with_reflection_context( + mut self, + chunks: Vec, + ) -> Self { + if chunks.is_empty() { + return self; + } + self.sections + .push(Box::new(ReflectionMemoryContextSection::new(chunks))); + self + } + + /// Render every section in order into a single prompt string. + /// + /// The rendered bytes are intended to be **frozen for the whole + /// session** — callers build the system prompt once at session + /// start and reuse the exact bytes on every subsequent turn so the + /// inference backend's prefix cache hits uniformly. There is no + /// cache-boundary marker to emit because the entire prompt is + /// static from the provider's perspective. 
+ pub fn build(&self, ctx: &PromptContext<'_>) -> Result { + let mut output = String::new(); + for section in &self.sections { + let part = section.build(ctx)?; + if part.trim().is_empty() { + continue; + } + output.push_str(part.trim_end()); + output.push_str("\n\n"); + } + output.push_str(GLOBAL_STYLE_SUFFIX); + output.push('\n'); + Ok(output) + } +} diff --git a/src/openhuman/agent/prompts/mod.rs b/src/openhuman/agent/prompts/mod.rs index 159861325f..7b715a5935 100644 --- a/src/openhuman/agent/prompts/mod.rs +++ b/src/openhuman/agent/prompts/mod.rs @@ -3,1514 +3,21 @@ pub use types::*; mod connected_identities; pub use connected_identities::render_connected_identities; -use crate::openhuman::tools::Tool; -use crate::openhuman::workflows::Workflow; -use anyhow::Result; -use chrono::{DateTime, Local, Utc}; -use std::fmt::Write; -use std::hash::{Hash, Hasher}; -use std::path::Path; -use std::sync::OnceLock; - -#[derive(Default)] -pub struct SystemPromptBuilder { - sections: Vec>, -} - -impl SystemPromptBuilder { - pub fn with_defaults() -> Self { - Self { - sections: vec![ - Box::new(IdentitySection), - // User files (PROFILE.md, MEMORY.md) ride right after the - // identity bootstrap so they land in the cache-friendly - // prefix alongside SOUL/IDENTITY. Gated per-agent — see - // `UserFilesSection`. Intentionally separate from - // `IdentitySection` so agents that strip the identity - // preamble via `for_subagent(omit_identity=true)` still - // get their user files (welcome / orchestrator / the - // trigger pair). - Box::new(UserFilesSection), - // User memory sits right after the identity bootstrap so the - // model has rich, persistent context about the user before it - // sees the tool catalogue. Section is empty (and skipped) when - // the tree summarizer has nothing on disk yet. 
- // - // The privileged `UserReflectionsSection` is appended - // dynamically by `session::builder` when the - // learning subsystem is enabled, alongside - // `LearnedContextSection` / `UserProfileSection` — those - // three are config-gated and intentionally not part of - // the static default chain. - Box::new(UserMemorySection), - Box::new(ToolsSection), - Box::new(SafetySection), - Box::new(WorkspaceSection), - Box::new(DateTimeSection), - Box::new(RuntimeSection), - ], - } - } - - /// Build a narrow prompt for a sub-agent. - /// - /// The sub-agent's archetype prompt is registered as a dedicated - /// section that always renders first. The remaining sections respect - /// the `omit_*` flags from the [`crate::openhuman::agent::harness::definition::AgentDefinition`]: - /// `omit_identity` skips the project-context dump, `omit_safety_preamble` - /// skips the safety rules, and so on. The `WorkspaceSection` is always - /// included so the sub-agent knows its working directory. - /// - /// `archetype_prompt_text` is the already-loaded body of the - /// `system_prompt` source on the definition (the runner resolves - /// inline vs file before calling this). - /// - /// # KV cache stability - /// - /// `DateTimeSection` is intentionally **not** included here. - /// Repeat spawns of the same sub-agent definition must produce - /// byte-identical system prompts so the inference backend's - /// automatic prefix cache can reuse the prefill from the previous - /// run. Injecting `Local::now()` into the prompt would defeat that - /// goal — if a sub-agent genuinely needs the current time it - /// should receive it via the user message, not the system prompt. 
- pub fn for_subagent( - archetype_prompt_text: String, - omit_identity: bool, - omit_safety_preamble: bool, - _omit_skills_catalog: bool, - ) -> Self { - let mut sections: Vec> = - vec![Box::new(ArchetypePromptSection::new(archetype_prompt_text))]; - - if !omit_identity { - sections.push(Box::new(IdentitySection)); - } - // User files (PROFILE.md / MEMORY.md) are gated independently of - // `omit_identity` so agents that drop the identity preamble (e.g. - // welcome's `omit_identity = true`) still surface the user's - // onboarding + archivist context when `omit_profile` / - // `omit_memory_md` are opted in. - sections.push(Box::new(UserFilesSection)); - // Tools section is always included — the sub-agent needs to see - // its own (filtered) tool catalogue. - sections.push(Box::new(ToolsSection)); - if !omit_safety_preamble { - sections.push(Box::new(SafetySection)); - } - // Skills catalogue and connected integrations are rendered by - // the individual agent's `prompt.rs` when that agent needs - // them (integrations_agent for the skill-executor voice, - // orchestrator/welcome for the delegator voice). The shared - // builder intentionally does not emit them — keeping - // agent-specific prose scoped to the agent that owns it. - sections.push(Box::new(WorkspaceSection)); - - Self { sections } - } - - /// Build from a fully-assembled prompt string — no section wrapping. - /// - /// Used when the caller has already composed the final prompt (e.g. - /// via a function-driven `PromptSource::Dynamic` builder that calls - /// the `render_*` section helpers itself). The returned builder has - /// a single [`ArchetypePromptSection`] containing the body verbatim. - pub fn from_final_body(body: String) -> Self { - Self { - sections: vec![Box::new(ArchetypePromptSection::new(body))], - } - } - - /// Build from a [`PromptSource::Dynamic`] function pointer. 
- /// - /// The function is called every time [`Self::build`] runs, with the - /// live [`PromptContext`] the call-site supplies — so late-arriving - /// state like `connected_integrations` (fetched asynchronously at - /// the start of a session) reaches the dynamic renderer instead of - /// being frozen into an empty slice at builder-construction time. - /// - /// KV-cache contract: callers must only invoke `build_system_prompt` - /// once per session (after `fetch_connected_integrations`). The - /// rendered bytes are then frozen for the rest of the session the - /// same way `from_final_body` freezes them — the difference is just - /// *when* the freeze happens. - pub fn from_dynamic( - builder: crate::openhuman::agent::harness::definition::PromptBuilder, - ) -> Self { - Self { - sections: vec![Box::new(DynamicPromptSection::new(builder))], - } - } - - pub fn add_section(mut self, section: Box) -> Self { - self.sections.push(section); - self - } - - /// Insert `section` immediately before the first existing section - /// whose [`PromptSection::name`] matches `target_name`. When no - /// matching section is present (most dynamic / sub-agent builders - /// do not include `user_memory`, for example), the new section is - /// appended at the end instead. - /// - /// Used by the session builder to guarantee that the privileged - /// reflection block ranks ahead of broader memory sections like - /// `user_memory`, even when the surrounding builder was assembled - /// via [`Self::with_defaults`] which already contains them. - pub fn insert_section_before( - mut self, - target_name: &str, - section: Box, - ) -> Self { - let position = self.sections.iter().position(|s| s.name() == target_name); - match position { - Some(idx) => self.sections.insert(idx, section), - None => self.sections.push(section), - } - self - } - - /// Append a [`ToolMemoryRulesSection`] carrying a pre-fetched - /// snapshot of Critical / High priority tool-scoped rules (#1400). 
- /// - /// Snapshot semantics — the rules are baked into the section at - /// construction so the rendered system prompt stays byte-identical - /// for the lifetime of the session. The session builder is - /// responsible for pre-fetching via - /// [`crate::openhuman::memory_tools::ToolMemoryStore::rules_for_prompt`] - /// (or the `memory_tool_rules_for_prompt` RPC) before invoking - /// this method. - /// - /// No-op when `rules` is empty. - pub fn with_tool_memory_rules( - mut self, - rules: Vec, - ) -> Self { - if rules.is_empty() { - return self; - } - // Insert before the tool-catalogue section so these rules appear - // adjacent to the tool listings and survive tail-biased trimming. - // Falls back to push when no tools section is present. - let section: Box = Box::new( - crate::openhuman::memory_tools::ToolMemoryRulesSection::new(rules), - ); - let tools_idx = self - .sections - .iter() - .position(|s| s.name() == "tools" || s.name() == "tool_catalogue"); - match tools_idx { - Some(idx) => self.sections.insert(idx, section), - None => self.sections.push(section), - } - self - } - - /// Append a "Memory context" section carrying the resolved chunks the - /// subconscious LLM cited when it produced the reflection that - /// spawned this thread (#623). - /// - /// Snapshot semantics — chunks are baked at construction so the - /// rendered system prompt remains byte-identical for the lifetime of - /// the session, preserving the inference backend's prefix cache hit. - /// The session builder calls this when it detects a thread with a - /// `subconscious_reflection`-origin seed message. - /// - /// No-op when `chunks` is empty. - pub fn with_reflection_context( - mut self, - chunks: Vec, - ) -> Self { - if chunks.is_empty() { - return self; - } - self.sections - .push(Box::new(ReflectionMemoryContextSection::new(chunks))); - self - } - - /// Render every section in order into a single prompt string. 
- /// - /// The rendered bytes are intended to be **frozen for the whole - /// session** — callers build the system prompt once at session - /// start and reuse the exact bytes on every subsequent turn so the - /// inference backend's prefix cache hits uniformly. There is no - /// cache-boundary marker to emit because the entire prompt is - /// static from the provider's perspective. - pub fn build(&self, ctx: &PromptContext<'_>) -> Result { - let mut output = String::new(); - for section in &self.sections { - let part = section.build(ctx)?; - if part.trim().is_empty() { - continue; - } - output.push_str(part.trim_end()); - output.push_str("\n\n"); - } - output.push_str(GLOBAL_STYLE_SUFFIX); - output.push('\n'); - Ok(output) - } -} - -/// Global style rules appended to every assembled system prompt, regardless -/// of which sections the agent opts in/out of. Kept tiny and byte-stable so -/// it doesn't bust the inference backend's prefix cache. -pub const GLOBAL_STYLE_SUFFIX: &str = "## Output style\n\n\ - - Do **not** use em-dashes (`—`). Replace them with commas, colons, \ - parentheses, or two short sentences. This applies to every output \ - you produce: chat replies, summaries, tool args, and file contents.\n"; - -/// "Memory context" section for chat threads spawned from a subconscious -/// reflection (#623). Renders the resolved [`SourceChunk`]s that the -/// subconscious LLM cited when it produced the reflection — gives the -/// orchestrator the same memory context the reflection-LLM had, so the -/// user can drill into the observation without the orchestrator -/// hallucinating details it never saw. -/// -/// Chunks are passed in at construction (snapshot at session-start) so -/// the rendered bytes stay stable for the whole session, matching the -/// "frozen prompt for prefix cache" contract documented on -/// [`SystemPromptBuilder::build`]. 
-pub struct ReflectionMemoryContextSection { - chunks: Vec, -} - -impl ReflectionMemoryContextSection { - pub fn new(chunks: Vec) -> Self { - Self { chunks } - } -} - -impl PromptSection for ReflectionMemoryContextSection { - fn name(&self) -> &str { - "reflection_memory_context" - } - - fn build(&self, _ctx: &PromptContext<'_>) -> Result { - // Skip chunks the resolver couldn't populate — `not_found`, - // `db_error`, or stub kinds without a wired resolver yet. Earlier - // versions emitted "(content not yet resolved)" as a placeholder, - // but the orchestrator picks up that literal string as part of - // its memory context and ends up echoing it back to the user - // mid-reply. Better to give the LLM no chunk than a placeholder - // it'll quote. - let usable: Vec<&crate::openhuman::subconscious::SourceChunk> = self - .chunks - .iter() - .filter(|c| !c.content.trim().is_empty()) - .collect(); - if usable.is_empty() { - return Ok(String::new()); - } - let mut out = String::from("## Memory context\n\n"); - out.push_str( - "This thread was spawned from a subconscious reflection. The chunks below \ - are what OpenHuman was looking at when it surfaced the observation — \ - use them to ground follow-up answers in the same evidence the reflection \ - was based on.\n\n", - ); - for chunk in usable { - let body = chunk.content.replace('\n', " ").trim().to_string(); - let _ = writeln!( - out, - "- **{kind}** `{ref_id}`: {body}", - kind = chunk.kind, - ref_id = chunk.ref_id, - body = body, - ); - } - Ok(out) - } -} - -/// Sub-agent role prompt — pre-loaded text from an -/// [`crate::openhuman::agent::harness::definition::AgentDefinition`]'s -/// `system_prompt` field. Always rendered first when present. 
-pub struct ArchetypePromptSection { - body: String, -} - -impl ArchetypePromptSection { - pub fn new(body: String) -> Self { - Self { body } - } -} - -impl PromptSection for ArchetypePromptSection { - fn name(&self) -> &str { - "archetype_prompt" - } - - fn build(&self, _ctx: &PromptContext<'_>) -> Result { - if self.body.trim().is_empty() { - return Ok(String::new()); - } - Ok(self.body.clone()) - } -} - -/// Section that defers to a [`crate::openhuman::agent::harness::definition::PromptBuilder`] -/// every time it renders, so dynamic prompts (orchestrator, welcome, -/// integrations_agent, …) get to see the live runtime -/// [`PromptContext`] — including `connected_integrations`, which are -/// fetched asynchronously after the builder itself has been -/// constructed. -pub struct DynamicPromptSection { - builder: crate::openhuman::agent::harness::definition::PromptBuilder, -} - -impl DynamicPromptSection { - pub fn new(builder: crate::openhuman::agent::harness::definition::PromptBuilder) -> Self { - Self { builder } - } -} - -impl PromptSection for DynamicPromptSection { - fn name(&self) -> &str { - "dynamic_prompt" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - (self.builder)(ctx) - } -} - -pub struct IdentitySection; -pub struct ToolsSection; -pub struct SafetySection; -// `WorkflowsSection` and `ConnectedIntegrationsSection` previously lived -// here and branched on `ctx.agent_id` to pick between the skill- -// executor and delegator voice. They've been removed — each agent's -// `prompt.rs` now renders its own block inline (integrations_agent owns the -// `## Available Skills` + executor-voice `## Connected Integrations` -// blocks, orchestrator owns `## Delegation Guide — Integrations`, -// welcome owns its onboarding-flavoured connected list). 
-pub struct WorkspaceSection; -pub struct RuntimeSection; -pub struct DateTimeSection; -pub struct UserMemorySection; -/// Renders explicit user reflections — a privileged memory class -/// distinct from generic tree summaries. Rendered above -/// [`UserMemorySection`] so the orchestrator sees the user's own -/// intentional self-statements before any broader summary block. -/// -/// Empty (and skipped) when [`LearnedContextData::reflections`] is -/// empty — keeps the prompt clean for users who haven't yet expressed -/// any reflection-style content. -pub struct UserReflectionsSection; -/// Renders the authenticated user's non-secret identity fields -/// (`id` / `name` / `email`) into the system prompt — see issue #926. -/// -/// Empty when [`PromptContext::user_identity`] is `None` or the -/// identity has no populated fields. Tokens, refresh tokens, and any -/// opaque credential material are forbidden — only the three -/// identifying fields ship. -pub struct UserIdentitySection; - -/// Injects the user-specific, session-frozen workspace files -/// (`PROFILE.md` + `MEMORY.md`), each capped at [`USER_FILE_MAX_CHARS`]. -/// -/// Separate from [`IdentitySection`] so agents that strip the project- -/// context preamble (`omit_identity = true` — welcome, orchestrator, -/// the trigger pair) still get their user-file injection at runtime via -/// [`SystemPromptBuilder::for_subagent`], which skips `IdentitySection` -/// entirely when `omit_identity` is on. -/// -/// Cache-stability: static per session — the whole point of the -/// 2000-char cap and the load-once rule documented on -/// [`AgentDefinition::omit_profile`] / `omit_memory_md`. -pub struct UserFilesSection; - -/// Renders the personality roster for the master agent's system prompt. 
-/// -/// When [`PromptContext::personality_roster`] is non-empty, emits an -/// `## Available Personalities` section listing each non-self personality -/// with its `id`, `name`, `description`, and an optional truncated -/// `memory_summary`. Empty (and skipped) for non-master agents. -pub struct PersonalityRosterSection; - -impl PromptSection for PersonalityRosterSection { - fn name(&self) -> &str { - "personality_roster" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - if ctx.personality_roster.is_empty() { - return Ok(String::new()); - } - let mut out = String::from("## Available Personalities\n\n"); - out.push_str( - "You are the master agent. You can delegate tasks to these personality agents \ - using the `delegate_to_personality` tool. Each personality has its own memory, \ - identity, and expertise.\n\n", - ); - for entry in &ctx.personality_roster { - out.push_str(&format!( - "- **{}** (`{}`): {}", - entry.name, entry.id, entry.description - )); - if let Some(ref summary) = entry.memory_summary { - let truncated = if summary.chars().count() > 200 { - let head: String = summary.chars().take(200).collect(); - format!("{head}…") - } else { - summary.clone() - }; - out.push_str(&format!("\n Recent context: {truncated}")); - } - out.push('\n'); - } - Ok(out) - } -} - -impl PromptSection for IdentitySection { - fn name(&self) -> &str { - "identity" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - let mut prompt = String::from("## Project Context\n\n"); - prompt.push_str( - "The following workspace files define your identity, behavior, and context.\n\n", - ); - // When the visible-tool filter is active the main agent is a pure - // orchestrator: it routes via spawn_subagent, synthesises results, - // and talks to the user. It does NOT need the periodic-task config - // (HEARTBEAT.md) — subagents handle their own concerns. 
- let is_orchestrator = !ctx.visible_tool_names.is_empty(); - let all_files: &[&str] = &["SOUL.md", "IDENTITY.md", "HEARTBEAT.md"]; - // Orchestrator skips these from the prompt but we still sync them - // to disk so they stay current. - let skip_in_prompt: &[&str] = if is_orchestrator { - &["HEARTBEAT.md"] - } else { - &[] - }; - for file in all_files { - // Always sync to disk so builtin updates ship. - sync_workspace_file(ctx.workspace_dir, file); - if skip_in_prompt.contains(file) { - continue; - } - if *file == "SOUL.md" { - if let Some(ref soul) = ctx.personality_soul_md { - tracing::debug!( - "[identity] personality SOUL.md override active ({} chars)", - soul.len() - ); - inject_inline_content(&mut prompt, "SOUL.md", soul, BOOTSTRAP_MAX_CHARS); - continue; - } - } - inject_workspace_file(&mut prompt, ctx.workspace_dir, file); - } - - // PROFILE.md / MEMORY.md injection lives in the dedicated - // `UserFilesSection` (below) so agents that strip the identity - // preamble (`omit_identity = true`) — welcome, orchestrator, the - // trigger pair — still get their user files at runtime via - // `SystemPromptBuilder::for_subagent`, which omits - // `IdentitySection` entirely when `omit_identity` is set. - - Ok(prompt) - } -} - -impl PromptSection for UserFilesSection { - fn name(&self) -> &str { - "user_files" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - // Gate on the per-agent flags derived from - // `AgentDefinition::omit_profile` / `omit_memory_md`. Both files - // are user-specific, potentially growing, and capped at - // [`USER_FILE_MAX_CHARS`] (~1000 tokens) so they can't bloat the - // cached prefix. - // - // KV-cache contract: once injected into a session's rendered - // prompt, the bytes are frozen for the remainder of that - // session — any mid-session archivist write or enrichment - // refresh lands on the NEXT session, never the in-flight one. 
- let mut out = String::new(); - if ctx.include_profile { - inject_workspace_file_capped( - &mut out, - ctx.workspace_dir, - "PROFILE.md", - USER_FILE_MAX_CHARS, - ); - } - if ctx.include_memory_md { - // Personality-specific MEMORY.md takes highest priority, then - // the session-frozen curated-memory snapshot, then the - // workspace file (pure prompt-unit tests and older call sites). - if let Some(ref memory_md) = ctx.personality_memory_md { - tracing::debug!( - "[user_files] personality MEMORY.md override active ({} chars)", - memory_md.len() - ); - inject_inline_content(&mut out, "MEMORY.md", memory_md, USER_FILE_MAX_CHARS); - } else if let Some(snap) = &ctx.curated_snapshot { - inject_snapshot_content(&mut out, "MEMORY.md", &snap.memory, USER_FILE_MAX_CHARS); - inject_snapshot_content(&mut out, "USER.md", &snap.user, USER_FILE_MAX_CHARS); - } else { - inject_workspace_file_capped( - &mut out, - ctx.workspace_dir, - "MEMORY.md", - USER_FILE_MAX_CHARS, - ); - } - } - Ok(out) - } -} - -impl PromptSection for ToolsSection { - fn name(&self) -> &str { - "tools" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - // Native function-calling: the provider already sends full JSON - // schemas in the API request — no need to repeat the tool catalogue - // in the system prompt (pure token bloat). However, any non-empty - // `dispatcher_instructions` (e.g. the "## Tool Use Protocol" block - // from NativeToolDispatcher) must still be included so the model - // receives its behavioural guidance. - if ctx.tool_call_format == ToolCallFormat::Native { - if ctx.dispatcher_instructions.trim().is_empty() { - return Ok(String::new()); - } - return Ok(ctx.dispatcher_instructions.to_string()); - } - let mut out = String::from("## Tools\n\n"); - let has_filter = !ctx.visible_tool_names.is_empty(); - for tool in ctx.tools { - // Skip tools not in the visible set when a filter is active. 
- if has_filter && !ctx.visible_tool_names.contains(tool.name) { - continue; - } - - // One rendering shape for every dispatcher: a compact - // P-Format signature (`name[a|b|c]`). The signature comes - // straight from the parameter schema (alphabetical by - // property name — see `pformat` module docs for why) so - // model and parser agree on argument ordering. For - // `Native` dispatchers the provider already has the full - // JSON schema in the API request, so repeating it in the - // prompt is pure token bloat; for `Json` / `PFormat` text - // dispatchers the dispatcher's own `prompt_instructions` - // block (appended below) carries whatever schema detail - // the wire format needs. - let signature = render_pformat_signature_for_prompt(tool); - let _ = writeln!( - out, - "- **{}**: {}\n Call as: `{}`", - tool.name, tool.description, signature - ); - } - if !ctx.dispatcher_instructions.is_empty() { - out.push('\n'); - out.push_str(ctx.dispatcher_instructions); - } - Ok(out) - } -} - -/// Build a P-Format signature line (`name[a|b|c]`) from a `&dyn Tool`. -/// Used by `render_subagent_system_prompt` which operates on `Box` -/// directly (no intermediate `PromptTool`). Mirrors the `PromptTool` variant -/// below — both BTreeMap-iterate the schema's `properties` in the same order. -fn render_pformat_signature_for_box_tool(tool: &dyn crate::openhuman::tools::Tool) -> String { - let schema = tool.parameters_schema(); - let names: Vec = schema - .get("properties") - .and_then(|p| p.as_object()) - .map(|m| m.keys().cloned().collect()) - .unwrap_or_default(); - if names.is_empty() { - format!("{}[]", tool.name()) - } else { - format!("{}[{}]", tool.name(), names.join("|")) - } -} - -/// Build a P-Format signature line (`name[a|b|c]`) from a [`PromptTool`]. -/// Local to this module so [`ToolsSection`] doesn't have to depend on -/// the agent crate's `pformat` helper. 
The two implementations stay in -/// lockstep — both use BTreeMap iteration order on the schema's -/// `properties` field. -fn render_pformat_signature_for_prompt(tool: &PromptTool<'_>) -> String { - let names: Vec = tool - .parameters_schema - .as_deref() - .and_then(|s| serde_json::from_str::(s).ok()) - .and_then(|v| { - v.get("properties") - .and_then(|p| p.as_object()) - .map(|m| m.keys().cloned().collect()) - }) - .unwrap_or_default(); - if names.is_empty() { - format!("{}[]", tool.name) - } else { - format!("{}[{}]", tool.name, names.join("|")) - } -} - -impl PromptSection for SafetySection { - fn name(&self) -> &str { - "safety" - } - - fn build(&self, _ctx: &PromptContext<'_>) -> Result { - Ok("## Safety\n\n- Do not exfiltrate private data.\n- Do not run destructive commands without asking.\n- Do not bypass oversight or approval mechanisms.\n- Prefer `trash` over `rm`.\n- When in doubt, ask before acting externally.".into()) - } -} - -impl PromptSection for WorkspaceSection { - fn name(&self) -> &str { - "workspace" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - Ok(format!( - "## Workspace\n\nWorking directory: `{}`", - ctx.workspace_dir.display() - )) - } -} - -impl PromptSection for RuntimeSection { - fn name(&self) -> &str { - "runtime" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - let host = - hostname::get().map_or_else(|_| "unknown".into(), |h| h.to_string_lossy().to_string()); - Ok(format!( - "## Runtime\n\nHost: {host} | OS: {} | Model: {}", - std::env::consts::OS, - ctx.model_name - )) - } -} - -impl PromptSection for UserReflectionsSection { - fn name(&self) -> &str { - "user_reflections" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - if ctx.learned.reflections.is_empty() { - return Ok(String::new()); - } - - let mut out = String::from("## User Reflections\n\n"); - out.push_str( - "Explicit reflections the user authored about themselves, their goals, \ - or how they want you to behave going forward. 
Treat these as \ - higher-priority than the broader user-memory summaries below: \ - they are recent, intentional, identity-relevant signals and \ - should steer your responses ahead of any generic historical \ - context.\n\n", - ); - for reflection in &ctx.learned.reflections { - let trimmed = reflection.trim(); - if trimmed.is_empty() { - continue; - } - out.push_str("- "); - out.push_str(trimmed); - out.push('\n'); - } - out.push('\n'); - Ok(out) - } -} - -/// Format a memory item's `updated_at` as an absolute UTC date label -/// for prompt injection, e.g. `2026-05-25`. -/// -/// Absolute (not relative "N days ago") on purpose: memory sections sit -/// near the front of the KV-cache-stable system prompt, so a label that -/// changes daily would bust the cached prefix for everything after it. -/// An absolute date only changes when the underlying memory does. The -/// model judges staleness by comparing this against the injected current -/// date. Shared by [`UserMemorySection`] and the working-memory block in -/// `agent::memory_loader`. (#2944) -pub(crate) fn memory_date_label(updated_at: DateTime) -> String { - updated_at.format("%Y-%m-%d").to_string() -} - -impl PromptSection for UserMemorySection { - fn name(&self) -> &str { - "user_memory" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - if ctx.learned.tree_root_summaries.is_empty() { - return Ok(String::new()); - } - - let mut out = String::from("## User Memory\n\n"); - out.push_str( - "Long-term memory distilled by the tree summarizer. \ - Each section is the root summary for a memory namespace, \ - representing everything we've learned about that domain over time. \ - Treat this as durable background context, but NOT as fresh, \ - present-tense fact: each section header shows when that memory \ - was last updated. 
Compare those dates against the `## Current \ - Date & Time` section below before answering time-sensitive \ - questions (today's briefing, daily summary, reminders, calendar, \ - notifications, \"today/tomorrow/this week\"). If a summary predates \ - the period the user is asking about, treat it as potentially \ - stale — say so explicitly and never present older memory as \ - today's update.\n\n", - ); - - for NamespaceSummary { - namespace, - body, - updated_at, - } in &ctx.learned.tree_root_summaries - { - let trimmed = body.trim(); - if trimmed.is_empty() { - continue; - } - // Absolute date (not "N days ago") keeps this front-of-prompt - // section byte-stable for KV-cache reuse — see `NamespaceSummary`. - let _ = writeln!( - out, - "### {namespace} (last updated {})\n", - memory_date_label(*updated_at) - ); - out.push_str(trimmed); - out.push_str("\n\n"); - } - - Ok(out) - } -} - -impl PromptSection for DateTimeSection { - fn name(&self) -> &str { - "datetime" - } - - fn build(&self, _ctx: &PromptContext<'_>) -> Result { - // IANA zone first because it's the unambiguous machine-readable - // form (`America/Los_Angeles`) — agents that need to reason about - // timezone rules should grep this, not the locale-dependent - // `%Z` abbreviation. Falls back to "UTC" when the host can't - // resolve a zone (CI, stripped containers). - let iana = iana_time_zone::get_timezone().unwrap_or_else(|_| "UTC".to_string()); - let now = Local::now(); - Ok(format!( - "## Current Date & Time\n\n{} {} ({}, UTC{})", - now.format("%Y-%m-%d %H:%M:%S"), - iana, - now.format("%Z"), - now.format("%:z"), - )) - } -} - -impl PromptSection for UserIdentitySection { - fn name(&self) -> &str { - "user_identity" - } - - fn build(&self, ctx: &PromptContext<'_>) -> Result { - let identity = match ctx.user_identity.as_ref() { - Some(id) if !id.is_empty() => id, - _ => return Ok(String::new()), - }; - - // Render the field list FIRST, then decide whether to ship the - // heading. 
`UserIdentity::is_empty()` only checks `None`-ness — - // a struct whose fields are all `Some("")` / whitespace would - // otherwise leave the prompt with a `## User` heading + intro - // pointing at zero fields, which is exactly the empty-prompt - // failure mode we're trying to suppress (#926). - let mut fields = String::new(); - if let Some(name) = identity.name.as_deref().filter(|s| !s.trim().is_empty()) { - let _ = writeln!(fields, "- name: {}", sanitize_identity_field(name)); - } - if let Some(email) = identity.email.as_deref().filter(|s| !s.trim().is_empty()) { - let _ = writeln!(fields, "- email: {}", sanitize_identity_field(email)); - } - if let Some(id) = identity.id.as_deref().filter(|s| !s.trim().is_empty()) { - let _ = writeln!(fields, "- id: {}", sanitize_identity_field(id)); - } - if fields.trim().is_empty() { - return Ok(String::new()); - } - - let mut out = String::from("## User\n\n"); - out.push_str( - "The signed-in user is identified below. Use these fields directly in tool \ - calls and do not ask the user to repeat them.\n\n", - ); - out.push_str(&fields); - Ok(out.trim_end().to_string()) - } -} - -/// Collapse newlines and runs of whitespace in a user-identity field so -/// it fits on a single markdown bullet without breaking the prompt -/// structure. Values come from `auth_get_me` (server-controlled), but -/// defence-in-depth: a name with embedded newlines could split the -/// `- name:` bullet and reshape the `## User` block. 
-fn sanitize_identity_field(s: &str) -> String { - s.chars() - .map(|c| if c == '\n' || c == '\r' { ' ' } else { c }) - .collect::() - .split_whitespace() - .collect::>() - .join(" ") -} - -// ───────────────────────────────────────────────────────────────────────────── -// Section helpers for function-driven prompts -// ───────────────────────────────────────────────────────────────────────────── -// -// Each of the `Section` unit structs above is also available as a free -// `render_*` function that takes the same `PromptContext` and returns -// the section body (or an empty string when the section's gate is -// closed). -// -// These exist so `agents//prompt.rs` builders can assemble their own -// final system prompt, composing the exact sections they care about in -// the order they want — no `SystemPromptBuilder` machinery required. - -/// Render the `## Project Context` identity block -/// (`SOUL.md` / `IDENTITY.md` / optionally `HEARTBEAT.md`). -pub fn render_identity(ctx: &PromptContext<'_>) -> Result { - IdentitySection.build(ctx) -} - -/// Render the `PROFILE.md` + `MEMORY.md` user-file injection. -/// Empty when neither `ctx.include_profile` nor `ctx.include_memory_md` -/// is set. -pub fn render_user_files(ctx: &PromptContext<'_>) -> Result { - UserFilesSection.build(ctx) -} - -/// Render the tree-summariser user-memory block. -pub fn render_user_memory(ctx: &PromptContext<'_>) -> Result { - UserMemorySection.build(ctx) -} - -/// Render the privileged `## User Reflections` block. Empty when the -/// learning subsystem has not captured any reflections yet. -pub fn render_user_reflections(ctx: &PromptContext<'_>) -> Result { - UserReflectionsSection.build(ctx) -} - -/// Render the `## Tools` catalogue in the dispatcher's tool-call format. -pub fn render_tools(ctx: &PromptContext<'_>) -> Result { - ToolsSection.build(ctx) -} - -/// Render the static `## Safety` block. 
-pub fn render_safety() -> String { - SafetySection - .build(&empty_prompt_context_for_static_sections()) - .expect("SafetySection::build is infallible") -} - -// `render_skills` and `render_connected_integrations` helpers are -// gone — `## Available Skills` lives in `integrations_agent/prompt.rs`, and -// the connected-integrations / delegation-guide blocks each live in -// their owning agent's `prompt.rs` so no branching-on-agent-id logic -// needs to exist here. - -/// Render the `## Workspace` block (working directory + file listing -/// bounds) — part of the dynamic, per-request suffix. -pub fn render_workspace(ctx: &PromptContext<'_>) -> Result { - WorkspaceSection.build(ctx) -} - -/// Render the `## Runtime` block (model name, dispatcher format) — -/// dynamic. -pub fn render_runtime(ctx: &PromptContext<'_>) -> Result { - RuntimeSection.build(ctx) -} - -/// Render the `## Current Date & Time` block. Intentionally **not** -/// included in byte-stable sub-agent prompts (`for_subagent`) because -/// injecting `Local::now()` defeats prefix caching. Exposed so full- -/// assembly main-agent builders can opt in. -pub fn render_datetime(ctx: &PromptContext<'_>) -> Result { - DateTimeSection.build(ctx) -} - -/// Render the `## User` identity block. Empty when -/// [`PromptContext::user_identity`] is unset or has no populated -/// fields. See issue #926. -pub fn render_user_identity(ctx: &PromptContext<'_>) -> Result { - UserIdentitySection.build(ctx) -} - -/// Compose the full ambient-environment block — runtime + user -/// identity + current date/time, in that order. -/// -/// Per-agent `prompt.rs` builders call this once near the end of their -/// assembly so every agent reports the same machine-readable view of -/// "where am I, who is the user, what time is it" (issue #926). 
-/// Datetime is appended last so the time-volatile section sits at the -/// tail of the prompt and the rest of the prefix stays cache-stable -/// across turns within the same minute, matching the convention used -/// by [`SystemPromptBuilder::with_defaults`]. -pub fn render_ambient_environment(ctx: &PromptContext<'_>) -> Result { - let mut out = String::with_capacity(512); - let runtime = render_runtime(ctx)?; - if !runtime.trim().is_empty() { - out.push_str(runtime.trim_end()); - out.push_str("\n\n"); - } - let user = render_user_identity(ctx)?; - if !user.trim().is_empty() { - out.push_str(user.trim_end()); - out.push_str("\n\n"); - } - let datetime = render_datetime(ctx)?; - if !datetime.trim().is_empty() { - out.push_str(datetime.trim_end()); - out.push('\n'); - } - Ok(out) -} - -/// Build a throwaway `PromptContext` for sections whose `build` only -/// uses static/immutable inputs (currently just `SafetySection`). Keeps -/// the `render_safety()` free function from forcing callers to -/// manufacture a full context when they only need the static text. -fn empty_prompt_context_for_static_sections() -> PromptContext<'static> { - static EMPTY_TOOLS: &[PromptTool<'static>] = &[]; - static EMPTY_SKILLS: &[Workflow] = &[]; - static EMPTY_INTEGRATIONS: &[ConnectedIntegration] = &[]; - // SAFETY: the &HashSet reference must outlive the returned context; - // a leaked OnceLock-style allocation gives us a permanent 'static - // anchor without adding runtime cost on the hot path. 
- static EMPTY_VISIBLE: OnceLock> = OnceLock::new(); - let visible = EMPTY_VISIBLE.get_or_init(std::collections::HashSet::new); - PromptContext { - workspace_dir: std::path::Path::new(""), - model_name: "", - agent_id: "", - tools: EMPTY_TOOLS, - skills: EMPTY_SKILLS, - dispatcher_instructions: "", - learned: LearnedContextData::default(), - visible_tool_names: visible, - tool_call_format: ToolCallFormat::PFormat, - connected_integrations: EMPTY_INTEGRATIONS, - connected_identities_md: String::new(), - include_profile: false, - include_memory_md: false, - curated_snapshot: None, - user_identity: None, - personality_soul_md: None, - personality_memory_md: None, - personality_roster: vec![], - } -} - -/// Render a narrow, KV-cache-stable system prompt for a typed sub-agent. -/// -/// This is a purpose-built alternative to -/// [`SystemPromptBuilder::for_subagent`] for call sites that only have -/// indices into the parent's `&[Box]` vec (so they can't -/// cheaply build a filtered owning slice for `ToolsSection`). The -/// output mirrors what `for_subagent` would emit with the matching -/// `omit_*` flags, plus a sub-agent-specific calling-convention -/// preamble and a model-only runtime banner. -/// -/// `archetype_body` is the already-loaded archetype markdown — for -/// `PromptSource::Inline` this is the inline string, for -/// `PromptSource::File` this is the file contents loaded by the caller. -/// Callers resolve the source exactly once and hand the body in, so -/// this renderer works uniformly for both definition shapes. -/// -/// `options` carries the per-definition rendering flags (safety, etc.) -/// inverted into positive-sense `include_*` form. -/// [`SubagentRenderOptions::narrow`] preserves the historical behaviour. 
-/// -/// # KV cache stability -/// -/// The rendered bytes MUST be a pure function of: -/// - the `archetype_body` (archetype role prompt) -/// - the filtered tool set (names, descriptions, schemas) -/// - the workspace directory -/// - the resolved model name -/// - the `options` (all static per definition) -/// -/// Anything that varies across invocations at the *same* call site -/// (e.g. `chrono::Local::now()`, hostnames, pids, turn counters) is -/// forbidden here. Repeat spawns of the same sub-agent within a session -/// must produce byte-identical system prompts so the inference -/// backend's automatic prefix caching can reuse the prefill from the -/// previous run. Time-of-day information, if a sub-agent needs it, -/// belongs in the user message — not the system prompt. -pub fn render_subagent_system_prompt( - workspace_dir: &Path, - model_name: &str, - allowed_indices: &[usize], - parent_tools: &[Box], - extra_tools: &[Box], - archetype_body: &str, - options: SubagentRenderOptions, - tool_call_format: ToolCallFormat, - connected_integrations: &[ConnectedIntegration], -) -> String { - render_subagent_system_prompt_with_format( - workspace_dir, - model_name, - allowed_indices, - parent_tools, - extra_tools, - archetype_body, - options, - tool_call_format, - connected_integrations, - ) -} - -/// Inner renderer that accepts an explicit [`ToolCallFormat`] so callers -/// that know the active dispatcher format can thread it through. The -/// public [`render_subagent_system_prompt`] defaults to PFormat for -/// backwards compatibility. -pub fn render_subagent_system_prompt_with_format( - workspace_dir: &Path, - model_name: &str, - allowed_indices: &[usize], - parent_tools: &[Box], - extra_tools: &[Box], - archetype_body: &str, - options: SubagentRenderOptions, - tool_call_format: ToolCallFormat, - _connected_integrations: &[ConnectedIntegration], -) -> String { - let mut out = String::new(); - - // 1. Archetype role prompt. 
Works for `PromptSource::Inline`, - // `PromptSource::File`, and `PromptSource::Dynamic` because the - // caller preloaded the body via `load_prompt_source`. - let trimmed = archetype_body.trim(); - if !trimmed.is_empty() { - out.push_str(trimmed); - out.push_str("\n\n"); - } - - // 1b. Optional identity block. Off by default; turned on when the - // definition sets `omit_identity = false`. Renders the same - // OpenClaw bootstrap files the main agent loads, keeping the - // byte layout stable across repeat spawns of the same - // definition within a session. - if options.include_identity { - out.push_str("## Project Context\n\n"); - out.push_str( - "The following workspace files define your identity, behavior, and context.\n\n", - ); - for file in &["SOUL.md", "IDENTITY.md"] { - inject_workspace_file(&mut out, workspace_dir, file); - } - } - - // 1c. PROFILE.md (onboarding enrichment output) and MEMORY.md - // (archivist-curated long-term memory). Each is gated on its own - // flag and capped at `USER_FILE_MAX_CHARS` (~1000 tokens) so a - // growing on-disk file can't push the system prompt out of the - // cache-friendly prefix range. - // - // KV-cache contract: once these files land in a session's - // rendered prompt the bytes are frozen for the remainder of that - // session. Do not re-read them mid-turn — a byte change breaks - // the backend's automatic prefix cache. Mid-session writes to - // either file are intentionally only visible on the NEXT session. - if options.include_profile { - inject_workspace_file_capped(&mut out, workspace_dir, "PROFILE.md", USER_FILE_MAX_CHARS); - } - if options.include_memory_md { - inject_workspace_file_capped(&mut out, workspace_dir, "MEMORY.md", USER_FILE_MAX_CHARS); - } - - // 2. Filtered tool catalogue. Indices are taken in ascending order - // from `allowed_indices`, which itself preserves `parent_tools` - // order, so the rendering is deterministic. 
We use `.get(i)` - // defensively even though the current caller (subagent_runner) - // only produces in-range indices — a future caller that derives - // indices from a different source must not be able to panic this - // renderer with a stale index. - // - // Rendering uses the caller-specified `tool_call_format` so - // sub-agents and the main dispatcher stay in lockstep. - // Tool catalogue rendering is dispatcher-format-aware: - // - // - **Native**: The provider receives full tool schemas through - // the request body's `tools` field (via `filtered_specs` in the - // sub-agent runner) and emits structured `tool_calls`. Listing - // the same tools again as prose in the system prompt is pure - // duplication — for a integrations_agent spawn with 62 dynamic gmail - // tools, that duplication added ~54k tokens and blew past the - // model's context window. We skip the prose `## Tools` section - // entirely in this mode. - // - // - **PFormat / Json**: Both are prompt-driven formats — the - // model discovers tools by reading the prose `## Tools` section - // and emits text-wrapped tool calls (`name[a|b]` - // for PFormat, `{"name":...}` for Json). - // Neither uses the native `tools` request field, so we MUST - // list each tool in prose — including dynamically-registered - // `extra_tools` — or the model has no way to know they exist. - if !matches!(tool_call_format, ToolCallFormat::Native) { - out.push_str("## Tools\n\n"); - let render_one = |out: &mut String, tool: &dyn Tool| match tool_call_format { - ToolCallFormat::PFormat => { - let sig = render_pformat_signature_for_box_tool(tool); - let _ = writeln!( - out, - "- **{}**: {}\n Call as: `{}`", - tool.name(), - tool.description(), - sig - ); - } - ToolCallFormat::Json => { - let _ = writeln!( - out, - "- **{}**: {}\n Parameters: `{}`", - tool.name(), - tool.description(), - tool.parameters_schema() - ); - } - ToolCallFormat::Native => { - // Unreachable — outer guard skips Native entirely. 
- } - }; - for &i in allowed_indices { - let Some(tool) = parent_tools.get(i) else { - tracing::warn!( - index = i, - tool_count = parent_tools.len(), - "[context::prompt] dropping out-of-range tool index in subagent render" - ); - continue; - }; - render_one(&mut out, tool.as_ref()); - } - for tool in extra_tools { - render_one(&mut out, tool.as_ref()); - } - } - - // 3. Sub-agent calling-convention preamble — format-aware. - // Sub-agents need the same call format the main dispatcher expects - // so their output parses correctly. - out.push('\n'); - match tool_call_format { - ToolCallFormat::PFormat => { - out.push_str( - "## Tool Use Protocol\n\n\ - Tool calls use **P-Format**: compact, positional, pipe-delimited syntax \ - wrapped in `` tags.\n\n\ - ```\n\ntool_name[arg1|arg2]\n\n```\n\n\ - Arguments are positional — match the order shown in each tool's `Call as:` \ - signature above (alphabetical by parameter name). \ - Escape `|` as `\\|`, `]` as `\\]` inside values. \ - You may emit multiple `` blocks per response.\n\n\ - Use the provided tools to accomplish the task. Reply with a concise, dense \ - final answer when you have one — the parent agent will weave it back into the \ - user-visible response.\n\n", - ); - } - ToolCallFormat::Json => { - out.push_str( - "## Tool Use Protocol\n\n\ - To use a tool, wrap a JSON object in `` tags:\n\n\ - ```\n\n{\"name\": \"tool_name\", \"arguments\": {\"param\": \"value\"}}\n\n```\n\n\ - You may emit multiple `` blocks in a single response.\n\n\ - Use the provided tools to accomplish the task. Reply with a concise, dense \ - final answer when you have one — the parent agent will weave it back into the \ - user-visible response.\n\n", - ); - } - ToolCallFormat::Native => { - out.push_str( - "Use the provided tools via the model's native tool-calling output. 
\ - Reply with a concise, dense final answer when you have one — the parent \ - agent will weave it back into the user-visible response.\n\n", - ); - } - } - - // 3b. Optional safety preamble. Definitions that do work with real - // side-effects (code_executor, tool_maker, integrations_agent) set - // `omit_safety_preamble = false` so the narrow renderer used to - // silently drop that instruction — we now honour the flag. - // Byte-identical to `SafetySection::build`. - if options.include_safety_preamble { - out.push_str( - "## Safety\n\n- Do not exfiltrate private data.\n- Do not run destructive commands without asking.\n- Do not bypass oversight or approval mechanisms.\n- Prefer `trash` over `rm`.\n- When in doubt, ask before acting externally.\n\n", - ); - } - - // 3c/3d. `## Available Skills` and `## Connected Integrations` - // are no longer emitted here. Each agent that needs them - // renders its own block in its `prompt.rs` (integrations_agent - // owns the executor voice, orchestrator/welcome own the - // delegator voice). Legacy Inline/File-sourced TOML agents - // that still route through this helper simply don't get - // either block — which matches the fact that none of them - // currently opt in. - - // 4. Workspace so the model knows where it is. Intentionally stable: - // no datetime, no hostname, no pid — see the KV-cache note above. - let _ = writeln!( - out, - "## Workspace\n\nWorking directory: `{}`\n", - workspace_dir.display() - ); - - // 6. Runtime banner — model name only. Stable for the lifetime of - // this sub-agent's definition. - let _ = writeln!(out, "## Runtime\n\nModel: {model_name}"); - out.push('\n'); - out.push_str(GLOBAL_STYLE_SUFFIX); - - out -} - -/// Ensure the workspace file is up-to-date with the compiled-in default. -/// -/// On first install the file doesn't exist → write it. On subsequent runs -/// we store a hash of the compiled-in content in a sidecar file -/// (`.{filename}.builtin-hash`). 
If the hash changes (code was updated), -/// the disk file is overwritten so prompt improvements ship automatically. -/// User edits between code releases are preserved — we only overwrite when -/// the built-in default itself changes. -fn sync_workspace_file(workspace_dir: &Path, filename: &str) { - let default_content = default_workspace_file_content(filename); - if default_content.is_empty() { - return; - } - - let path = workspace_dir.join(filename); - let hash_path = workspace_dir.join(format!(".{filename}.builtin-hash")); - - // Compute a simple hash of the current compiled-in content. - let current_hash = { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - default_content.hash(&mut hasher); - format!("{:016x}", hasher.finish()) - }; - - // Read the last-written hash (if any). - let stored_hash = std::fs::read_to_string(&hash_path).unwrap_or_default(); - let stored_hash = stored_hash.trim(); - - if stored_hash == current_hash && path.exists() { - // Built-in hasn't changed and file exists — nothing to do. - return; - } - - // Decide whether to overwrite the existing file. Two safe cases: - // 1. File doesn't exist yet — first install, write the default. - // 2. File exists AND its current hash matches the stored builtin - // hash — the user hasn't edited it since we last wrote it, so - // it's safe to ship the new default. - // Otherwise the file has been hand-edited between releases; leave - // the user's version in place and just update the stored hash so we - // stop re-comparing against the old default on every boot. 
- let file_exists = path.exists(); - let user_unmodified = if file_exists { - match std::fs::read_to_string(&path) { - Ok(disk) => { - let mut hasher = std::collections::hash_map::DefaultHasher::new(); - disk.hash(&mut hasher); - let disk_hash = format!("{:016x}", hasher.finish()); - disk_hash == stored_hash - } - Err(_) => false, - } - } else { - false - }; - - if let Some(parent) = path.parent() { - let _ = std::fs::create_dir_all(parent); - } - - if !file_exists || user_unmodified { - if let Err(e) = std::fs::write(&path, default_content) { - log::warn!("[agent:prompt] failed to write workspace file {filename}: {e}"); - return; - } - log::info!("[agent:prompt] updated workspace file {filename} (builtin content changed)"); - } else { - log::info!( - "[agent:prompt] keeping user-edited workspace file {filename} (builtin changed but disk contents diverge)" - ); - } - let _ = std::fs::write(&hash_path, ¤t_hash); -} - -/// Inject `filename` from `workspace_dir` into `prompt`, truncated to -/// [`BOOTSTRAP_MAX_CHARS`]. Thin wrapper around -/// [`inject_workspace_file_capped`] for bootstrap-class files -/// (`SOUL.md`, `IDENTITY.md`, `HEARTBEAT.md`). -fn inject_workspace_file(prompt: &mut String, workspace_dir: &Path, filename: &str) { - inject_workspace_file_capped(prompt, workspace_dir, filename, BOOTSTRAP_MAX_CHARS); -} - -/// Inject pre-loaded string content into `prompt` under a `### label` heading, -/// capped at `max_chars`. Mirrors the format of [`inject_snapshot_content`] -/// and [`inject_workspace_file_capped`] but takes a `&str` instead of a file -/// path. Used for personality-specific overrides (`personality_soul_md`, -/// `personality_memory_md`) on [`PromptContext`] so a swap from the file-based -/// loader to an inline override is byte-compatible with the workspace-file path. -/// -/// Empty/whitespace content is silently skipped. 
-fn inject_inline_content(prompt: &mut String, label: &str, content: &str, max_chars: usize) { - let trimmed = content.trim(); - if trimmed.is_empty() { - return; - } - let _ = writeln!(prompt, "### {label}\n"); - let truncated = if trimmed.chars().count() > max_chars { - trimmed - .char_indices() - .nth(max_chars) - .map(|(idx, _)| &trimmed[..idx]) - .unwrap_or(trimmed) - } else { - trimmed - }; - prompt.push_str(truncated); - if truncated.len() < trimmed.len() { - let _ = writeln!( - prompt, - "\n\n[... truncated at {max_chars} chars — use `read` for full file]\n" - ); - } else { - prompt.push_str("\n\n"); - } -} - -/// for the output header and truncation semantics. -/// -/// Empty/whitespace content is silently skipped, mirroring the file -/// loader's "no noisy placeholder" behaviour. -fn inject_snapshot_content(prompt: &mut String, label: &str, content: &str, max_chars: usize) { - let trimmed = content.trim(); - if trimmed.is_empty() { - return; - } - let _ = writeln!(prompt, "### {label}\n"); - let truncated = if trimmed.chars().count() > max_chars { - trimmed - .char_indices() - .nth(max_chars) - .map(|(idx, _)| &trimmed[..idx]) - .unwrap_or(trimmed) - } else { - trimmed - }; - prompt.push_str(truncated); - if truncated.len() < trimmed.len() { - let _ = writeln!( - prompt, - "\n\n[... truncated at {max_chars} chars — use `read` for full file]\n" - ); - } else { - prompt.push_str("\n\n"); - } -} - -/// Inject `filename` into `prompt` with an explicit character budget. -/// -/// Used directly by callers that want a tighter cap than -/// [`BOOTSTRAP_MAX_CHARS`] — notably `PROFILE.md` and `MEMORY.md` which -/// are user-specific, potentially growing, and do not warrant a full -/// 20K-char budget (see [`USER_FILE_MAX_CHARS`]). -/// -/// Missing / empty files are silently skipped so callers can inject -/// optional files unconditionally without emitting a noisy placeholder. 
-/// -/// **KV-cache contract:** the output is a pure function of `filename`, -/// file bytes at call time, and `max_chars`. Callers must invoke this -/// once per session — re-reading mid-session breaks the inference -/// backend's automatic prefix cache. See the byte-stability note on -/// [`render_subagent_system_prompt`]. -fn inject_workspace_file_capped( - prompt: &mut String, - workspace_dir: &Path, - filename: &str, - max_chars: usize, -) { - let path = workspace_dir.join(filename); - - match std::fs::read_to_string(&path) { - Ok(content) => { - let trimmed = content.trim(); - if trimmed.is_empty() { - return; - } - let _ = writeln!(prompt, "### {filename}\n"); - let truncated = if trimmed.chars().count() > max_chars { - trimmed - .char_indices() - .nth(max_chars) - .map(|(idx, _)| &trimmed[..idx]) - .unwrap_or(trimmed) - } else { - trimmed - }; - prompt.push_str(truncated); - if truncated.len() < trimmed.len() { - let _ = writeln!( - prompt, - "\n\n[... truncated at {max_chars} chars — use `read` for full file]\n" - ); - } else { - prompt.push_str("\n\n"); - } - } - Err(e) => match e.kind() { - std::io::ErrorKind::NotFound => { - // Keep prompt focused: missing optional identity/bootstrap files should not - // add noisy placeholders that dilute tool-calling instructions. - } - _ => { - log::debug!("[prompt] failed to read {}: {e}", path.display()); - } - }, - } -} - -fn default_workspace_file_content(filename: &str) -> &'static str { - // The bundled identity files live at `src/openhuman/agent/prompts/` - // (owned by the `agent/` tree because they describe agent identity). - // This module is under `src/openhuman/context/`, so the relative path - // walks up one level and back into `agent/prompts/`. 
- match filename { - "SOUL.md" => include_str!("SOUL.md"), - "IDENTITY.md" => include_str!("IDENTITY.md"), - "HEARTBEAT.md" => { - "# Periodic Tasks\n\n# Add tasks below (one per line, starting with `- `)\n" - } - _ => "", - } -} +pub mod builder; +pub use builder::{SystemPromptBuilder, GLOBAL_STYLE_SUFFIX}; + +pub mod sections; +pub use sections::*; + +pub mod render_helpers; +pub use render_helpers::{ + default_workspace_file_content, inject_inline_content, inject_snapshot_content, + inject_workspace_file, inject_workspace_file_capped, memory_date_label, + render_ambient_environment, render_datetime, render_identity, render_runtime, render_safety, + render_subagent_system_prompt, render_subagent_system_prompt_with_format, render_tools, + render_user_files, render_user_identity, render_user_memory, render_user_reflections, + render_workspace, sync_workspace_file, +}; #[cfg(test)] #[path = "mod_tests.rs"] diff --git a/src/openhuman/agent/prompts/mod_tests.rs b/src/openhuman/agent/prompts/mod_tests.rs index cae7b5575b..aeec8f7c29 100644 --- a/src/openhuman/agent/prompts/mod_tests.rs +++ b/src/openhuman/agent/prompts/mod_tests.rs @@ -2,6 +2,7 @@ use super::*; use crate::openhuman::tools::traits::Tool; use async_trait::async_trait; use std::collections::HashSet; +use std::path::Path; use std::sync::LazyLock; static NO_FILTER: LazyLock> = LazyLock::new(HashSet::new); diff --git a/src/openhuman/agent/prompts/render_helpers.rs b/src/openhuman/agent/prompts/render_helpers.rs new file mode 100644 index 0000000000..36bd8be27a --- /dev/null +++ b/src/openhuman/agent/prompts/render_helpers.rs @@ -0,0 +1,688 @@ +//! Free `render_*` functions, sub-agent prompt renderer, and workspace-file +//! I/O helpers. +//! +//! The `render_*` family provides a functional interface over the section +//! structs in [`super::sections`] — `agents//prompt.rs` builders call +//! these to assemble their own final system prompt without needing the full +//! 
[`super::builder::SystemPromptBuilder`] machinery. + +use super::builder::GLOBAL_STYLE_SUFFIX; +use super::sections::*; +use super::types::*; +use anyhow::Result; +use chrono::{DateTime, Utc}; +use std::fmt::Write; +use std::hash::{Hash, Hasher}; +use std::path::Path; +use std::sync::OnceLock; + +// ───────────────────────────────────────────────────────────────────────────── +// Section render helpers (functional wrappers over section structs) +// ───────────────────────────────────────────────────────────────────────────── + +/// Render the `## Project Context` identity block +/// (`SOUL.md` / `IDENTITY.md` / optionally `HEARTBEAT.md`). +pub fn render_identity(ctx: &PromptContext<'_>) -> Result { + IdentitySection.build(ctx) +} + +/// Render the `PROFILE.md` + `MEMORY.md` user-file injection. +/// Empty when neither `ctx.include_profile` nor `ctx.include_memory_md` +/// is set. +pub fn render_user_files(ctx: &PromptContext<'_>) -> Result { + UserFilesSection.build(ctx) +} + +/// Render the tree-summariser user-memory block. +pub fn render_user_memory(ctx: &PromptContext<'_>) -> Result { + UserMemorySection.build(ctx) +} + +/// Render the privileged `## User Reflections` block. Empty when the +/// learning subsystem has not captured any reflections yet. +pub fn render_user_reflections(ctx: &PromptContext<'_>) -> Result { + UserReflectionsSection.build(ctx) +} + +/// Render the `## Tools` catalogue in the dispatcher's tool-call format. +pub fn render_tools(ctx: &PromptContext<'_>) -> Result { + ToolsSection.build(ctx) +} + +/// Render the static `## Safety` block. 
+pub fn render_safety() -> String { + SafetySection + .build(&empty_prompt_context_for_static_sections()) + .expect("SafetySection::build is infallible") +} + +// `render_skills` and `render_connected_integrations` helpers are +// gone — `## Available Skills` lives in `integrations_agent/prompt.rs`, and +// the connected-integrations / delegation-guide blocks each live in +// their owning agent's `prompt.rs` so no branching-on-agent-id logic +// needs to exist here. + +/// Render the `## Workspace` block (working directory + file listing +/// bounds) — part of the dynamic, per-request suffix. +pub fn render_workspace(ctx: &PromptContext<'_>) -> Result { + WorkspaceSection.build(ctx) +} + +/// Render the `## Runtime` block (model name, dispatcher format) — +/// dynamic. +pub fn render_runtime(ctx: &PromptContext<'_>) -> Result { + RuntimeSection.build(ctx) +} + +/// Render the `## Current Date & Time` block. Intentionally **not** +/// included in byte-stable sub-agent prompts (`for_subagent`) because +/// injecting `Local::now()` defeats prefix caching. Exposed so full- +/// assembly main-agent builders can opt in. +pub fn render_datetime(ctx: &PromptContext<'_>) -> Result { + DateTimeSection.build(ctx) +} + +/// Render the `## User` identity block. Empty when +/// [`PromptContext::user_identity`] is unset or has no populated +/// fields. See issue #926. +pub fn render_user_identity(ctx: &PromptContext<'_>) -> Result { + UserIdentitySection.build(ctx) +} + +/// Compose the full ambient-environment block — runtime + user +/// identity + current date/time, in that order. +/// +/// Per-agent `prompt.rs` builders call this once near the end of their +/// assembly so every agent reports the same machine-readable view of +/// "where am I, who is the user, what time is it" (issue #926). 
+/// Datetime is appended last so the time-volatile section sits at the +/// tail of the prompt and the rest of the prefix stays cache-stable +/// across turns within the same minute, matching the convention used +/// by [`super::builder::SystemPromptBuilder::with_defaults`]. +pub fn render_ambient_environment(ctx: &PromptContext<'_>) -> Result { + let mut out = String::with_capacity(512); + let runtime = render_runtime(ctx)?; + if !runtime.trim().is_empty() { + out.push_str(runtime.trim_end()); + out.push_str("\n\n"); + } + let user = render_user_identity(ctx)?; + if !user.trim().is_empty() { + out.push_str(user.trim_end()); + out.push_str("\n\n"); + } + let datetime = render_datetime(ctx)?; + if !datetime.trim().is_empty() { + out.push_str(datetime.trim_end()); + out.push('\n'); + } + Ok(out) +} + +/// Format a memory item's `updated_at` as an absolute UTC date label +/// for prompt injection, e.g. `2026-05-25`. +/// +/// Absolute (not relative "N days ago") on purpose: memory sections sit +/// near the front of the KV-cache-stable system prompt, so a label that +/// changes daily would bust the cached prefix for everything after it. +/// An absolute date only changes when the underlying memory does. The +/// model judges staleness by comparing this against the injected current +/// date. Shared by [`UserMemorySection`] and the working-memory block in +/// `agent::memory_loader`. (#2944) +pub fn memory_date_label(updated_at: DateTime) -> String { + updated_at.format("%Y-%m-%d").to_string() +} + +// ───────────────────────────────────────────────────────────────────────────── +// Sub-agent prompt renderer +// ───────────────────────────────────────────────────────────────────────────── + +/// Render a narrow, KV-cache-stable system prompt for a typed sub-agent. 
+/// +/// This is a purpose-built alternative to +/// [`super::builder::SystemPromptBuilder::for_subagent`] for call sites +/// that only have indices into the parent's `&[Box]` vec (so they +/// can't cheaply build a filtered owning slice for `ToolsSection`). The +/// output mirrors what `for_subagent` would emit with the matching +/// `omit_*` flags, plus a sub-agent-specific calling-convention +/// preamble and a model-only runtime banner. +/// +/// `archetype_body` is the already-loaded archetype markdown — for +/// `PromptSource::Inline` this is the inline string, for +/// `PromptSource::File` this is the file contents loaded by the caller. +/// Callers resolve the source exactly once and hand the body in, so +/// this renderer works uniformly for both definition shapes. +/// +/// `options` carries the per-definition rendering flags (safety, etc.) +/// inverted into positive-sense `include_*` form. +/// [`SubagentRenderOptions::narrow`] preserves the historical behaviour. +/// +/// # KV cache stability +/// +/// The rendered bytes MUST be a pure function of: +/// - the `archetype_body` (archetype role prompt) +/// - the filtered tool set (names, descriptions, schemas) +/// - the workspace directory +/// - the resolved model name +/// - the `options` (all static per definition) +/// +/// Anything that varies across invocations at the *same* call site +/// (e.g. `chrono::Local::now()`, hostnames, pids, turn counters) is +/// forbidden here. Repeat spawns of the same sub-agent within a session +/// must produce byte-identical system prompts so the inference +/// backend's automatic prefix caching can reuse the prefill from the +/// previous run. Time-of-day information, if a sub-agent needs it, +/// belongs in the user message — not the system prompt. 
+pub fn render_subagent_system_prompt( + workspace_dir: &Path, + model_name: &str, + allowed_indices: &[usize], + parent_tools: &[Box], + extra_tools: &[Box], + archetype_body: &str, + options: SubagentRenderOptions, + tool_call_format: ToolCallFormat, + connected_integrations: &[ConnectedIntegration], +) -> String { + render_subagent_system_prompt_with_format( + workspace_dir, + model_name, + allowed_indices, + parent_tools, + extra_tools, + archetype_body, + options, + tool_call_format, + connected_integrations, + ) +} + +/// Inner renderer that accepts an explicit [`ToolCallFormat`] so callers +/// that know the active dispatcher format can thread it through. The +/// public [`render_subagent_system_prompt`] defaults to PFormat for +/// backwards compatibility. +pub fn render_subagent_system_prompt_with_format( + workspace_dir: &Path, + model_name: &str, + allowed_indices: &[usize], + parent_tools: &[Box], + extra_tools: &[Box], + archetype_body: &str, + options: SubagentRenderOptions, + tool_call_format: ToolCallFormat, + _connected_integrations: &[ConnectedIntegration], +) -> String { + let mut out = String::new(); + + // 1. Archetype role prompt. Works for `PromptSource::Inline`, + // `PromptSource::File`, and `PromptSource::Dynamic` because the + // caller preloaded the body via `load_prompt_source`. + let trimmed = archetype_body.trim(); + if !trimmed.is_empty() { + out.push_str(trimmed); + out.push_str("\n\n"); + } + + // 1b. Optional identity block. Off by default; turned on when the + // definition sets `omit_identity = false`. Renders the same + // OpenClaw bootstrap files the main agent loads, keeping the + // byte layout stable across repeat spawns of the same + // definition within a session. 
+ if options.include_identity { + out.push_str("## Project Context\n\n"); + out.push_str( + "The following workspace files define your identity, behavior, and context.\n\n", + ); + for file in &["SOUL.md", "IDENTITY.md"] { + inject_workspace_file(&mut out, workspace_dir, file); + } + } + + // 1c. PROFILE.md (onboarding enrichment output) and MEMORY.md + // (archivist-curated long-term memory). Each is gated on its own + // flag and capped at `USER_FILE_MAX_CHARS` (~1000 tokens) so a + // growing on-disk file can't push the system prompt out of the + // cache-friendly prefix range. + // + // KV-cache contract: once these files land in a session's + // rendered prompt the bytes are frozen for the remainder of that + // session. Do not re-read them mid-turn — a byte change breaks + // the backend's automatic prefix cache. Mid-session writes to + // either file are intentionally only visible on the NEXT session. + if options.include_profile { + inject_workspace_file_capped(&mut out, workspace_dir, "PROFILE.md", USER_FILE_MAX_CHARS); + } + if options.include_memory_md { + inject_workspace_file_capped(&mut out, workspace_dir, "MEMORY.md", USER_FILE_MAX_CHARS); + } + + // 2. Filtered tool catalogue. Indices are taken in ascending order + // from `allowed_indices`, which itself preserves `parent_tools` + // order, so the rendering is deterministic. We use `.get(i)` + // defensively even though the current caller (subagent_runner) + // only produces in-range indices — a future caller that derives + // indices from a different source must not be able to panic this + // renderer with a stale index. + // + // Rendering uses the caller-specified `tool_call_format` so + // sub-agents and the main dispatcher stay in lockstep. + // Tool catalogue rendering is dispatcher-format-aware: + // + // - **Native**: The provider receives full tool schemas through + // the request body's `tools` field (via `filtered_specs` in the + // sub-agent runner) and emits structured `tool_calls`. 
Listing + // the same tools again as prose in the system prompt is pure + // duplication — for a integrations_agent spawn with 62 dynamic gmail + // tools, that duplication added ~54k tokens and blew past the + // model's context window. We skip the prose `## Tools` section + // entirely in this mode. + // + // - **PFormat / Json**: Both are prompt-driven formats — the + // model discovers tools by reading the prose `## Tools` section + // and emits text-wrapped tool calls (`name[a|b]` + // for PFormat, `{"name":...}` for Json). + // Neither uses the native `tools` request field, so we MUST + // list each tool in prose — including dynamically-registered + // `extra_tools` — or the model has no way to know they exist. + if !matches!(tool_call_format, ToolCallFormat::Native) { + out.push_str("## Tools\n\n"); + let render_one = + |out: &mut String, tool: &dyn crate::openhuman::tools::Tool| match tool_call_format { + ToolCallFormat::PFormat => { + let sig = render_pformat_signature_for_box_tool(tool); + let _ = writeln!( + out, + "- **{}**: {}\n Call as: `{}`", + tool.name(), + tool.description(), + sig + ); + } + ToolCallFormat::Json => { + let _ = writeln!( + out, + "- **{}**: {}\n Parameters: `{}`", + tool.name(), + tool.description(), + tool.parameters_schema() + ); + } + ToolCallFormat::Native => { + // Unreachable — outer guard skips Native entirely. + } + }; + for &i in allowed_indices { + let Some(tool) = parent_tools.get(i) else { + tracing::warn!( + index = i, + tool_count = parent_tools.len(), + "[context::prompt] dropping out-of-range tool index in subagent render" + ); + continue; + }; + render_one(&mut out, tool.as_ref()); + } + for tool in extra_tools { + render_one(&mut out, tool.as_ref()); + } + } + + // 3. Sub-agent calling-convention preamble — format-aware. + // Sub-agents need the same call format the main dispatcher expects + // so their output parses correctly. 
+ out.push('\n'); + match tool_call_format { + ToolCallFormat::PFormat => { + out.push_str( + "## Tool Use Protocol\n\n\ + Tool calls use **P-Format**: compact, positional, pipe-delimited syntax \ + wrapped in `` tags.\n\n\ + ```\n\ntool_name[arg1|arg2]\n\n```\n\n\ + Arguments are positional — match the order shown in each tool's `Call as:` \ + signature above (alphabetical by parameter name). \ + Escape `|` as `\\|`, `]` as `\\]` inside values. \ + You may emit multiple `` blocks per response.\n\n\ + Use the provided tools to accomplish the task. Reply with a concise, dense \ + final answer when you have one — the parent agent will weave it back into the \ + user-visible response.\n\n", + ); + } + ToolCallFormat::Json => { + out.push_str( + "## Tool Use Protocol\n\n\ + To use a tool, wrap a JSON object in `` tags:\n\n\ + ```\n\n{\"name\": \"tool_name\", \"arguments\": {\"param\": \"value\"}}\n\n```\n\n\ + You may emit multiple `` blocks in a single response.\n\n\ + Use the provided tools to accomplish the task. Reply with a concise, dense \ + final answer when you have one — the parent agent will weave it back into the \ + user-visible response.\n\n", + ); + } + ToolCallFormat::Native => { + out.push_str( + "Use the provided tools via the model's native tool-calling output. \ + Reply with a concise, dense final answer when you have one — the parent \ + agent will weave it back into the user-visible response.\n\n", + ); + } + } + + // 3b. Optional safety preamble. Definitions that do work with real + // side-effects (code_executor, tool_maker, integrations_agent) set + // `omit_safety_preamble = false` so the narrow renderer used to + // silently drop that instruction — we now honour the flag. + // Byte-identical to `SafetySection::build`. 
+ if options.include_safety_preamble { + out.push_str( + "## Safety\n\n- Do not exfiltrate private data.\n- Do not run destructive commands without asking.\n- Do not bypass oversight or approval mechanisms.\n- Prefer `trash` over `rm`.\n- When in doubt, ask before acting externally.\n\n", + ); + } + + // 3c/3d. `## Available Skills` and `## Connected Integrations` + // are no longer emitted here. Each agent that needs them + // renders its own block in its `prompt.rs` (integrations_agent + // owns the executor voice, orchestrator/welcome own the + // delegator voice). Legacy Inline/File-sourced TOML agents + // that still route through this helper simply don't get + // either block — which matches the fact that none of them + // currently opt in. + + // 4. Workspace so the model knows where it is. Intentionally stable: + // no datetime, no hostname, no pid — see the KV-cache note above. + let _ = writeln!( + out, + "## Workspace\n\nWorking directory: `{}`\n", + workspace_dir.display() + ); + + // 6. Runtime banner — model name only. Stable for the lifetime of + // this sub-agent's definition. + let _ = writeln!(out, "## Runtime\n\nModel: {model_name}"); + out.push('\n'); + out.push_str(GLOBAL_STYLE_SUFFIX); + + out +} + +// ───────────────────────────────────────────────────────────────────────────── +// Workspace-file I/O helpers +// ───────────────────────────────────────────────────────────────────────────── + +/// Ensure the workspace file is up-to-date with the compiled-in default. +/// +/// On first install the file doesn't exist → write it. On subsequent runs +/// we store a hash of the compiled-in content in a sidecar file +/// (`.{filename}.builtin-hash`). If the hash changes (code was updated), +/// the disk file is overwritten so prompt improvements ship automatically. +/// User edits between code releases are preserved — we only overwrite when +/// the built-in default itself changes. 
+pub fn sync_workspace_file(workspace_dir: &Path, filename: &str) { + let default_content = default_workspace_file_content(filename); + if default_content.is_empty() { + return; + } + + let path = workspace_dir.join(filename); + let hash_path = workspace_dir.join(format!(".{filename}.builtin-hash")); + + // Compute a simple hash of the current compiled-in content. + let current_hash = { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + default_content.hash(&mut hasher); + format!("{:016x}", hasher.finish()) + }; + + // Read the last-written hash (if any). + let stored_hash = std::fs::read_to_string(&hash_path).unwrap_or_default(); + let stored_hash = stored_hash.trim(); + + if stored_hash == current_hash && path.exists() { + // Built-in hasn't changed and file exists — nothing to do. + return; + } + + // Decide whether to overwrite the existing file. Two safe cases: + // 1. File doesn't exist yet — first install, write the default. + // 2. File exists AND its current hash matches the stored builtin + // hash — the user hasn't edited it since we last wrote it, so + // it's safe to ship the new default. + // Otherwise the file has been hand-edited between releases; leave + // the user's version in place and just update the stored hash so we + // stop re-comparing against the old default on every boot. 
+ let file_exists = path.exists(); + let user_unmodified = if file_exists { + match std::fs::read_to_string(&path) { + Ok(disk) => { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + disk.hash(&mut hasher); + let disk_hash = format!("{:016x}", hasher.finish()); + disk_hash == stored_hash + } + Err(_) => false, + } + } else { + false + }; + + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + + if !file_exists || user_unmodified { + if let Err(e) = std::fs::write(&path, default_content) { + log::warn!("[agent:prompt] failed to write workspace file {filename}: {e}"); + return; + } + log::info!("[agent:prompt] updated workspace file {filename} (builtin content changed)"); + } else { + log::info!( + "[agent:prompt] keeping user-edited workspace file {filename} (builtin changed but disk contents diverge)" + ); + } + let _ = std::fs::write(&hash_path, ¤t_hash); +} + +/// Inject `filename` from `workspace_dir` into `prompt`, truncated to +/// [`BOOTSTRAP_MAX_CHARS`]. Thin wrapper around +/// [`inject_workspace_file_capped`] for bootstrap-class files +/// (`SOUL.md`, `IDENTITY.md`, `HEARTBEAT.md`). +pub fn inject_workspace_file(prompt: &mut String, workspace_dir: &Path, filename: &str) { + inject_workspace_file_capped(prompt, workspace_dir, filename, BOOTSTRAP_MAX_CHARS); +} + +/// Inject pre-loaded string content into `prompt` under a `### label` heading, +/// capped at `max_chars`. Mirrors the format of [`inject_snapshot_content`] +/// and [`inject_workspace_file_capped`] but takes a `&str` instead of a file +/// path. Used for personality-specific overrides (`personality_soul_md`, +/// `personality_memory_md`) on [`PromptContext`] so a swap from the file-based +/// loader to an inline override is byte-compatible with the workspace-file path. +/// +/// Empty/whitespace content is silently skipped. 
+pub fn inject_inline_content(prompt: &mut String, label: &str, content: &str, max_chars: usize) { + let trimmed = content.trim(); + if trimmed.is_empty() { + return; + } + let _ = writeln!(prompt, "### {label}\n"); + let truncated = if trimmed.chars().count() > max_chars { + trimmed + .char_indices() + .nth(max_chars) + .map(|(idx, _)| &trimmed[..idx]) + .unwrap_or(trimmed) + } else { + trimmed + }; + prompt.push_str(truncated); + if truncated.len() < trimmed.len() { + let _ = writeln!( + prompt, + "\n\n[... truncated at {max_chars} chars — use `read` for full file]\n" + ); + } else { + prompt.push_str("\n\n"); + } +} + +/// for the output header and truncation semantics. +/// +/// Empty/whitespace content is silently skipped, mirroring the file +/// loader's "no noisy placeholder" behaviour. +pub fn inject_snapshot_content(prompt: &mut String, label: &str, content: &str, max_chars: usize) { + let trimmed = content.trim(); + if trimmed.is_empty() { + return; + } + let _ = writeln!(prompt, "### {label}\n"); + let truncated = if trimmed.chars().count() > max_chars { + trimmed + .char_indices() + .nth(max_chars) + .map(|(idx, _)| &trimmed[..idx]) + .unwrap_or(trimmed) + } else { + trimmed + }; + prompt.push_str(truncated); + if truncated.len() < trimmed.len() { + let _ = writeln!( + prompt, + "\n\n[... truncated at {max_chars} chars — use `read` for full file]\n" + ); + } else { + prompt.push_str("\n\n"); + } +} + +/// Inject `filename` into `prompt` with an explicit character budget. +/// +/// Used directly by callers that want a tighter cap than +/// [`BOOTSTRAP_MAX_CHARS`] — notably `PROFILE.md` and `MEMORY.md` which +/// are user-specific, potentially growing, and do not warrant a full +/// 20K-char budget (see [`USER_FILE_MAX_CHARS`]). +/// +/// Missing / empty files are silently skipped so callers can inject +/// optional files unconditionally without emitting a noisy placeholder. 
+/// +/// **KV-cache contract:** the output is a pure function of `filename`, +/// file bytes at call time, and `max_chars`. Callers must invoke this +/// once per session — re-reading mid-session breaks the inference +/// backend's automatic prefix cache. See the byte-stability note on +/// [`render_subagent_system_prompt`]. +pub fn inject_workspace_file_capped( + prompt: &mut String, + workspace_dir: &Path, + filename: &str, + max_chars: usize, +) { + let path = workspace_dir.join(filename); + + match std::fs::read_to_string(&path) { + Ok(content) => { + let trimmed = content.trim(); + if trimmed.is_empty() { + return; + } + let _ = writeln!(prompt, "### {filename}\n"); + let truncated = if trimmed.chars().count() > max_chars { + trimmed + .char_indices() + .nth(max_chars) + .map(|(idx, _)| &trimmed[..idx]) + .unwrap_or(trimmed) + } else { + trimmed + }; + prompt.push_str(truncated); + if truncated.len() < trimmed.len() { + let _ = writeln!( + prompt, + "\n\n[... truncated at {max_chars} chars — use `read` for full file]\n" + ); + } else { + prompt.push_str("\n\n"); + } + } + Err(e) => match e.kind() { + std::io::ErrorKind::NotFound => { + // Keep prompt focused: missing optional identity/bootstrap files should not + // add noisy placeholders that dilute tool-calling instructions. + } + _ => { + log::debug!("[prompt] failed to read {}: {e}", path.display()); + } + }, + } +} + +pub fn default_workspace_file_content(filename: &str) -> &'static str { + // The bundled identity files live at `src/openhuman/agent/prompts/` + // (owned by the `agent/` tree because they describe agent identity). + // This module is under `src/openhuman/context/`, so the relative path + // walks up one level and back into `agent/prompts/`. 
+ match filename { + "SOUL.md" => include_str!("SOUL.md"), + "IDENTITY.md" => include_str!("IDENTITY.md"), + "HEARTBEAT.md" => { + "# Periodic Tasks\n\n# Add tasks below (one per line, starting with `- `)\n" + } + _ => "", + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Internal helpers +// ───────────────────────────────────────────────────────────────────────────── + +/// Build a throwaway `PromptContext` for sections whose `build` only +/// uses static/immutable inputs (currently just `SafetySection`). Keeps +/// the `render_safety()` free function from forcing callers to +/// manufacture a full context when they only need the static text. +fn empty_prompt_context_for_static_sections() -> PromptContext<'static> { + static EMPTY_TOOLS: &[PromptTool<'static>] = &[]; + static EMPTY_SKILLS: &[crate::openhuman::workflows::Workflow] = &[]; + static EMPTY_INTEGRATIONS: &[ConnectedIntegration] = &[]; + // SAFETY: the &HashSet reference must outlive the returned context; + // a leaked OnceLock-style allocation gives us a permanent 'static + // anchor without adding runtime cost on the hot path. + static EMPTY_VISIBLE: OnceLock> = OnceLock::new(); + let visible = EMPTY_VISIBLE.get_or_init(std::collections::HashSet::new); + PromptContext { + workspace_dir: std::path::Path::new(""), + model_name: "", + agent_id: "", + tools: EMPTY_TOOLS, + skills: EMPTY_SKILLS, + dispatcher_instructions: "", + learned: LearnedContextData::default(), + visible_tool_names: visible, + tool_call_format: ToolCallFormat::PFormat, + connected_integrations: EMPTY_INTEGRATIONS, + connected_identities_md: String::new(), + include_profile: false, + include_memory_md: false, + curated_snapshot: None, + user_identity: None, + personality_soul_md: None, + personality_memory_md: None, + personality_roster: vec![], + } +} + +/// Build a P-Format signature line (`name[a|b|c]`) from a `&dyn Tool`. 
+/// Used by `render_subagent_system_prompt` which operates on `Box` +/// directly (no intermediate `PromptTool`). Mirrors the `PromptTool` variant +/// below — both BTreeMap-iterate the schema's `properties` in the same order. +fn render_pformat_signature_for_box_tool(tool: &dyn crate::openhuman::tools::Tool) -> String { + let schema = tool.parameters_schema(); + let names: Vec = schema + .get("properties") + .and_then(|p| p.as_object()) + .map(|m| m.keys().cloned().collect()) + .unwrap_or_default(); + if names.is_empty() { + format!("{}[]", tool.name()) + } else { + format!("{}[{}]", tool.name(), names.join("|")) + } +} diff --git a/src/openhuman/agent/prompts/sections.rs b/src/openhuman/agent/prompts/sections.rs new file mode 100644 index 0000000000..7a1341c487 --- /dev/null +++ b/src/openhuman/agent/prompts/sections.rs @@ -0,0 +1,614 @@ +//! Concrete [`PromptSection`] implementations. +//! +//! Each unit struct renders one logical block of the system prompt. +//! The rendering logic delegates to the free helpers in +//! [`super::render_helpers`] for workspace-file injection and +//! sub-agent plumbing. + +use super::render_helpers::{ + inject_inline_content, inject_snapshot_content, inject_workspace_file, + inject_workspace_file_capped, sync_workspace_file, +}; +use super::types::*; +use anyhow::Result; +use std::fmt::Write; + +// ───────────────────────────────────────────────────────────────────────────── +// Special sections (archetype, dynamic, reflection) +// ───────────────────────────────────────────────────────────────────────────── + +/// "Memory context" section for chat threads spawned from a subconscious +/// reflection (#623). Renders the resolved [`SourceChunk`]s that the +/// subconscious LLM cited when it produced the reflection — gives the +/// orchestrator the same memory context the reflection-LLM had, so the +/// user can drill into the observation without the orchestrator +/// hallucinating details it never saw. 
+/// +/// Chunks are passed in at construction (snapshot at session-start) so +/// the rendered bytes stay stable for the whole session, matching the +/// "frozen prompt for prefix cache" contract documented on +/// [`super::builder::SystemPromptBuilder::build`]. +pub struct ReflectionMemoryContextSection { + chunks: Vec, +} + +impl ReflectionMemoryContextSection { + pub fn new(chunks: Vec) -> Self { + Self { chunks } + } +} + +impl PromptSection for ReflectionMemoryContextSection { + fn name(&self) -> &str { + "reflection_memory_context" + } + + fn build(&self, _ctx: &PromptContext<'_>) -> Result { + // Skip chunks the resolver couldn't populate — `not_found`, + // `db_error`, or stub kinds without a wired resolver yet. Earlier + // versions emitted "(content not yet resolved)" as a placeholder, + // but the orchestrator picks up that literal string as part of + // its memory context and ends up echoing it back to the user + // mid-reply. Better to give the LLM no chunk than a placeholder + // it'll quote. + let usable: Vec<&crate::openhuman::subconscious::SourceChunk> = self + .chunks + .iter() + .filter(|c| !c.content.trim().is_empty()) + .collect(); + if usable.is_empty() { + return Ok(String::new()); + } + let mut out = String::from("## Memory context\n\n"); + out.push_str( + "This thread was spawned from a subconscious reflection. The chunks below \ + are what OpenHuman was looking at when it surfaced the observation — \ + use them to ground follow-up answers in the same evidence the reflection \ + was based on.\n\n", + ); + for chunk in usable { + let body = chunk.content.replace('\n', " ").trim().to_string(); + let _ = writeln!( + out, + "- **{kind}** `{ref_id}`: {body}", + kind = chunk.kind, + ref_id = chunk.ref_id, + body = body, + ); + } + Ok(out) + } +} + +/// Sub-agent role prompt — pre-loaded text from an +/// [`crate::openhuman::agent::harness::definition::AgentDefinition`]'s +/// `system_prompt` field. Always rendered first when present. 
+pub struct ArchetypePromptSection { + body: String, +} + +impl ArchetypePromptSection { + pub fn new(body: String) -> Self { + Self { body } + } +} + +impl PromptSection for ArchetypePromptSection { + fn name(&self) -> &str { + "archetype_prompt" + } + + fn build(&self, _ctx: &PromptContext<'_>) -> Result { + if self.body.trim().is_empty() { + return Ok(String::new()); + } + Ok(self.body.clone()) + } +} + +/// Section that defers to a [`crate::openhuman::agent::harness::definition::PromptBuilder`] +/// every time it renders, so dynamic prompts (orchestrator, welcome, +/// integrations_agent, …) get to see the live runtime +/// [`PromptContext`] — including `connected_integrations`, which are +/// fetched asynchronously after the builder itself has been +/// constructed. +pub struct DynamicPromptSection { + builder: crate::openhuman::agent::harness::definition::PromptBuilder, +} + +impl DynamicPromptSection { + pub fn new(builder: crate::openhuman::agent::harness::definition::PromptBuilder) -> Self { + Self { builder } + } +} + +impl PromptSection for DynamicPromptSection { + fn name(&self) -> &str { + "dynamic_prompt" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + (self.builder)(ctx) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Standard section unit structs +// ───────────────────────────────────────────────────────────────────────────── + +pub struct IdentitySection; +pub struct ToolsSection; +pub struct SafetySection; +// `WorkflowsSection` and `ConnectedIntegrationsSection` previously lived +// here and branched on `ctx.agent_id` to pick between the skill- +// executor and delegator voice. They've been removed — each agent's +// `prompt.rs` now renders its own block inline (integrations_agent owns the +// `## Available Skills` + executor-voice `## Connected Integrations` +// blocks, orchestrator owns `## Delegation Guide — Integrations`, +// welcome owns its onboarding-flavoured connected list). 
+pub struct WorkspaceSection; +pub struct RuntimeSection; +pub struct DateTimeSection; +pub struct UserMemorySection; +/// Renders explicit user reflections — a privileged memory class +/// distinct from generic tree summaries. Rendered above +/// [`UserMemorySection`] so the orchestrator sees the user's own +/// intentional self-statements before any broader summary block. +/// +/// Empty (and skipped) when [`LearnedContextData::reflections`] is +/// empty — keeps the prompt clean for users who haven't yet expressed +/// any reflection-style content. +pub struct UserReflectionsSection; +/// Renders the authenticated user's non-secret identity fields +/// (`id` / `name` / `email`) into the system prompt — see issue #926. +/// +/// Empty when [`PromptContext::user_identity`] is `None` or the +/// identity has no populated fields. Tokens, refresh tokens, and any +/// opaque credential material are forbidden — only the three +/// identifying fields ship. +pub struct UserIdentitySection; + +/// Injects the user-specific, session-frozen workspace files +/// (`PROFILE.md` + `MEMORY.md`), each capped at [`USER_FILE_MAX_CHARS`]. +/// +/// Separate from [`IdentitySection`] so agents that strip the project- +/// context preamble (`omit_identity = true` — welcome, orchestrator, +/// the trigger pair) still get their user-file injection at runtime via +/// [`super::builder::SystemPromptBuilder::for_subagent`], which skips +/// `IdentitySection` entirely when `omit_identity` is on. +/// +/// Cache-stability: static per session — the whole point of the +/// 2000-char cap and the load-once rule documented on +/// [`AgentDefinition::omit_profile`] / `omit_memory_md`. +pub struct UserFilesSection; + +/// Renders the personality roster for the master agent's system prompt. 
+/// +/// When [`PromptContext::personality_roster`] is non-empty, emits an +/// `## Available Personalities` section listing each non-self personality +/// with its `id`, `name`, `description`, and an optional truncated +/// `memory_summary`. Empty (and skipped) for non-master agents. +pub struct PersonalityRosterSection; + +// ───────────────────────────────────────────────────────────────────────────── +// PromptSection implementations +// ───────────────────────────────────────────────────────────────────────────── + +impl PromptSection for PersonalityRosterSection { + fn name(&self) -> &str { + "personality_roster" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + if ctx.personality_roster.is_empty() { + return Ok(String::new()); + } + let mut out = String::from("## Available Personalities\n\n"); + out.push_str( + "You are the master agent. You can delegate tasks to these personality agents \ + using the `delegate_to_personality` tool. Each personality has its own memory, \ + identity, and expertise.\n\n", + ); + for entry in &ctx.personality_roster { + out.push_str(&format!( + "- **{}** (`{}`): {}", + entry.name, entry.id, entry.description + )); + if let Some(ref summary) = entry.memory_summary { + let truncated = if summary.chars().count() > 200 { + let head: String = summary.chars().take(200).collect(); + format!("{head}…") + } else { + summary.clone() + }; + out.push_str(&format!("\n Recent context: {truncated}")); + } + out.push('\n'); + } + Ok(out) + } +} + +impl PromptSection for IdentitySection { + fn name(&self) -> &str { + "identity" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + let mut prompt = String::from("## Project Context\n\n"); + prompt.push_str( + "The following workspace files define your identity, behavior, and context.\n\n", + ); + // When the visible-tool filter is active the main agent is a pure + // orchestrator: it routes via spawn_subagent, synthesises results, + // and talks to the user. 
It does NOT need the periodic-task config + // (HEARTBEAT.md) — subagents handle their own concerns. + let is_orchestrator = !ctx.visible_tool_names.is_empty(); + let all_files: &[&str] = &["SOUL.md", "IDENTITY.md", "HEARTBEAT.md"]; + // Orchestrator skips these from the prompt but we still sync them + // to disk so they stay current. + let skip_in_prompt: &[&str] = if is_orchestrator { + &["HEARTBEAT.md"] + } else { + &[] + }; + for file in all_files { + // Always sync to disk so builtin updates ship. + sync_workspace_file(ctx.workspace_dir, file); + if skip_in_prompt.contains(file) { + continue; + } + if *file == "SOUL.md" { + if let Some(ref soul) = ctx.personality_soul_md { + tracing::debug!( + "[identity] personality SOUL.md override active ({} chars)", + soul.len() + ); + inject_inline_content(&mut prompt, "SOUL.md", soul, BOOTSTRAP_MAX_CHARS); + continue; + } + } + inject_workspace_file(&mut prompt, ctx.workspace_dir, file); + } + + // PROFILE.md / MEMORY.md injection lives in the dedicated + // `UserFilesSection` (below) so agents that strip the identity + // preamble (`omit_identity = true`) — welcome, orchestrator, the + // trigger pair — still get their user files at runtime via + // `SystemPromptBuilder::for_subagent`, which omits + // `IdentitySection` entirely when `omit_identity` is set. + + Ok(prompt) + } +} + +impl PromptSection for UserFilesSection { + fn name(&self) -> &str { + "user_files" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + // Gate on the per-agent flags derived from + // `AgentDefinition::omit_profile` / `omit_memory_md`. Both files + // are user-specific, potentially growing, and capped at + // [`USER_FILE_MAX_CHARS`] (~1000 tokens) so they can't bloat the + // cached prefix. 
+ // + // KV-cache contract: once injected into a session's rendered + // prompt, the bytes are frozen for the remainder of that + // session — any mid-session archivist write or enrichment + // refresh lands on the NEXT session, never the in-flight one. + let mut out = String::new(); + if ctx.include_profile { + inject_workspace_file_capped( + &mut out, + ctx.workspace_dir, + "PROFILE.md", + USER_FILE_MAX_CHARS, + ); + } + if ctx.include_memory_md { + // Personality-specific MEMORY.md takes highest priority, then + // the session-frozen curated-memory snapshot, then the + // workspace file (pure prompt-unit tests and older call sites). + if let Some(ref memory_md) = ctx.personality_memory_md { + tracing::debug!( + "[user_files] personality MEMORY.md override active ({} chars)", + memory_md.len() + ); + inject_inline_content(&mut out, "MEMORY.md", memory_md, USER_FILE_MAX_CHARS); + } else if let Some(snap) = &ctx.curated_snapshot { + inject_snapshot_content(&mut out, "MEMORY.md", &snap.memory, USER_FILE_MAX_CHARS); + inject_snapshot_content(&mut out, "USER.md", &snap.user, USER_FILE_MAX_CHARS); + } else { + inject_workspace_file_capped( + &mut out, + ctx.workspace_dir, + "MEMORY.md", + USER_FILE_MAX_CHARS, + ); + } + } + Ok(out) + } +} + +impl PromptSection for ToolsSection { + fn name(&self) -> &str { + "tools" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + // Native function-calling: the provider already sends full JSON + // schemas in the API request — no need to repeat the tool catalogue + // in the system prompt (pure token bloat). However, any non-empty + // `dispatcher_instructions` (e.g. the "## Tool Use Protocol" block + // from NativeToolDispatcher) must still be included so the model + // receives its behavioural guidance. 
+ if ctx.tool_call_format == ToolCallFormat::Native { + if ctx.dispatcher_instructions.trim().is_empty() { + return Ok(String::new()); + } + return Ok(ctx.dispatcher_instructions.to_string()); + } + let mut out = String::from("## Tools\n\n"); + let has_filter = !ctx.visible_tool_names.is_empty(); + for tool in ctx.tools { + // Skip tools not in the visible set when a filter is active. + if has_filter && !ctx.visible_tool_names.contains(tool.name) { + continue; + } + + // One rendering shape for every dispatcher: a compact + // P-Format signature (`name[a|b|c]`). The signature comes + // straight from the parameter schema (alphabetical by + // property name — see `pformat` module docs for why) so + // model and parser agree on argument ordering. For + // `Native` dispatchers the provider already has the full + // JSON schema in the API request, so repeating it in the + // prompt is pure token bloat; for `Json` / `PFormat` text + // dispatchers the dispatcher's own `prompt_instructions` + // block (appended below) carries whatever schema detail + // the wire format needs. 
+ let signature = render_pformat_signature_for_prompt(tool); + let _ = writeln!( + out, + "- **{}**: {}\n Call as: `{}`", + tool.name, tool.description, signature + ); + } + if !ctx.dispatcher_instructions.is_empty() { + out.push('\n'); + out.push_str(ctx.dispatcher_instructions); + } + Ok(out) + } +} + +impl PromptSection for SafetySection { + fn name(&self) -> &str { + "safety" + } + + fn build(&self, _ctx: &PromptContext<'_>) -> Result { + Ok("## Safety\n\n- Do not exfiltrate private data.\n- Do not run destructive commands without asking.\n- Do not bypass oversight or approval mechanisms.\n- Prefer `trash` over `rm`.\n- When in doubt, ask before acting externally.".into()) + } +} + +impl PromptSection for WorkspaceSection { + fn name(&self) -> &str { + "workspace" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + Ok(format!( + "## Workspace\n\nWorking directory: `{}`", + ctx.workspace_dir.display() + )) + } +} + +impl PromptSection for RuntimeSection { + fn name(&self) -> &str { + "runtime" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + let host = + hostname::get().map_or_else(|_| "unknown".into(), |h| h.to_string_lossy().to_string()); + Ok(format!( + "## Runtime\n\nHost: {host} | OS: {} | Model: {}", + std::env::consts::OS, + ctx.model_name + )) + } +} + +impl PromptSection for UserReflectionsSection { + fn name(&self) -> &str { + "user_reflections" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + if ctx.learned.reflections.is_empty() { + return Ok(String::new()); + } + + let mut out = String::from("## User Reflections\n\n"); + out.push_str( + "Explicit reflections the user authored about themselves, their goals, \ + or how they want you to behave going forward. 
Treat these as \ + higher-priority than the broader user-memory summaries below: \ + they are recent, intentional, identity-relevant signals and \ + should steer your responses ahead of any generic historical \ + context.\n\n", + ); + for reflection in &ctx.learned.reflections { + let trimmed = reflection.trim(); + if trimmed.is_empty() { + continue; + } + out.push_str("- "); + out.push_str(trimmed); + out.push('\n'); + } + out.push('\n'); + Ok(out) + } +} + +impl PromptSection for UserMemorySection { + fn name(&self) -> &str { + "user_memory" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + if ctx.learned.tree_root_summaries.is_empty() { + return Ok(String::new()); + } + + let mut out = String::from("## User Memory\n\n"); + out.push_str( + "Long-term memory distilled by the tree summarizer. \ + Each section is the root summary for a memory namespace, \ + representing everything we've learned about that domain over time. \ + Treat this as durable background context, but NOT as fresh, \ + present-tense fact: each section header shows when that memory \ + was last updated. Compare those dates against the `## Current \ + Date & Time` section below before answering time-sensitive \ + questions (today's briefing, daily summary, reminders, calendar, \ + notifications, \"today/tomorrow/this week\"). If a summary predates \ + the period the user is asking about, treat it as potentially \ + stale — say so explicitly and never present older memory as \ + today's update.\n\n", + ); + + for NamespaceSummary { + namespace, + body, + updated_at, + } in &ctx.learned.tree_root_summaries + { + let trimmed = body.trim(); + if trimmed.is_empty() { + continue; + } + // Absolute date (not "N days ago") keeps this front-of-prompt + // section byte-stable for KV-cache reuse — see `NamespaceSummary`. 
+ let _ = writeln!( + out, + "### {namespace} (last updated {})\n", + super::render_helpers::memory_date_label(*updated_at) + ); + out.push_str(trimmed); + out.push_str("\n\n"); + } + + Ok(out) + } +} + +impl PromptSection for DateTimeSection { + fn name(&self) -> &str { + "datetime" + } + + fn build(&self, _ctx: &PromptContext<'_>) -> Result { + // IANA zone first because it's the unambiguous machine-readable + // form (`America/Los_Angeles`) — agents that need to reason about + // timezone rules should grep this, not the locale-dependent + // `%Z` abbreviation. Falls back to "UTC" when the host can't + // resolve a zone (CI, stripped containers). + let iana = iana_time_zone::get_timezone().unwrap_or_else(|_| "UTC".to_string()); + let now = chrono::Local::now(); + Ok(format!( + "## Current Date & Time\n\n{} {} ({}, UTC{})", + now.format("%Y-%m-%d %H:%M:%S"), + iana, + now.format("%Z"), + now.format("%:z"), + )) + } +} + +impl PromptSection for UserIdentitySection { + fn name(&self) -> &str { + "user_identity" + } + + fn build(&self, ctx: &PromptContext<'_>) -> Result { + let identity = match ctx.user_identity.as_ref() { + Some(id) if !id.is_empty() => id, + _ => return Ok(String::new()), + }; + + // Render the field list FIRST, then decide whether to ship the + // heading. `UserIdentity::is_empty()` only checks `None`-ness — + // a struct whose fields are all `Some("")` / whitespace would + // otherwise leave the prompt with a `## User` heading + intro + // pointing at zero fields, which is exactly the empty-prompt + // failure mode we're trying to suppress (#926). 
+ let mut fields = String::new(); + if let Some(name) = identity.name.as_deref().filter(|s| !s.trim().is_empty()) { + let _ = writeln!(fields, "- name: {}", sanitize_identity_field(name)); + } + if let Some(email) = identity.email.as_deref().filter(|s| !s.trim().is_empty()) { + let _ = writeln!(fields, "- email: {}", sanitize_identity_field(email)); + } + if let Some(id) = identity.id.as_deref().filter(|s| !s.trim().is_empty()) { + let _ = writeln!(fields, "- id: {}", sanitize_identity_field(id)); + } + if fields.trim().is_empty() { + return Ok(String::new()); + } + + let mut out = String::from("## User\n\n"); + out.push_str( + "The signed-in user is identified below. Use these fields directly in tool \ + calls and do not ask the user to repeat them.\n\n", + ); + out.push_str(&fields); + Ok(out.trim_end().to_string()) + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Private helpers +// ───────────────────────────────────────────────────────────────────────────── + +/// Collapse newlines and runs of whitespace in a user-identity field so +/// it fits on a single markdown bullet without breaking the prompt +/// structure. Values come from `auth_get_me` (server-controlled), but +/// defence-in-depth: a name with embedded newlines could split the +/// `- name:` bullet and reshape the `## User` block. +fn sanitize_identity_field(s: &str) -> String { + s.chars() + .map(|c| if c == '\n' || c == '\r' { ' ' } else { c }) + .collect::() + .split_whitespace() + .collect::>() + .join(" ") +} + +/// Build a P-Format signature line (`name[a|b|c]`) from a [`PromptTool`]. +/// Local to this module so [`ToolsSection`] doesn't have to depend on +/// the agent crate's `pformat` helper. The two implementations stay in +/// lockstep — both use BTreeMap iteration order on the schema's +/// `properties` field. 
+fn render_pformat_signature_for_prompt(tool: &PromptTool<'_>) -> String { + let names: Vec = tool + .parameters_schema + .as_deref() + .and_then(|s| serde_json::from_str::(s).ok()) + .and_then(|v| { + v.get("properties") + .and_then(|p| p.as_object()) + .map(|m| m.keys().cloned().collect()) + }) + .unwrap_or_default(); + if names.is_empty() { + format!("{}[]", tool.name) + } else { + format!("{}[{}]", tool.name, names.join("|")) + } +} diff --git a/src/openhuman/agent/task_dispatcher.rs b/src/openhuman/agent/task_dispatcher.rs deleted file mode 100644 index d6c2f73bb1..0000000000 --- a/src/openhuman/agent/task_dispatcher.rs +++ /dev/null @@ -1,1358 +0,0 @@ -//! Deterministic task-card dispatcher. -//! -//! Turns a [`TaskBoardCard`] into work: it **claims** the card via a -//! compare-and-set (re-load the board and transition only a `Todo`/`Ready` -//! card to `in_progress`, so a stale/concurrent re-dispatch of the same card -//! is rejected), runs a single **autonomous agent turn** toward the card's -//! objective, and **writes the outcome back** to the board (`done` + evidence -//! on success, `blocked` + reason on failure). -//! -//! This is the one executor both dispatch paths converge on: -//! - the **board poller** (cards that arrived without a proactive trigger), and -//! - the **proactive triage** arm (`agent::triage::apply_decision`), once it has -//! decided to act on a task-board card. -//! -//! The runner mirrors `skills::spawn_workflow_run_background`: build the -//! `orchestrator` agent fresh inside a detached task, cap tool iterations, and -//! run `agent.run_single` under `with_autonomous_iter_cap`. PR-4 generalises the -//! executor from the default agent to a resolved personality/skill; this module -//! keeps the default-agent path so the pipeline runs end-to-end first. 
- -use std::collections::HashMap; -use std::path::Path; -use std::sync::{Mutex, OnceLock}; -use std::time::Duration; - -use crate::openhuman::agent::harness::definition::{AgentDefinitionRegistry, PromptSource}; -use crate::openhuman::agent::harness::session::Agent; -use crate::openhuman::agent::harness::subagent_runner::with_autonomous_iter_cap; -use crate::openhuman::agent::personality_paths::PersonalityContext; -use crate::openhuman::agent::task_board::{TaskApprovalMode, TaskBoardCard, TaskCardStatus}; -use crate::openhuman::agent::task_session; -use crate::openhuman::config::Config; -use crate::openhuman::todos::ops::{self, BoardLocation, CardPatch, USER_TASKS_THREAD_ID}; -use crate::openhuman::todos::runs::{self, RunLimits, RunOutcome}; - -/// Max chars of a personality SOUL.md / MEMORY.md or skill guideline block -/// folded into the agent's system-prompt suffix. -const EXECUTOR_PREAMBLE_MAX_CHARS: usize = 800; - -/// Tool-iteration ceiling for an autonomous task run. Matches the skill-run -/// cap — a task brief is the same shape of bounded autonomous work. -const TASK_RUN_MAX_ITERATIONS: usize = 200; - -/// Max chars of the agent's final output retained as board `evidence`. -const EVIDENCE_MAX_CHARS: usize = 2_000; - -/// Handle to an in-flight autonomous run, keyed by its session `thread_id`. -/// -/// Autonomous runs are detached `tokio` tasks, not web-channel turns, so they -/// are invisible to the web channel's own in-flight registry — which is why the -/// chat **Cancel** button (which calls `channel_web_cancel`) couldn't stop them. -/// Registering the run's [`AbortHandle`](tokio::task::AbortHandle) here lets -/// [`cancel_session`] abort it from that same cancel path. 
-struct ActiveRun { - abort: tokio::task::AbortHandle, - hb_cancel: tokio::sync::watch::Sender, - location: BoardLocation, - card_id: String, - run_id: String, -} - -static ACTIVE_RUNS: OnceLock>> = OnceLock::new(); - -fn active_runs() -> &'static Mutex> { - ACTIVE_RUNS.get_or_init(|| Mutex::new(HashMap::new())) -} - -fn register_active_run(thread_id: String, run: ActiveRun) { - active_runs() - .lock() - .expect("active_runs mutex poisoned") - .insert(thread_id, run); -} - -/// Remove and return the active-run entry for `thread_id`. The naturally -/// completing run and a concurrent [`cancel_session`] race on this — whoever -/// gets `Some` "owns" the terminal board write-back, so it happens exactly once. -fn take_active_run(thread_id: &str) -> Option { - active_runs() - .lock() - .expect("active_runs mutex poisoned") - .remove(thread_id) -} - -/// Cancel the in-flight autonomous run streaming into session `thread_id`. -/// -/// Aborts the detached run task, stops its heartbeat, marks the card `blocked` -/// (user-cancelled) so it doesn't dangle `in_progress`, and emits the terminal -/// chat event (broadcast as `"system"`) so the session UI stops "processing". -/// Returns `true` if a run was found and cancelled. Wired into the web channel's -/// `channel_web_cancel` as the fallback when the thread has no web-channel turn. -pub async fn cancel_session(thread_id: &str) -> bool { - let Some(run) = take_active_run(thread_id) else { - return false; - }; - run.abort.abort(); - let _ = run.hb_cancel.send(true); - // The aborted task never reaches its own write-back — do it here so the - // card lands in a terminal state instead of a stale `in_progress`. 
- write_back( - &run.location, - &run.card_id, - &run.run_id, - Err("Cancelled by user".to_string()), - ); - crate::openhuman::channels::providers::web::publish_web_channel_event( - crate::core::socketio::WebChannelEvent { - event: "chat_error".to_string(), - client_id: "system".to_string(), - thread_id: thread_id.to_string(), - request_id: run.run_id.clone(), - message: Some("Cancelled".to_string()), - error_type: Some("cancelled".to_string()), - ..Default::default() - }, - ); - tracing::info!( - thread_id = %thread_id, - card_id = %run.card_id, - run_id = %run.run_id, - "[task_dispatcher] cancelled autonomous run via chat cancel" - ); - true -} - -/// Render a card into the goal prompt handed to the autonomous run. -/// -/// The card's `content`/title is the display form; the prompt leads with the -/// clean `objective`, then any `plan` steps and `acceptance_criteria`, and a -/// pointer to the originating source so the agent can pull related context from -/// memory via its `memory_recall` tool (the GitHub/Notion/… activity for this -/// item is ingested into the summary tree by the memory-sources domain). -pub fn build_task_prompt(card: &TaskBoardCard) -> String { - let mut lines: Vec = Vec::new(); - - let objective = card - .objective - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .unwrap_or_else(|| card.title.trim()); - lines.push(format!( - "You are autonomously executing one task to completion. Objective:\n{objective}" - )); - - if !card.plan.is_empty() { - lines.push("\nPlan:".to_string()); - for (i, step) in card.plan.iter().enumerate() { - lines.push(format!("{}. 
{}", i + 1, step.trim())); - } - } - - if !card.acceptance_criteria.is_empty() { - lines.push("\nAcceptance criteria (the task is done only when all hold):".to_string()); - for c in &card.acceptance_criteria { - lines.push(format!("- {}", c.trim())); - } - } - - if let Some(meta) = &card.source_metadata { - let provider = meta.get("provider").and_then(|v| v.as_str()); - let repo = meta.get("repo").and_then(|v| v.as_str()); - let external_id = meta.get("external_id").and_then(|v| v.as_str()); - let url = meta.get("url").and_then(|v| v.as_str()); - let mut origin = String::new(); - if let Some(p) = provider { - origin.push_str(p); - } - if let Some(r) = repo { - origin.push_str(&format!(" {r}")); - } - if let Some(id) = external_id { - origin.push_str(&format!("#{id}")); - } - // Gate on a known provider so the origin string is always meaningful - // (an id-only card would render "#123" with a leading space). - if provider.is_some() { - lines.push(format!( - "\nThis task originates from {}. Its activity has been ingested into memory — use \ - your memory_recall tool to pull related context (prior discussion, linked items) \ - before and while you work.", - origin.trim() - )); - } - if let Some(u) = url { - lines.push(format!("Source link: {u}")); - } - // G9b — agent-driven external write-back. When the upstream item is - // addressable (provider + id), instruct the agent to close the loop on - // the source itself via its integration tools. Runs under the - // connection's existing write scope (no extra approval gate); if it - // can't, it reports that instead of failing. - if provider.is_some() && external_id.is_some() { - lines.push(format!( - "\nWhen the task is complete, record the outcome on the upstream source ({}): use \ - your integration tools to add a comment summarising the resolution and, if the \ - work fully addresses it, close/resolve the item. 
If you lack the permission or \ - connection to do so, say so in your final summary instead of guessing.", - origin.trim() - )); - } - } - - lines.push( - "\nWork the task to completion. Do not pick up unrelated work. When finished, your final \ - message should summarise what you did and the evidence (commits, PRs, results)." - .to_string(), - ); - - lines.join("\n") -} - -/// Instruction appended to the run prompt so the autonomous turn keeps its own -/// task card current via the `update_task` tool while it works. -/// -/// The card is already `in_progress` (the dispatcher claimed it before -/// spawning the run), addressed by the exact card id + board the run owns -/// (without the explicit `threadId` the tool defaults to the `task-sources` -/// board and would miss a `user-tasks` card). Two things this asks for: -/// 1. *progress* updates (notes/evidence) as the run works, and -/// 2. an explicit `status: blocked` + `blocker` when the run needs a -/// decision/information from the user or cannot proceed — which -/// [`write_back`] now preserves rather than force-completing, so the task -/// pauses for the user instead of being silently marked done. -fn build_progress_instruction(card_id: &str, thread_id: &str) -> String { - format!( - "\n\nThis task is tracked as card `{card_id}` on the `{thread_id}` board. As you work, \ - call the `update_task` tool (id `{card_id}`, threadId `{thread_id}`) to keep the card \ - current — append `notes`/`evidence` as you make progress.\n\nIf you need a decision or \ - information from the user, or you genuinely cannot proceed (missing access, ambiguous \ - requirement, an action that needs the user's confirmation), call `update_task` with \ - `status: blocked` and a `blocker` that states exactly what you need from the user. The \ - task will stay paused in that blocked state until the user responds — do NOT guess, \ - fabricate, or take a risky irreversible action just to avoid blocking. 
If instead you \ - finish the work, end with a summary of what you did and the evidence; completion is \ - recorded automatically." - ) -} - -/// Outcome of a dispatch attempt. -#[derive(Debug)] -pub enum DispatchOutcome { - /// The card was claimed and a detached autonomous run was spawned. - Running { run_id: String }, - /// Plan approval is required; the card was parked at `awaiting_approval` - /// and a `TaskPlanAwaitingApproval` event was emitted. No run was spawned. - AwaitingApproval, -} - -/// Dispatch one card: gate on plan approval, claim it, run an autonomous turn, -/// write the result back. -/// -/// Returns `Ok(Running)` once the card is claimed and the detached run is -/// spawned, `Ok(AwaitingApproval)` if the card was parked for human approval, -/// or `Err` *without* spawning when the card is no longer claimable — its -/// freshly-loaded status isn't `Todo`/`Ready` (already running/done, or another -/// dispatcher won the claim). Benign: the poller retries next tick. -pub async fn dispatch_card( - location: BoardLocation, - card: TaskBoardCard, -) -> Result { - let card_id = card.id.clone(); - - let config = Config::load_or_init() - .await - .map_err(|e| format!("load config: {e:#}"))?; - - // Plan-approval gate: when required, a `todo` card is parked for human - // approval before it can run. `Ready` (already approved) bypasses. We - // attempt the AwaitingApproval claim first so the gate is also atomic — - // two dispatchers racing the same Todo card won't both park it. - // - // A card explicitly marked `approval_mode = NotRequired` also bypasses the - // gate: it has already cleared human review (e.g. a task approved out of - // the `task-sources` inbox onto the `user-tasks` board, stamped - // `not_required` at approval time). Re-parking it under the global default - // would strand it on a board nobody approves from. Per-card opt-out wins. 
- if requires_plan_approval( - config.autonomy.require_task_plan_approval, - card.approval_mode.as_ref(), - ) { - match ops::claim_card( - &location, - &card_id, - &[TaskCardStatus::Todo], - TaskCardStatus::AwaitingApproval, - ) { - Ok(_parked) => { - if let Some(thread_id) = location.thread_id() { - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::TaskPlanAwaitingApproval { - card_id: card_id.clone(), - thread_id: thread_id.to_string(), - }, - ); - } - tracing::info!(card_id = %card_id, "[task_dispatcher] parked card awaiting plan approval"); - return Ok(DispatchOutcome::AwaitingApproval); - } - Err(_) => { - // Card wasn't `Todo` — fall through to the main claim path, - // which handles `Ready` cards and rejects everything else. - } - } - } - - // Atomic claim: transition Todo|Ready → InProgress under a per-board - // lock so concurrent dispatchers cannot both succeed. The returned card - // is the freshly-loaded snapshot — the prompt uses it, not the caller's - // potentially stale copy. - let fresh_card = ops::claim_card( - &location, - &card_id, - &[TaskCardStatus::Todo, TaskCardStatus::Ready], - TaskCardStatus::InProgress, - ) - .map_err(|e| format!("[task_dispatcher] claim rejected for {card_id}: {e}"))?; - - let mut prompt = build_task_prompt(&fresh_card); - // Tell the run which card it owns so it can post live progress via the - // `update_task` tool (notes/evidence) as it works. The terminal - // `done`/`blocked` transition is still stamped deterministically by - // `write_back` from the run outcome. - if let Some(thread_id) = location.thread_id() { - prompt.push_str(&build_progress_instruction(&card_id, thread_id)); - } - - let run_id = uuid::Uuid::new_v4().to_string(); - - // Resolve which executor runs this card: default agent, a personality, or - // a skill — one autonomous-run interface, three presets (G4 + G3). 
- let executor = resolve_executor(&config.workspace_dir, fresh_card.assigned_agent.as_deref()); - tracing::info!( - card_id = %card_id, - run_id = %run_id, - executor = %executor.label, - agent_id = %executor.agent_id, - prompt_chars = prompt.chars().count(), - "[task_dispatcher] card claimed (→in_progress), spawning autonomous run" - ); - - if let Err(e) = runs::create_run(&location, &run_id, &card_id, &executor.label) { - tracing::warn!( - run_id = %run_id, - card_id = %card_id, - error = %e, - "[task_dispatcher] failed to create run record (proceeding without liveness tracking)" - ); - } - - let (hb_cancel_tx, hb_cancel_rx) = tokio::sync::watch::channel(false); - runs::spawn_heartbeat_task(location.clone(), run_id.clone(), hb_cancel_rx); - - // Materialise this autonomous run as a top-level task-session thread so it - // surfaces in Conversations → Tasks like a manually-run todo. Best-effort: - // `None` just means the run streams nowhere (headless), exactly as before. - let session_thread_id = task_session::create_session_thread( - config.workspace_dir.clone(), - &fresh_card, - &run_id, - &prompt, - ); - - // Stamp the session thread onto the card so the board UI can offer a - // "View session" jump into Conversations. Best-effort: a failure here just - // means the link is unavailable; the run proceeds regardless. - if let Some(thread_id) = session_thread_id.as_deref() { - if let Err(e) = ops::set_session_thread(&location, &card_id, Some(thread_id.to_string())) { - tracing::warn!( - card_id = %card_id, - thread_id = %thread_id, - error = %e, - "[task_dispatcher] failed to stamp session thread on card (View session link unavailable)" - ); - } - } - - let run_id_for_return = run_id.clone(); - let location_for_run = location.clone(); - // Clones for the active-run registry (the originals move into the task). 
- let reg_thread = session_thread_id.clone(); - let reg_location = location.clone(); - let reg_card_id = card_id.clone(); - let reg_run_id = run_id.clone(); - let hb_cancel_for_task = hb_cancel_tx.clone(); - let task_thread = session_thread_id.clone(); - // Gate the task on registration: a fast-finishing run could otherwise reach - // its terminal `take_active_run` before `register_active_run` below has run, - // see no entry, and skip `write_back` — leaving card/run state inconsistent. - // The task parks on `start_rx` until we release it after registration. - let (start_tx, start_rx) = tokio::sync::oneshot::channel::<()>(); - let join = tokio::spawn(async move { - let _ = start_rx.await; - let outcome = run_autonomous(config, &executor, &prompt, &run_id, session_thread_id).await; - let _ = hb_cancel_for_task.send(true); - // Race with a concurrent cancel: whoever removes the registry entry owns - // the write-back, so it runs exactly once. No entry (no session thread, - // or a cancel already took it) → we skip it. - let still_ours = match &task_thread { - Some(tid) => take_active_run(tid).is_some(), - None => true, - }; - if still_ours { - write_back(&location_for_run, &card_id, &run_id, outcome); - } - }); - - // Register the run so the chat Cancel (web `channel_web_cancel` → - // `cancel_session`) can abort it — task threads aren't web-channel turns. - if let Some(tid) = reg_thread { - register_active_run( - tid, - ActiveRun { - abort: join.abort_handle(), - hb_cancel: hb_cancel_tx, - location: reg_location, - card_id: reg_card_id, - run_id: reg_run_id, - }, - ); - } - // Registration (if any) is in place — release the task to start running. - let _ = start_tx.send(()); - - Ok(DispatchOutcome::Running { - run_id: run_id_for_return, - }) -} - -/// A resolved executor: which built-in agent definition to build, an optional -/// system-prompt suffix carrying a personality identity or skill guidelines, -/// and a label for logs/telemetry. 
-#[derive(Debug, Clone, PartialEq)] -struct ResolvedExecutor { - agent_id: String, - prompt_suffix: Option, - label: String, -} - -impl ResolvedExecutor { - fn default_agent() -> Self { - Self { - agent_id: "orchestrator".to_string(), - prompt_suffix: None, - label: "default".to_string(), - } - } -} - -/// Map a card's `assigned_agent` handle to one of three executor presets: -/// **personality** (scoped SOUL/MEMORY folded into the prompt suffix, run as -/// that profile's agent), **skill** (orchestrator seeded with the skill's -/// `SKILL.md` guidelines), or **built-in agent**. An unset or unresolved handle -/// degrades to the default `orchestrator` — "use the personality if valid, -/// otherwise the default agent." -fn resolve_executor(workspace_dir: &Path, assigned: Option<&str>) -> ResolvedExecutor { - let Some(handle) = assigned.map(str::trim).filter(|s| !s.is_empty()) else { - return ResolvedExecutor::default_agent(); - }; - if handle == "orchestrator" { - return ResolvedExecutor::default_agent(); - } - - // 1) Personality (#2895): a user-defined profile with scoped identity. - if let Ok(state) = crate::openhuman::agent::profiles::load_profiles(workspace_dir) { - if let Some(profile) = state.profiles.iter().find(|p| p.id == handle) { - let ctx = PersonalityContext::from_profile(workspace_dir, profile.clone()); - let mut preamble = format!( - "You are acting as the personality `{}` (\"{}\"). 
{}", - profile.id, profile.name, profile.description - ); - if let Some(soul) = &ctx.soul_md_override { - preamble.push_str("\n\n[Personality SOUL.md]\n"); - preamble.push_str(&truncate_chars(soul, EXECUTOR_PREAMBLE_MAX_CHARS)); - } - if let Some(mem) = &ctx.memory_md_override { - preamble.push_str("\n\n[Personality MEMORY.md]\n"); - preamble.push_str(&truncate_chars(mem, EXECUTOR_PREAMBLE_MAX_CHARS)); - } - return ResolvedExecutor { - agent_id: profile.agent_id.clone(), - prompt_suffix: Some(preamble), - label: format!("personality:{handle}"), - }; - } - } - - // 2) Workflow (#2824): the same autonomous run, seeded with SKILL.md. - if let Some(skill) = crate::openhuman::workflows::registry::get_workflow(workspace_dir, handle) - { - let guidelines = match &skill.definition.system_prompt { - PromptSource::Inline(s) => truncate_chars(s, EXECUTOR_PREAMBLE_MAX_CHARS), - _ => String::new(), - }; - let suffix = format!( - "You are executing this task as the skill `{handle}`. Follow these skill \ - guidelines exactly:\n\n{guidelines}" - ); - return ResolvedExecutor { - agent_id: "orchestrator".to_string(), - prompt_suffix: Some(suffix), - label: format!("skill:{handle}"), - }; - } - - // 3) Built-in agent definition. - if AgentDefinitionRegistry::global() - .and_then(|r| r.get(handle)) - .is_some() - { - return ResolvedExecutor { - agent_id: handle.to_string(), - prompt_suffix: None, - label: format!("agent:{handle}"), - }; - } - - // 4) Unresolved → degrade to the default agent (don't fail the card). - tracing::warn!( - handle = %handle, - "[task_dispatcher] assigned executor did not resolve to a personality/skill/agent; \ - using default orchestrator" - ); - ResolvedExecutor { - label: "default-fallback".to_string(), - ..ResolvedExecutor::default_agent() - } -} - -/// Run the resolved executor as a single autonomous turn using the -/// already-loaded config. 
The executor's prompt suffix (personality identity or -/// skill guidelines) rides in the system prompt; the card goal is the turn input. -/// -/// SECURITY / threat model (prompt injection): the card objective/content and -/// `source_metadata` derive from external, attacker-influenceable text (e.g. a -/// GitHub issue body anyone in a watched repo can file), and this background -/// run is gate-free at the per-tool level (background turns auto-allow, like -/// skill runs) while `build_task_prompt` may instruct it to write back to the -/// upstream item. The interactive checkpoint is therefore the up-front -/// **plan-approval gate** (`require_task_plan_approval`), which a human reviews -/// before the run starts — not per-action egress/write approval. Egress is -/// widened to `*` only when the operator set no explicit allow-list (matching -/// skill runs, since real task work needs broad reach: git, package registries, -/// provider APIs). Tightening egress to the source provider's domains for -/// source-ingested runs is a considered follow-up (it would break general task -/// work, so it needs to key off provenance) — tracked for a later PR. -async fn run_autonomous( - mut config: Config, - executor: &ResolvedExecutor, - prompt: &str, - run_id: &str, - session_thread_id: Option, -) -> Result { - config.agent.max_tool_iterations = TASK_RUN_MAX_ITERATIONS; - // Match skill-run egress handling: only widen to the permissive default - // when the operator hasn't configured an explicit allow-list. See the - // threat-model note above on why `*` is the default here. 
- if config.http_request.allowed_domains.is_empty() { - config.http_request.allowed_domains = vec!["*".to_string()]; - } - - let mut agent = Agent::from_config_for_agent_with_profile( - &config, - &executor.agent_id, - None, - executor.prompt_suffix.clone(), - ) - .map_err(|e| format!("build agent: {e:#}"))?; - agent.set_event_context(run_id.to_string(), "task"); - agent.set_agent_definition_name(format!( - "task-{}-{}", - executor.label, - run_id.get(..8).unwrap_or(run_id) - )); - - // Stream this autonomous run into its task-session thread exactly like a - // chat turn: wire the agent's progress into the web-channel bridge with the - // broadcast client id "system" — the same mechanism cron/welcome agents use. - // The bridge (a) emits live text/tool socket events that any client viewing - // the thread renders in real time (the frontend keys by thread_id), and - // (b) persists a TurnStateMirror so the tool timeline replays when the - // session is opened mid/after run. Best-effort — with no session thread the - // run is headless, exactly as before this feature. - let workspace_dir = config.workspace_dir.clone(); - if let Some(thread_id) = session_thread_id.as_deref() { - let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); - agent.set_on_progress(Some(progress_tx)); - crate::openhuman::channels::providers::web::spawn_progress_bridge( - progress_rx, - "system".to_string(), - thread_id.to_string(), - run_id.to_string(), - crate::openhuman::threads::turn_state::TurnStateStore::new(workspace_dir.clone()), - crate::openhuman::channels::providers::web::ChatRequestMetadata::default(), - config.clone(), - ); - } - - // Sub-agent task runs are internal to the agent harness — the user - // already authorized the parent turn that dispatched this task. Label - // as CLI so the approval gate doesn't fail closed on internal - // sub-agent invocations. 
- let run = crate::openhuman::agent::turn_origin::with_origin( - crate::openhuman::agent::turn_origin::AgentTurnOrigin::Cli, - with_autonomous_iter_cap(TASK_RUN_MAX_ITERATIONS, agent.run_single(prompt)), - ); - let result = match session_thread_id.as_deref() { - Some(thread_id) => { - crate::openhuman::inference::provider::thread_context::with_thread_id( - thread_id.to_string(), - run, - ) - .await - } - None => run.await, - } - .map_err(|e| format!("{e:#}")); - - // Emit the terminal chat event so a client viewing the session stops - // "processing" and finalizes the assistant bubble — the SAME chat_done / - // chat_error the web channel emits at the end of a normal turn. The - // progress bridge only streams intermediate deltas; without this terminal - // signal the live-streamed session spins forever. Broadcast as "system" so - // any viewer of the thread receives it (frontend keys by thread_id). - if let Some(thread_id) = session_thread_id.as_deref() { - match &result { - Ok(response) => { - crate::openhuman::channels::providers::presentation::deliver_response( - "system", - thread_id, - run_id, - response, - prompt, - &[], - ) - .await; - } - Err(err) => { - crate::openhuman::channels::providers::web::publish_web_channel_event( - crate::core::socketio::WebChannelEvent { - event: "chat_error".to_string(), - client_id: "system".to_string(), - thread_id: thread_id.to_string(), - request_id: run_id.to_string(), - message: Some(err.clone()), - error_type: Some("agent_error".to_string()), - ..Default::default() - }, - ); - } - } - // Persist the final response as the closing assistant message so a - // reopened session shows the outcome like a finished manual run. - task_session::append_final(workspace_dir, thread_id, &result); - } - result -} - -/// Deterministic board write-back: the dispatcher owns the card lifecycle. -/// Success → `done` + evidence; failure → `blocked` + blocker reason. 
An -/// external write failure here is logged, never propagated — the run already -/// happened. -/// Current persisted status of a card, or `None` if the board can't be read or -/// the card is gone. Used by `write_back` to detect a run that blocked itself. -fn current_card_status(location: &BoardLocation, card_id: &str) -> Option { - ops::list(location) - .ok() - .and_then(|snap| snap.cards.into_iter().find(|c| c.id == card_id)) - .map(|c| c.status) -} - -fn write_back( - location: &BoardLocation, - card_id: &str, - run_id: &str, - outcome: Result, -) { - // Respect a status the run set for itself: if the agent marked the card - // `blocked` via `update_task` (it needs a decision/input from the user, or - // genuinely cannot proceed), leave it blocked — do NOT force-complete it. - // The task then stays paused in that state until the user responds, instead - // of a "clean turn" being silently recorded as done. Otherwise mark done - // with evidence; a run error marks blocked with the error as the blocker. 
- let agent_self_blocked = - outcome.is_ok() && current_card_status(location, card_id) == Some(TaskCardStatus::Blocked); - - let patch = if agent_self_blocked { - tracing::info!( - card_id = %card_id, - run_id = %run_id, - "[task_dispatcher] run ended with card self-blocked → leaving blocked (awaiting user input), not auto-completing" - ); - None - } else { - match &outcome { - Ok(output) => { - tracing::info!( - card_id = %card_id, - run_id = %run_id, - output_chars = output.chars().count(), - "[task_dispatcher] run complete → done" - ); - Some(CardPatch { - status: Some(TaskCardStatus::Done), - evidence: Some(vec![truncate_chars(output.trim(), EVIDENCE_MAX_CHARS)]), - ..Default::default() - }) - } - Err(err) => { - tracing::warn!( - card_id = %card_id, - run_id = %run_id, - error = %err, - "[task_dispatcher] run failed → blocked" - ); - Some(CardPatch { - status: Some(TaskCardStatus::Blocked), - blocker: Some(truncate_chars(err, EVIDENCE_MAX_CHARS)), - ..Default::default() - }) - } - } - }; - - if let Some(patch) = patch { - if let Err(e) = ops::edit(location, card_id, patch) { - tracing::error!( - card_id = %card_id, - run_id = %run_id, - error = %e, - "[task_dispatcher] board write-back failed (run outcome lost from board)" - ); - } - } - - let (run_outcome, run_error, run_evidence) = match &outcome { - Ok(output) => ( - RunOutcome::Success, - None, - vec![truncate_chars(output.trim(), EVIDENCE_MAX_CHARS)], - ), - Err(err) => ( - RunOutcome::Failed, - Some(truncate_chars(err, EVIDENCE_MAX_CHARS)), - Vec::new(), - ), - }; - if let Err(e) = runs::complete_run(location, run_id, run_outcome, run_error, run_evidence) { - tracing::warn!( - run_id = %run_id, - error = %e, - "[task_dispatcher] run record completion failed" - ); - } -} - -fn truncate_chars(s: &str, max: usize) -> String { - if s.chars().count() <= max { - return s.to_string(); - } - let mut out: String = s.chars().take(max.saturating_sub(1)).collect(); - out.push('…'); - out -} - -// ── Board poller 
────────────────────────────────────────────────────────── - -/// How often the poller wakes to look for a dispatchable card. -const POLLER_TICK_SECONDS: u64 = 60; - -static POLLER_STARTED: OnceLock<()> = OnceLock::new(); - -/// Spawn the board poller. Idempotent — only the first call installs the loop. -/// -/// Each tick it scans the `task-sources` board and dispatches the -/// highest-urgency `todo` card via [`dispatch_card`], gated by background-AI -/// capacity (`scheduler_gate`). This is the catch-all for cards that arrive -/// without a proactive trigger (`TodoOnly` sources, manual cards, or proactive -/// turns the gate skipped). Cards that *did* get a proactive trigger are -/// dispatched by the triage arm; the claim-based lock makes firing both safe. -pub fn start_board_poller() { - if POLLER_STARTED.set(()).is_err() { - tracing::debug!("[task_dispatcher:poller] already running, skipping start"); - return; - } - tokio::spawn(async move { - tracing::info!( - tick_seconds = POLLER_TICK_SECONDS, - "[task_dispatcher:poller] starting" - ); - let mut ticker = tokio::time::interval(Duration::from_secs(POLLER_TICK_SECONDS)); - ticker.tick().await; // skip the immediate fire so startup isn't slammed - loop { - ticker.tick().await; - if let Err(e) = poll_once().await { - tracing::warn!(error = %e, "[task_dispatcher:poller] tick failed (continuing)"); - } - } - }); -} - -/// One poller tick: sweep each executor board and dispatch its highest-urgency -/// dispatchable card, if any and if capacity allows. `pub(crate)` so tests can -/// drive a tick without the real interval. -/// -/// Two boards are swept, each independently (own stale-reclaim + single -/// `in_progress` cap): -/// - **`user-tasks`** (the kanban work board) — always swept, but only -/// **agent-assigned** cards are run, so a human's manually-created todo is -/// never auto-executed. This is where tasks approved out of the inbox run. 
-/// - **`task-sources`** (the proactive inbox) — swept only when ingestion is -/// enabled. With plan-approval required this only ever parks a `todo` at -/// `awaiting_approval`; it runs a card directly only when approval is off. -/// Kept in the sweep so its stale/wedged runs are still reclaimed. -pub(crate) async fn poll_once() -> Result<(), String> { - // Gate on background-AI capacity (autonomy / power / pause). Dropping the - // permit immediately is fine: this is a "may background work start now" - // check; the run itself is detached. - let Some(_permit) = crate::openhuman::scheduler_gate::wait_for_capacity().await else { - tracing::debug!("[task_dispatcher:poller] scheduler gate denied capacity; idle tick"); - return Ok(()); - }; - - let config = Config::load_or_init() - .await - .map_err(|e| format!("load config: {e:#}"))?; - - // (board location, agent_assigned_only). user-tasks first — it's the real - // work board; task-sources is only included for parking + reclaim. - let mut boards: Vec<(BoardLocation, bool)> = vec![( - BoardLocation::Thread { - workspace_dir: config.workspace_dir.clone(), - thread_id: USER_TASKS_THREAD_ID.to_string(), - }, - true, - )]; - if config.task_sources.enabled { - boards.push(( - BoardLocation::Thread { - workspace_dir: config.workspace_dir.clone(), - thread_id: crate::openhuman::task_sources::TASK_SOURCES_THREAD_ID.to_string(), - }, - false, - )); - } - - for (location, agent_assigned_only) in boards { - if let Err(e) = poll_board(&location, agent_assigned_only).await { - tracing::warn!( - thread_id = ?location.thread_id(), - error = %e, - "[task_dispatcher:poller] board sweep failed (continuing)" - ); - } - } - Ok(()) -} - -/// Sweep one board: reclaim stale runs, then (unless one is already running) -/// dispatch its highest-urgency dispatchable card. When `agent_assigned_only` -/// is set, only cards with an `assigned_agent` are eligible — the guard that -/// keeps the poller off a human's manual `user-tasks` cards. 
-async fn poll_board(location: &BoardLocation, agent_assigned_only: bool) -> Result<(), String> { - // Reclaim stale/wedged runs before looking for new work. Reclaimed - // cards move back to `todo` (re-dispatchable) so they appear in the - // snapshot below and can be picked up in the same tick. - match runs::reclaim_stale(location, &RunLimits::default()) { - Ok(result) if result.reclaimed_count > 0 || result.blocked_count > 0 => { - tracing::info!( - thread_id = ?location.thread_id(), - reclaimed = result.reclaimed_count, - blocked = result.blocked_count, - "[task_dispatcher:poller] stale runs reclaimed" - ); - } - Err(e) => { - tracing::warn!( - thread_id = ?location.thread_id(), - error = %e, - "[task_dispatcher:poller] stale reclaim failed (continuing)" - ); - } - _ => {} - } - - let snapshot = ops::list(location)?; - - // `enforce_single_in_progress` caps the board at one running card, so if - // one is already in progress there's nothing for this tick to claim. - if snapshot - .cards - .iter() - .any(|c| c.status == TaskCardStatus::InProgress) - { - return Ok(()); - } - - let Some(card) = pick_next_todo(&snapshot.cards, agent_assigned_only) else { - return Ok(()); - }; - - tracing::info!( - card_id = %card.id, - thread_id = ?location.thread_id(), - urgency = card_urgency(&card), - agent_assigned_only, - "[task_dispatcher:poller] dispatching highest-urgency dispatchable card" - ); - dispatch_card(location.clone(), card).await.map(|_| ()) -} - -/// Highest-urgency dispatchable card (`todo` or approved `ready`; urgency from -/// `source_metadata.urgency`, default 0.0; ties broken toward the lower board -/// `order`). Returns a clone. `dispatch_card` then either runs a `ready` card -/// or parks a `todo` one for approval, per the autonomy setting. 
-/// -/// When `agent_assigned_only` is set, cards without an `assigned_agent` are -/// excluded — used on the `user-tasks` board so the poller runs only -/// agent-generated tasks and never picks up a human's manually-created card. -fn pick_next_todo(cards: &[TaskBoardCard], agent_assigned_only: bool) -> Option { - cards - .iter() - .filter(|c| matches!(c.status, TaskCardStatus::Todo | TaskCardStatus::Ready)) - .filter(|c| { - !agent_assigned_only - || c.assigned_agent - .as_deref() - .map(|a| !a.trim().is_empty()) - .unwrap_or(false) - }) - .max_by(|a, b| { - card_urgency(a) - .partial_cmp(&card_urgency(b)) - .unwrap_or(std::cmp::Ordering::Equal) - // On equal urgency, prefer the lower `order` (earlier card): - // reversing the order comparison makes it the "greater" pick. - .then(b.order.cmp(&a.order)) - }) - .cloned() -} - -/// Whether a card must be parked at `awaiting_approval` before it can run. -/// -/// The global `require_task_plan_approval` setting applies *unless* the card is -/// explicitly marked `approval_mode = NotRequired` — a per-card opt-out for -/// tasks that have already cleared human review (e.g. approved out of the -/// `task-sources` inbox onto `user-tasks`). Per-card opt-out wins over the -/// global default; without this, an already-approved card would be re-parked -/// and stranded. -fn requires_plan_approval(global_required: bool, approval_mode: Option<&TaskApprovalMode>) -> bool { - global_required && approval_mode != Some(&TaskApprovalMode::NotRequired) -} - -fn card_urgency(card: &TaskBoardCard) -> f64 { - card.source_metadata - .as_ref() - .and_then(|m| m.get("urgency")) - .and_then(serde_json::Value::as_f64) - .unwrap_or(0.0) -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - - #[tokio::test] - async fn active_run_registry_take_is_once() { - // Race-safety: the completing run and a concurrent cancel both call - // `take_active_run`; exactly one gets `Some` (and owns the write-back). 
- let (tx, _rx) = tokio::sync::watch::channel(false); - let handle = tokio::spawn(async { std::future::pending::<()>().await }); - let key = "task-cancel-registry-test"; - register_active_run( - key.to_string(), - ActiveRun { - abort: handle.abort_handle(), - hb_cancel: tx, - location: BoardLocation::Scratch, - card_id: "c1".to_string(), - run_id: "r1".to_string(), - }, - ); - assert!(take_active_run(key).is_some(), "first take owns the run"); - assert!( - take_active_run(key).is_none(), - "second take gets nothing — write-back happens exactly once" - ); - handle.abort(); - } - - fn card(objective: Option<&str>) -> TaskBoardCard { - TaskBoardCard { - id: "task-1".into(), - title: "[GitHub] Fix login bug".into(), - status: TaskCardStatus::Todo, - objective: objective.map(str::to_string), - plan: vec![], - assigned_agent: None, - allowed_tools: vec![], - approval_mode: None, - acceptance_criteria: vec![], - evidence: vec![], - notes: None, - blocker: None, - session_thread_id: None, - source_metadata: None, - order: 0, - updated_at: String::new(), - } - } - - #[test] - fn prompt_uses_objective_then_falls_back_to_title() { - let p = build_task_prompt(&card(Some("Fix the login bug"))); - assert!(p.contains("Fix the login bug")); - assert!(!p.contains("[GitHub]")); - - let p2 = build_task_prompt(&card(None)); - assert!(p2.contains("[GitHub] Fix login bug")); - } - - #[test] - fn prompt_includes_plan_and_acceptance_criteria() { - let mut c = card(Some("Do it")); - c.plan = vec!["step one".into(), "step two".into()]; - c.acceptance_criteria = vec!["tests pass".into()]; - let p = build_task_prompt(&c); - assert!(p.contains("Plan:")); - assert!(p.contains("1. step one")); - assert!(p.contains("2. 
step two")); - assert!(p.contains("Acceptance criteria")); - assert!(p.contains("- tests pass")); - } - - #[test] - fn prompt_points_at_source_and_memory_when_metadata_present() { - let mut c = card(Some("Resolve issue")); - c.source_metadata = Some(json!({ - "provider": "github", - "repo": "octo/repo", - "external_id": "123", - "url": "https://github.com/octo/repo/issues/123", - })); - let p = build_task_prompt(&c); - assert!(p.contains("github octo/repo#123")); - assert!(p.contains("memory_recall")); - assert!(p.contains("https://github.com/octo/repo/issues/123")); - } - - #[test] - fn prompt_omits_source_block_without_metadata() { - let p = build_task_prompt(&card(Some("Do it"))); - assert!(!p.contains("memory_recall")); - assert!(!p.contains("record the outcome on the upstream source")); - } - - #[test] - fn prompt_includes_external_writeback_when_addressable() { - let mut c = card(Some("Resolve issue")); - c.source_metadata = Some(json!({ - "provider": "github", - "repo": "octo/repo", - "external_id": "123", - })); - let p = build_task_prompt(&c); - assert!(p.contains("record the outcome on the upstream source")); - assert!(p.contains("close/resolve the item")); - } - - #[test] - fn prompt_omits_writeback_when_not_addressable() { - // Urgency-only metadata (no provider/external_id) can't address an - // upstream item, so no write-back instruction. 
- let mut c = card(Some("Do it")); - c.source_metadata = Some(json!({ "urgency": 0.5 })); - let p = build_task_prompt(&c); - assert!(!p.contains("record the outcome on the upstream source")); - } - - #[test] - fn truncate_caps_long_strings() { - let s = "x".repeat(5_000); - let out = truncate_chars(&s, EVIDENCE_MAX_CHARS); - assert!(out.chars().count() <= EVIDENCE_MAX_CHARS); - assert!(out.ends_with('…')); - } - - fn card_with( - id: &str, - status: TaskCardStatus, - urgency: Option, - order: u32, - ) -> TaskBoardCard { - let mut c = card(Some("obj")); - c.id = id.into(); - c.status = status; - c.order = order; - c.source_metadata = urgency.map(|u| json!({ "urgency": u })); - c - } - - #[test] - fn poller_picks_highest_urgency_todo_skipping_other_statuses() { - let cards = vec![ - card_with("a", TaskCardStatus::Todo, Some(0.3), 0), - card_with("b", TaskCardStatus::Done, Some(0.99), 1), - card_with("c", TaskCardStatus::Todo, Some(0.8), 2), - card_with("d", TaskCardStatus::Todo, None, 3), - ]; - let picked = pick_next_todo(&cards, false).expect("a todo card is available"); - assert_eq!( - picked.id, "c", - "highest-urgency todo wins, done card ignored" - ); - } - - #[test] - fn poller_breaks_urgency_ties_toward_lower_order() { - let cards = vec![ - card_with("late", TaskCardStatus::Todo, Some(0.5), 5), - card_with("early", TaskCardStatus::Todo, Some(0.5), 2), - ]; - assert_eq!(pick_next_todo(&cards, false).unwrap().id, "early"); - } - - #[test] - fn poller_returns_none_when_no_todo_cards() { - let cards = vec![card_with("a", TaskCardStatus::Done, Some(0.9), 0)]; - assert!(pick_next_todo(&cards, false).is_none()); - } - - #[test] - fn poller_dispatches_ready_cards_and_skips_approval_states() { - // Approved `ready` cards are dispatchable; `awaiting_approval` and - // `rejected` are not. 
- let cards = vec![ - card_with("await", TaskCardStatus::AwaitingApproval, Some(0.99), 0), - card_with("rej", TaskCardStatus::Rejected, Some(0.95), 1), - card_with("ready", TaskCardStatus::Ready, Some(0.5), 2), - ]; - assert_eq!(pick_next_todo(&cards, false).unwrap().id, "ready"); - } - - #[test] - fn poller_prefers_higher_urgency_across_todo_and_ready() { - let cards = vec![ - card_with("ready-low", TaskCardStatus::Ready, Some(0.3), 0), - card_with("todo-high", TaskCardStatus::Todo, Some(0.9), 1), - ]; - assert_eq!(pick_next_todo(&cards, false).unwrap().id, "todo-high"); - } - - #[test] - fn poller_agent_only_skips_unassigned_cards() { - // On the user-tasks board we run only agent-assigned cards. A human's - // manual todo (no assigned_agent) must be skipped even at high urgency. - let mut human = card_with("human", TaskCardStatus::Todo, Some(0.99), 0); - human.assigned_agent = None; - let mut agent = card_with("agent", TaskCardStatus::Todo, Some(0.20), 1); - agent.assigned_agent = Some("orchestrator".into()); - let cards = vec![human, agent]; - - // Agent-only: the lower-urgency assigned card wins; the human card is invisible. - assert_eq!(pick_next_todo(&cards, true).unwrap().id, "agent"); - // Unfiltered (task-sources behaviour): highest urgency wins regardless. - assert_eq!(pick_next_todo(&cards, false).unwrap().id, "human"); - } - - #[test] - fn poller_agent_only_returns_none_when_all_unassigned() { - let mut a = card_with("a", TaskCardStatus::Todo, Some(0.9), 0); - a.assigned_agent = None; - let mut b = card_with("b", TaskCardStatus::Todo, Some(0.5), 1); - b.assigned_agent = Some(" ".into()); // blank handle is not "assigned" - let cards = vec![a, b]; - assert!(pick_next_todo(&cards, true).is_none()); - } - - #[test] - fn approval_gate_respects_global_and_per_card_optout() { - // Global off → never park. 
- assert!(!requires_plan_approval(false, None)); - assert!(!requires_plan_approval( - false, - Some(&TaskApprovalMode::Required) - )); - // Global on → park, unless the card opts out via NotRequired. - assert!(requires_plan_approval(true, None)); - assert!(requires_plan_approval( - true, - Some(&TaskApprovalMode::Required) - )); - assert!(!requires_plan_approval( - true, - Some(&TaskApprovalMode::NotRequired) - )); - } - - #[test] - fn progress_instruction_names_card_thread_and_tool() { - let s = build_progress_instruction("task-42", "user-tasks"); - assert!(s.contains("task-42")); - assert!(s.contains("user-tasks")); - assert!(s.contains("update_task")); - // It must instruct the agent to self-block (status: blocked + blocker) - // when it needs the user, so write_back can preserve that state. - assert!(s.contains("status: blocked")); - assert!(s.contains("blocker")); - } - - #[test] - fn resolver_defaults_to_orchestrator_for_unset_or_orchestrator_handle() { - let dir = tempfile::tempdir().unwrap(); - for handle in [None, Some(""), Some(" "), Some("orchestrator")] { - let r = resolve_executor(dir.path(), handle); - assert_eq!(r.agent_id, "orchestrator"); - assert_eq!(r.label, "default"); - assert!(r.prompt_suffix.is_none()); - } - } - - #[test] - fn resolver_uses_personality_branch_for_builtin_profile() { - // `load_profiles` returns built-in profiles for any empty workspace, so - // the personality branch is reachable with no fixture file. "research" - // is a built-in profile backed by the "researcher" agent. 
- let dir = tempfile::tempdir().unwrap(); - let r = resolve_executor(dir.path(), Some("research")); - assert_eq!(r.label, "personality:research"); - assert_eq!(r.agent_id, "researcher"); - let suffix = r.prompt_suffix.expect("personality preamble present"); - assert!(suffix.contains("acting as the personality `research`")); - } - - #[test] - fn resolver_degrades_to_default_for_unresolved_handle() { - let dir = tempfile::tempdir().unwrap(); - let r = resolve_executor(dir.path(), Some("no-such-executor-xyz")); - assert_eq!(r.agent_id, "orchestrator"); - assert_eq!(r.label, "default-fallback"); - assert!(r.prompt_suffix.is_none()); - } - - fn board_loc(dir: &std::path::Path) -> BoardLocation { - BoardLocation::Thread { - workspace_dir: dir.to_path_buf(), - thread_id: "t1".to_string(), - } - } - - #[test] - fn write_back_marks_done_with_evidence_on_success() { - let dir = tempfile::tempdir().unwrap(); - let loc = board_loc(dir.path()); - let id = ops::add(&loc, "do the thing", CardPatch::default()) - .unwrap() - .cards[0] - .id - .clone(); - ops::update_status(&loc, &id, TaskCardStatus::InProgress).unwrap(); - - write_back( - &loc, - &id, - "run-1", - Ok("completed: opened PR #5".to_string()), - ); - - let card = ops::list(&loc) - .unwrap() - .cards - .into_iter() - .find(|c| c.id == id) - .unwrap(); - assert_eq!(card.status, TaskCardStatus::Done); - assert!(card.evidence.iter().any(|e| e.contains("opened PR #5"))); - } - - #[test] - fn write_back_preserves_agent_set_blocked_on_clean_run() { - // The run marked its own card `blocked` (needs user input) via - // update_task, then ended cleanly. write_back must NOT force it to - // `done` — the task stays blocked, with the agent's blocker intact, - // awaiting the user. 
- let dir = tempfile::tempdir().unwrap(); - let loc = board_loc(dir.path()); - let id = ops::add(&loc, "update alan", CardPatch::default()) - .unwrap() - .cards[0] - .id - .clone(); - ops::update_status(&loc, &id, TaskCardStatus::InProgress).unwrap(); - // Agent self-blocks mid-run, as build_progress_instruction asks it to. - ops::edit( - &loc, - &id, - CardPatch { - status: Some(TaskCardStatus::Blocked), - blocker: Some("Slack isn't connected — confirm how to reach Alan".to_string()), - ..Default::default() - }, - ) - .unwrap(); - - // Run returns Ok (the turn finished) — but the card is self-blocked. - write_back( - &loc, - &id, - "run-2", - Ok("I checked GitHub and memory…".to_string()), - ); - - let card = ops::list(&loc) - .unwrap() - .cards - .into_iter() - .find(|c| c.id == id) - .unwrap(); - assert_eq!( - card.status, - TaskCardStatus::Blocked, - "a clean run over a self-blocked card must stay blocked, not auto-done" - ); - assert_eq!( - card.blocker.as_deref(), - Some("Slack isn't connected — confirm how to reach Alan"), - "the agent's blocker reason is preserved" - ); - } - - #[test] - fn write_back_marks_blocked_with_reason_on_failure() { - let dir = tempfile::tempdir().unwrap(); - let loc = board_loc(dir.path()); - let id = ops::add(&loc, "do the thing", CardPatch::default()) - .unwrap() - .cards[0] - .id - .clone(); - ops::update_status(&loc, &id, TaskCardStatus::InProgress).unwrap(); - - write_back(&loc, &id, "run-1", Err("agent build failed".to_string())); - - let card = ops::list(&loc) - .unwrap() - .cards - .into_iter() - .find(|c| c.id == id) - .unwrap(); - assert_eq!(card.status, TaskCardStatus::Blocked); - assert!(card - .blocker - .as_deref() - .unwrap_or_default() - .contains("agent build failed")); - } -} diff --git a/src/openhuman/agent/task_dispatcher/dispatch.rs b/src/openhuman/agent/task_dispatcher/dispatch.rs new file mode 100644 index 0000000000..0910ff7fd6 --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/dispatch.rs @@ -0,0 
/// Dispatch one card: gate on plan approval, claim it, run an autonomous turn,
/// write the result back.
///
/// Order of operations:
/// 1. Load the config.
/// 2. If plan approval applies to this card, try to park a `Todo` card at
///    `AwaitingApproval` and return early when that succeeds.
/// 3. Claim the card (`Todo`/`Ready` → `InProgress`) and build the prompt from
///    the freshly-loaded snapshot.
/// 4. Create the run record, start the heartbeat, create the session thread.
/// 5. Spawn the detached run, register it for cancellation, then release it.
///
/// # Returns
/// `Ok(Running)` once the card is claimed and the detached run is spawned,
/// `Ok(AwaitingApproval)` if the card was parked for human approval.
///
/// # Errors
/// Returns `Err` *without* spawning when the config cannot be loaded or when
/// the card is no longer claimable — its freshly-loaded status isn't
/// `Todo`/`Ready` (already running/done, or another dispatcher won the claim).
/// Benign: the poller retries next tick.
pub async fn dispatch_card(
    location: BoardLocation,
    card: TaskBoardCard,
) -> Result<DispatchOutcome, String> {
    let card_id = card.id.clone();

    let config = Config::load_or_init()
        .await
        .map_err(|e| format!("load config: {e:#}"))?;

    // Plan-approval gate: when required, a `todo` card is parked for human
    // approval before it can run. `Ready` (already approved) bypasses. We
    // attempt the AwaitingApproval claim first so the gate is also atomic —
    // two dispatchers racing the same Todo card won't both park it.
    //
    // A card explicitly marked `approval_mode = NotRequired` also bypasses the
    // gate: it has already cleared human review (e.g. a task approved out of
    // the `task-sources` inbox onto the `user-tasks` board, stamped
    // `not_required` at approval time). Re-parking it under the global default
    // would strand it on a board nobody approves from. Per-card opt-out wins.
    if requires_plan_approval(
        config.autonomy.require_task_plan_approval,
        card.approval_mode.as_ref(),
    ) {
        match ops::claim_card(
            &location,
            &card_id,
            &[TaskCardStatus::Todo],
            TaskCardStatus::AwaitingApproval,
        ) {
            Ok(_parked) => {
                // The approval event is only published for boards that have a
                // thread id; the card is parked either way.
                if let Some(thread_id) = location.thread_id() {
                    crate::core::event_bus::publish_global(
                        crate::core::event_bus::DomainEvent::TaskPlanAwaitingApproval {
                            card_id: card_id.clone(),
                            thread_id: thread_id.to_string(),
                        },
                    );
                }
                tracing::info!(card_id = %card_id, "[task_dispatcher] parked card awaiting plan approval");
                return Ok(DispatchOutcome::AwaitingApproval);
            }
            Err(_) => {
                // Card wasn't `Todo` — fall through to the main claim path,
                // which handles `Ready` cards and rejects everything else.
            }
        }
    }

    // Atomic claim: transition Todo|Ready → InProgress under a per-board
    // lock so concurrent dispatchers cannot both succeed. The returned card
    // is the freshly-loaded snapshot — the prompt uses it, not the caller's
    // potentially stale copy.
    let fresh_card = ops::claim_card(
        &location,
        &card_id,
        &[TaskCardStatus::Todo, TaskCardStatus::Ready],
        TaskCardStatus::InProgress,
    )
    .map_err(|e| format!("[task_dispatcher] claim rejected for {card_id}: {e}"))?;

    let mut prompt = build_task_prompt(&fresh_card);
    // Tell the run which card it owns so it can post live progress via the
    // `update_task` tool (notes/evidence) as it works. The terminal
    // `done`/`blocked` transition is still stamped deterministically by
    // `write_back` from the run outcome.
    if let Some(thread_id) = location.thread_id() {
        prompt.push_str(&build_progress_instruction(&card_id, thread_id));
    }

    let run_id = uuid::Uuid::new_v4().to_string();

    // Resolve which executor runs this card: default agent, a personality, or
    // a skill — one autonomous-run interface, three presets (G4 + G3).
    let executor = resolve_executor(&config.workspace_dir, fresh_card.assigned_agent.as_deref());
    tracing::info!(
        card_id = %card_id,
        run_id = %run_id,
        executor = %executor.label,
        agent_id = %executor.agent_id,
        prompt_chars = prompt.chars().count(),
        "[task_dispatcher] card claimed (→in_progress), spawning autonomous run"
    );

    // Best-effort: a missing run record is logged and the run still proceeds.
    if let Err(e) = runs::create_run(&location, &run_id, &card_id, &executor.label) {
        tracing::warn!(
            run_id = %run_id,
            card_id = %card_id,
            error = %e,
            "[task_dispatcher] failed to create run record (proceeding without liveness tracking)"
        );
    }

    // The heartbeat task receives the watch receiver; `true` is sent on the
    // sender side when the run finishes (below) or is cancelled.
    let (hb_cancel_tx, hb_cancel_rx) = tokio::sync::watch::channel(false);
    runs::spawn_heartbeat_task(location.clone(), run_id.clone(), hb_cancel_rx);

    // Materialise this autonomous run as a top-level task-session thread so it
    // surfaces in Conversations → Tasks like a manually-run todo. Best-effort:
    // `None` just means the run streams nowhere (headless), exactly as before.
    let session_thread_id = task_session::create_session_thread(
        config.workspace_dir.clone(),
        &fresh_card,
        &run_id,
        &prompt,
    );

    // Stamp the session thread onto the card so the board UI can offer a
    // "View session" jump into Conversations. Best-effort: a failure here just
    // means the link is unavailable; the run proceeds regardless.
    if let Some(thread_id) = session_thread_id.as_deref() {
        if let Err(e) = ops::set_session_thread(&location, &card_id, Some(thread_id.to_string())) {
            tracing::warn!(
                card_id = %card_id,
                thread_id = %thread_id,
                error = %e,
                "[task_dispatcher] failed to stamp session thread on card (View session link unavailable)"
            );
        }
    }

    let run_id_for_return = run_id.clone();
    let location_for_run = location.clone();
    // Clones for the active-run registry (the originals move into the task).
    let reg_thread = session_thread_id.clone();
    let reg_location = location.clone();
    let reg_card_id = card_id.clone();
    let reg_run_id = run_id.clone();
    let hb_cancel_for_task = hb_cancel_tx.clone();
    let task_thread = session_thread_id.clone();
    // Gate the task on registration: a fast-finishing run could otherwise reach
    // its terminal `take_active_run` before `register_active_run` below has run,
    // see no entry, and skip `write_back` — leaving card/run state inconsistent.
    // The task parks on `start_rx` until we release it after registration.
    let (start_tx, start_rx) = tokio::sync::oneshot::channel::<()>();
    let join = tokio::spawn(async move {
        // The result is ignored on purpose: the task proceeds whether the
        // start signal was sent or the sender was dropped.
        let _ = start_rx.await;
        let outcome = run_autonomous(config, &executor, &prompt, &run_id, session_thread_id).await;
        let _ = hb_cancel_for_task.send(true);
        // Race with a concurrent cancel. With a session thread, whoever removes
        // the registry entry owns the write-back, so it runs exactly once; if
        // the entry is already gone (a cancel took it) this task skips it.
        // Without a session thread nothing was registered, so there is no
        // cancel to race and this task always performs the write-back.
        let still_ours = match &task_thread {
            Some(tid) => take_active_run(tid).is_some(),
            None => true,
        };
        if still_ours {
            super::executor::write_back(&location_for_run, &card_id, &run_id, outcome);
        }
    });

    // Register the run so the chat Cancel (web `channel_web_cancel` →
    // `cancel_session`) can abort it — task threads aren't web-channel turns.
    // Only runs that have a session thread are registered (keyed by it).
    if let Some(tid) = reg_thread {
        register_active_run(
            tid,
            ActiveRun {
                abort: join.abort_handle(),
                hb_cancel: hb_cancel_tx,
                location: reg_location,
                card_id: reg_card_id,
                run_id: reg_run_id,
            },
        );
    }
    // Registration (if any) is in place — release the task to start running.
    let _ = start_tx.send(());

    Ok(DispatchOutcome::Running {
        run_id: run_id_for_return,
    })
}
+pub(super) const TASK_RUN_MAX_ITERATIONS: usize = 200; + +/// Max chars of the agent's final output retained as board `evidence`. +pub(super) const EVIDENCE_MAX_CHARS: usize = 2_000; + +/// Map a card's `assigned_agent` handle to one of three executor presets: +/// **personality** (scoped SOUL/MEMORY folded into the prompt suffix, run as +/// that profile's agent), **skill** (orchestrator seeded with the skill's +/// `SKILL.md` guidelines), or **built-in agent**. An unset or unresolved handle +/// degrades to the default `orchestrator` — "use the personality if valid, +/// otherwise the default agent." +pub(super) fn resolve_executor(workspace_dir: &Path, assigned: Option<&str>) -> ResolvedExecutor { + let Some(handle) = assigned.map(str::trim).filter(|s| !s.is_empty()) else { + return ResolvedExecutor::default_agent(); + }; + if handle == "orchestrator" { + return ResolvedExecutor::default_agent(); + } + + // 1) Personality (#2895): a user-defined profile with scoped identity. + if let Ok(state) = crate::openhuman::agent::profiles::load_profiles(workspace_dir) { + if let Some(profile) = state.profiles.iter().find(|p| p.id == handle) { + let ctx = PersonalityContext::from_profile(workspace_dir, profile.clone()); + let mut preamble = format!( + "You are acting as the personality `{}` (\"{}\"). {}", + profile.id, profile.name, profile.description + ); + if let Some(soul) = &ctx.soul_md_override { + preamble.push_str("\n\n[Personality SOUL.md]\n"); + preamble.push_str(&truncate_chars(soul, EXECUTOR_PREAMBLE_MAX_CHARS)); + } + if let Some(mem) = &ctx.memory_md_override { + preamble.push_str("\n\n[Personality MEMORY.md]\n"); + preamble.push_str(&truncate_chars(mem, EXECUTOR_PREAMBLE_MAX_CHARS)); + } + return ResolvedExecutor { + agent_id: profile.agent_id.clone(), + prompt_suffix: Some(preamble), + label: format!("personality:{handle}"), + }; + } + } + + // 2) Workflow (#2824): the same autonomous run, seeded with SKILL.md. 
+ if let Some(skill) = crate::openhuman::workflows::registry::get_workflow(workspace_dir, handle) + { + let guidelines = match &skill.definition.system_prompt { + PromptSource::Inline(s) => truncate_chars(s, EXECUTOR_PREAMBLE_MAX_CHARS), + _ => String::new(), + }; + let suffix = format!( + "You are executing this task as the skill `{handle}`. Follow these skill \ + guidelines exactly:\n\n{guidelines}" + ); + return ResolvedExecutor { + agent_id: "orchestrator".to_string(), + prompt_suffix: Some(suffix), + label: format!("skill:{handle}"), + }; + } + + // 3) Built-in agent definition. + if AgentDefinitionRegistry::global() + .and_then(|r| r.get(handle)) + .is_some() + { + return ResolvedExecutor { + agent_id: handle.to_string(), + prompt_suffix: None, + label: format!("agent:{handle}"), + }; + } + + // 4) Unresolved → degrade to the default agent (don't fail the card). + tracing::warn!( + handle = %handle, + "[task_dispatcher] assigned executor did not resolve to a personality/skill/agent; \ + using default orchestrator" + ); + ResolvedExecutor { + label: "default-fallback".to_string(), + ..ResolvedExecutor::default_agent() + } +} + +/// Run the resolved executor as a single autonomous turn using the +/// already-loaded config. The executor's prompt suffix (personality identity or +/// skill guidelines) rides in the system prompt; the card goal is the turn input. +/// +/// SECURITY / threat model (prompt injection): the card objective/content and +/// `source_metadata` derive from external, attacker-influenceable text (e.g. a +/// GitHub issue body anyone in a watched repo can file), and this background +/// run is gate-free at the per-tool level (background turns auto-allow, like +/// skill runs) while `build_task_prompt` may instruct it to write back to the +/// upstream item. 
/// Run the resolved executor as a single autonomous turn using the
/// already-loaded config. The executor's prompt suffix (personality identity or
/// skill guidelines) rides in the system prompt; the card goal is the turn input.
///
/// `run_id` tags the agent's events and is the request id of the terminal chat
/// event. When `session_thread_id` is `Some`, progress is streamed into that
/// thread, a terminal chat event is emitted and the final result is appended to
/// the thread; when `None` the run is headless.
///
/// Returns the agent's final response, or the error rendered as a string
/// (agent construction failure or a failed turn).
///
/// SECURITY / threat model (prompt injection): the card objective/content and
/// `source_metadata` derive from external, attacker-influenceable text (e.g. a
/// GitHub issue body anyone in a watched repo can file), and this background
/// run is gate-free at the per-tool level (background turns auto-allow, like
/// skill runs) while `build_task_prompt` may instruct it to write back to the
/// upstream item. The interactive checkpoint is therefore the up-front
/// **plan-approval gate** (`require_task_plan_approval`), which a human reviews
/// before the run starts — not per-action egress/write approval. Egress is
/// widened to `*` only when the operator set no explicit allow-list (matching
/// skill runs, since real task work needs broad reach: git, package registries,
/// provider APIs). Tightening egress to the source provider's domains for
/// source-ingested runs is a considered follow-up (it would break general task
/// work, so it needs to key off provenance) — tracked for a later PR.
pub(super) async fn run_autonomous(
    mut config: Config,
    executor: &ResolvedExecutor,
    prompt: &str,
    run_id: &str,
    session_thread_id: Option<String>,
) -> Result<String, String> {
    config.agent.max_tool_iterations = TASK_RUN_MAX_ITERATIONS;
    // Match skill-run egress handling: only widen to the permissive default
    // when the operator hasn't configured an explicit allow-list. See the
    // threat-model note above on why `*` is the default here.
    if config.http_request.allowed_domains.is_empty() {
        config.http_request.allowed_domains = vec!["*".to_string()];
    }

    let mut agent = Agent::from_config_for_agent_with_profile(
        &config,
        &executor.agent_id,
        None,
        executor.prompt_suffix.clone(),
    )
    .map_err(|e| format!("build agent: {e:#}"))?;
    agent.set_event_context(run_id.to_string(), "task");
    // Definition name is `task-<label>-<first 8 bytes of run_id>`; `get(..8)`
    // falls back to the whole id when it is shorter than that.
    agent.set_agent_definition_name(format!(
        "task-{}-{}",
        executor.label,
        run_id.get(..8).unwrap_or(run_id)
    ));

    // Stream this autonomous run into its task-session thread exactly like a
    // chat turn: wire the agent's progress into the web-channel bridge with the
    // broadcast client id "system" — the same mechanism cron/welcome agents use.
    // The bridge (a) emits live text/tool socket events that any client viewing
    // the thread renders in real time (the frontend keys by thread_id), and
    // (b) persists a TurnStateMirror so the tool timeline replays when the
    // session is opened mid/after run. Best-effort — with no session thread the
    // run is headless, exactly as before this feature.
    let workspace_dir = config.workspace_dir.clone();
    if let Some(thread_id) = session_thread_id.as_deref() {
        let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64);
        agent.set_on_progress(Some(progress_tx));
        crate::openhuman::channels::providers::web::spawn_progress_bridge(
            progress_rx,
            "system".to_string(),
            thread_id.to_string(),
            run_id.to_string(),
            crate::openhuman::threads::turn_state::TurnStateStore::new(workspace_dir.clone()),
            crate::openhuman::channels::providers::web::ChatRequestMetadata::default(),
            config.clone(),
        );
    }

    // Sub-agent task runs are internal to the agent harness — the user
    // already authorized the parent turn that dispatched this task. Label
    // as CLI so the approval gate doesn't fail closed on internal
    // sub-agent invocations.
    let run = crate::openhuman::agent::turn_origin::with_origin(
        crate::openhuman::agent::turn_origin::AgentTurnOrigin::Cli,
        with_autonomous_iter_cap(TASK_RUN_MAX_ITERATIONS, agent.run_single(prompt)),
    );
    // With a session thread the turn is additionally wrapped in that thread's
    // context; either way the error is flattened to its `{:#}` rendering.
    let result = match session_thread_id.as_deref() {
        Some(thread_id) => {
            crate::openhuman::inference::provider::thread_context::with_thread_id(
                thread_id.to_string(),
                run,
            )
            .await
        }
        None => run.await,
    }
    .map_err(|e| format!("{e:#}"));

    // Emit the terminal chat event so a client viewing the session stops
    // "processing" and finalizes the assistant bubble — the SAME chat_done /
    // chat_error the web channel emits at the end of a normal turn. The
    // progress bridge only streams intermediate deltas; without this terminal
    // signal the live-streamed session spins forever. Broadcast as "system" so
    // any viewer of the thread receives it (frontend keys by thread_id).
    if let Some(thread_id) = session_thread_id.as_deref() {
        match &result {
            Ok(response) => {
                crate::openhuman::channels::providers::presentation::deliver_response(
                    "system",
                    thread_id,
                    run_id,
                    response,
                    prompt,
                    &[],
                )
                .await;
            }
            Err(err) => {
                crate::openhuman::channels::providers::web::publish_web_channel_event(
                    crate::core::socketio::WebChannelEvent {
                        event: "chat_error".to_string(),
                        client_id: "system".to_string(),
                        thread_id: thread_id.to_string(),
                        request_id: run_id.to_string(),
                        message: Some(err.clone()),
                        error_type: Some("agent_error".to_string()),
                        ..Default::default()
                    },
                );
            }
        }
        // Persist the final response as the closing assistant message so a
        // reopened session shows the outcome like a finished manual run.
        task_session::append_final(workspace_dir, thread_id, &result);
    }
    result
}

/// Deterministic board write-back: the dispatcher owns the card lifecycle.
///
/// Card update:
/// - run succeeded and the card is currently `blocked` → the card is left
///   untouched (the run blocked itself and is waiting on the user);
/// - run succeeded otherwise → `done` + evidence (trimmed output, capped at
///   `EVIDENCE_MAX_CHARS`);
/// - run failed → `blocked` + the error as blocker reason (same cap).
///
/// The run record is then completed from the run result alone (`Success` with
/// evidence, or `Failed` with the error) — including in the self-blocked case.
///
/// A write failure here is logged, never propagated — the run already
/// happened.
pub(super) fn write_back(
    location: &BoardLocation,
    card_id: &str,
    run_id: &str,
    outcome: Result<String, String>,
) {
    // Respect a status the run set for itself: if the agent marked the card
    // `blocked` via `update_task` (it needs a decision/input from the user, or
    // genuinely cannot proceed), leave it blocked — do NOT force-complete it.
    // The task then stays paused in that state until the user responds, instead
    // of a "clean turn" being silently recorded as done. Otherwise mark done
    // with evidence; a run error marks blocked with the error as the blocker.
    let agent_self_blocked =
        outcome.is_ok() && current_card_status(location, card_id) == Some(TaskCardStatus::Blocked);

    // `None` means "do not touch the card".
    let patch = if agent_self_blocked {
        tracing::info!(
            card_id = %card_id,
            run_id = %run_id,
            "[task_dispatcher] run ended with card self-blocked → leaving blocked (awaiting user input), not auto-completing"
        );
        None
    } else {
        match &outcome {
            Ok(output) => {
                tracing::info!(
                    card_id = %card_id,
                    run_id = %run_id,
                    output_chars = output.chars().count(),
                    "[task_dispatcher] run complete → done"
                );
                Some(CardPatch {
                    status: Some(TaskCardStatus::Done),
                    evidence: Some(vec![truncate_chars(output.trim(), EVIDENCE_MAX_CHARS)]),
                    ..Default::default()
                })
            }
            Err(err) => {
                tracing::warn!(
                    card_id = %card_id,
                    run_id = %run_id,
                    error = %err,
                    "[task_dispatcher] run failed → blocked"
                );
                Some(CardPatch {
                    status: Some(TaskCardStatus::Blocked),
                    blocker: Some(truncate_chars(err, EVIDENCE_MAX_CHARS)),
                    ..Default::default()
                })
            }
        }
    };

    if let Some(patch) = patch {
        if let Err(e) = ops::edit(location, card_id, patch) {
            tracing::error!(
                card_id = %card_id,
                run_id = %run_id,
                error = %e,
                "[task_dispatcher] board write-back failed (run outcome lost from board)"
            );
        }
    }

    // The run record mirrors the raw run result, independent of the card patch
    // chosen above.
    let (run_outcome, run_error, run_evidence) = match &outcome {
        Ok(output) => (
            RunOutcome::Success,
            None,
            vec![truncate_chars(output.trim(), EVIDENCE_MAX_CHARS)],
        ),
        Err(err) => (
            RunOutcome::Failed,
            Some(truncate_chars(err, EVIDENCE_MAX_CHARS)),
            Vec::new(),
        ),
    };
    if let Err(e) = runs::complete_run(location, run_id, run_outcome, run_error, run_evidence) {
        tracing::warn!(
            run_id = %run_id,
            error = %e,
            "[task_dispatcher] run record completion failed"
        );
    }
}
+fn current_card_status(location: &BoardLocation, card_id: &str) -> Option { + ops::list(location) + .ok() + .and_then(|snap| snap.cards.into_iter().find(|c| c.id == card_id)) + .map(|c| c.status) +} + +pub(super) fn truncate_chars(s: &str, max: usize) -> String { + if s.chars().count() <= max { + return s.to_string(); + } + let mut out: String = s.chars().take(max.saturating_sub(1)).collect(); + out.push('…'); + out +} diff --git a/src/openhuman/agent/task_dispatcher/mod.rs b/src/openhuman/agent/task_dispatcher/mod.rs new file mode 100644 index 0000000000..fbef72323f --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/mod.rs @@ -0,0 +1,40 @@ +//! Deterministic task-card dispatcher. +//! +//! Turns a [`TaskBoardCard`] into work: it **claims** the card via a +//! compare-and-set (re-load the board and transition only a `Todo`/`Ready` +//! card to `in_progress`, so a stale/concurrent re-dispatch of the same card +//! is rejected), runs a single **autonomous agent turn** toward the card's +//! objective, and **writes the outcome back** to the board (`done` + evidence +//! on success, `blocked` + reason on failure). +//! +//! This is the one executor both dispatch paths converge on: +//! - the **board poller** (cards that arrived without a proactive trigger), and +//! - the **proactive triage** arm (`agent::triage::apply_decision`), once it has +//! decided to act on a task-board card. +//! +//! The runner mirrors `skills::spawn_workflow_run_background`: build the +//! `orchestrator` agent fresh inside a detached task, cap tool iterations, and +//! run `agent.run_single` under `with_autonomous_iter_cap`. PR-4 generalises the +//! executor from the default agent to a resolved personality/skill; this module +//! keeps the default-agent path so the pipeline runs end-to-end first. 
+ +mod dispatch; +mod executor; +mod poller; +mod prompt; +mod registry; +mod types; + +#[cfg(test)] +mod tests; + +// ── Public API ──────────────────────────────────────────────────────────────── + +pub use dispatch::dispatch_card; +pub use poller::start_board_poller; +pub use prompt::build_task_prompt; +pub use registry::cancel_session; +pub use types::DispatchOutcome; + +// `pub(crate)` for test drivers. +pub(crate) use poller::poll_once; diff --git a/src/openhuman/agent/task_dispatcher/poller.rs b/src/openhuman/agent/task_dispatcher/poller.rs new file mode 100644 index 0000000000..6a576b87c2 --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/poller.rs @@ -0,0 +1,215 @@ +//! Board poller: periodic sweep that dispatches dispatchable cards. +//! +//! Each tick scans the `task-sources` board and the `user-tasks` board, +//! reclaims stale runs, and dispatches the highest-urgency dispatchable card +//! via [`dispatch_card`], gated by background-AI capacity (`scheduler_gate`). + +use std::sync::OnceLock; +use std::time::Duration; + +use crate::openhuman::agent::task_board::{TaskApprovalMode, TaskBoardCard, TaskCardStatus}; +use crate::openhuman::config::Config; +use crate::openhuman::todos::ops::{self, BoardLocation, USER_TASKS_THREAD_ID}; +use crate::openhuman::todos::runs::{self, RunLimits}; + +use super::dispatch::dispatch_card; + +/// How often the poller wakes to look for a dispatchable card. +const POLLER_TICK_SECONDS: u64 = 60; + +static POLLER_STARTED: OnceLock<()> = OnceLock::new(); + +/// Spawn the board poller. Idempotent — only the first call installs the loop. +/// +/// Each tick it scans the `task-sources` board and dispatches the +/// highest-urgency `todo` card via [`dispatch_card`], gated by background-AI +/// capacity (`scheduler_gate`). This is the catch-all for cards that arrive +/// without a proactive trigger (`TodoOnly` sources, manual cards, or proactive +/// turns the gate skipped). 
Cards that *did* get a proactive trigger are +/// dispatched by the triage arm; the claim-based lock makes firing both safe. +pub fn start_board_poller() { + if POLLER_STARTED.set(()).is_err() { + tracing::debug!("[task_dispatcher:poller] already running, skipping start"); + return; + } + tokio::spawn(async move { + tracing::info!( + tick_seconds = POLLER_TICK_SECONDS, + "[task_dispatcher:poller] starting" + ); + let mut ticker = tokio::time::interval(Duration::from_secs(POLLER_TICK_SECONDS)); + ticker.tick().await; // skip the immediate fire so startup isn't slammed + loop { + ticker.tick().await; + if let Err(e) = poll_once().await { + tracing::warn!(error = %e, "[task_dispatcher:poller] tick failed (continuing)"); + } + } + }); +} + +/// One poller tick: sweep each executor board and dispatch its highest-urgency +/// dispatchable card, if any and if capacity allows. `pub(crate)` so tests can +/// drive a tick without the real interval. +/// +/// Two boards are swept, each independently (own stale-reclaim + single +/// `in_progress` cap): +/// - **`user-tasks`** (the kanban work board) — always swept, but only +/// **agent-assigned** cards are run, so a human's manually-created todo is +/// never auto-executed. This is where tasks approved out of the inbox run. +/// - **`task-sources`** (the proactive inbox) — swept only when ingestion is +/// enabled. With plan-approval required this only ever parks a `todo` at +/// `awaiting_approval`; it runs a card directly only when approval is off. +/// Kept in the sweep so its stale/wedged runs are still reclaimed. +pub(crate) async fn poll_once() -> Result<(), String> { + // Gate on background-AI capacity (autonomy / power / pause). Dropping the + // permit immediately is fine: this is a "may background work start now" + // check; the run itself is detached. 
+ let Some(_permit) = crate::openhuman::scheduler_gate::wait_for_capacity().await else { + tracing::debug!("[task_dispatcher:poller] scheduler gate denied capacity; idle tick"); + return Ok(()); + }; + + let config = Config::load_or_init() + .await + .map_err(|e| format!("load config: {e:#}"))?; + + // (board location, agent_assigned_only). user-tasks first — it's the real + // work board; task-sources is only included for parking + reclaim. + let mut boards: Vec<(BoardLocation, bool)> = vec![( + BoardLocation::Thread { + workspace_dir: config.workspace_dir.clone(), + thread_id: USER_TASKS_THREAD_ID.to_string(), + }, + true, + )]; + if config.task_sources.enabled { + boards.push(( + BoardLocation::Thread { + workspace_dir: config.workspace_dir.clone(), + thread_id: crate::openhuman::task_sources::TASK_SOURCES_THREAD_ID.to_string(), + }, + false, + )); + } + + for (location, agent_assigned_only) in boards { + if let Err(e) = poll_board(&location, agent_assigned_only).await { + tracing::warn!( + thread_id = ?location.thread_id(), + error = %e, + "[task_dispatcher:poller] board sweep failed (continuing)" + ); + } + } + Ok(()) +} + +/// Sweep one board: reclaim stale runs, then (unless one is already running) +/// dispatch its highest-urgency dispatchable card. When `agent_assigned_only` +/// is set, only cards with an `assigned_agent` are eligible — the guard that +/// keeps the poller off a human's manual `user-tasks` cards. +async fn poll_board(location: &BoardLocation, agent_assigned_only: bool) -> Result<(), String> { + // Reclaim stale/wedged runs before looking for new work. Reclaimed + // cards move back to `todo` (re-dispatchable) so they appear in the + // snapshot below and can be picked up in the same tick. 
+ match runs::reclaim_stale(location, &RunLimits::default()) { + Ok(result) if result.reclaimed_count > 0 || result.blocked_count > 0 => { + tracing::info!( + thread_id = ?location.thread_id(), + reclaimed = result.reclaimed_count, + blocked = result.blocked_count, + "[task_dispatcher:poller] stale runs reclaimed" + ); + } + Err(e) => { + tracing::warn!( + thread_id = ?location.thread_id(), + error = %e, + "[task_dispatcher:poller] stale reclaim failed (continuing)" + ); + } + _ => {} + } + + let snapshot = ops::list(location)?; + + // `enforce_single_in_progress` caps the board at one running card, so if + // one is already in progress there's nothing for this tick to claim. + if snapshot + .cards + .iter() + .any(|c| c.status == TaskCardStatus::InProgress) + { + return Ok(()); + } + + let Some(card) = pick_next_todo(&snapshot.cards, agent_assigned_only) else { + return Ok(()); + }; + + tracing::info!( + card_id = %card.id, + thread_id = ?location.thread_id(), + urgency = card_urgency(&card), + agent_assigned_only, + "[task_dispatcher:poller] dispatching highest-urgency dispatchable card" + ); + dispatch_card(location.clone(), card).await.map(|_| ()) +} + +/// Highest-urgency dispatchable card (`todo` or approved `ready`; urgency from +/// `source_metadata.urgency`, default 0.0; ties broken toward the lower board +/// `order`). Returns a clone. `dispatch_card` then either runs a `ready` card +/// or parks a `todo` one for approval, per the autonomy setting. +/// +/// When `agent_assigned_only` is set, cards without an `assigned_agent` are +/// excluded — used on the `user-tasks` board so the poller runs only +/// agent-generated tasks and never picks up a human's manually-created card. 
+pub(super) fn pick_next_todo( + cards: &[TaskBoardCard], + agent_assigned_only: bool, +) -> Option { + cards + .iter() + .filter(|c| matches!(c.status, TaskCardStatus::Todo | TaskCardStatus::Ready)) + .filter(|c| { + !agent_assigned_only + || c.assigned_agent + .as_deref() + .map(|a| !a.trim().is_empty()) + .unwrap_or(false) + }) + .max_by(|a, b| { + card_urgency(a) + .partial_cmp(&card_urgency(b)) + .unwrap_or(std::cmp::Ordering::Equal) + // On equal urgency, prefer the lower `order` (earlier card): + // reversing the order comparison makes it the "greater" pick. + .then(b.order.cmp(&a.order)) + }) + .cloned() +} + +/// Whether a card must be parked at `awaiting_approval` before it can run. +/// +/// The global `require_task_plan_approval` setting applies *unless* the card is +/// explicitly marked `approval_mode = NotRequired` — a per-card opt-out for +/// tasks that have already cleared human review (e.g. approved out of the +/// `task-sources` inbox onto `user-tasks`). Per-card opt-out wins over the +/// global default; without this, an already-approved card would be re-parked +/// and stranded. +pub(super) fn requires_plan_approval( + global_required: bool, + approval_mode: Option<&TaskApprovalMode>, +) -> bool { + global_required && approval_mode != Some(&TaskApprovalMode::NotRequired) +} + +pub(super) fn card_urgency(card: &TaskBoardCard) -> f64 { + card.source_metadata + .as_ref() + .and_then(|m| m.get("urgency")) + .and_then(serde_json::Value::as_f64) + .unwrap_or(0.0) +} diff --git a/src/openhuman/agent/task_dispatcher/prompt.rs b/src/openhuman/agent/task_dispatcher/prompt.rs new file mode 100644 index 0000000000..48d95a36fd --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/prompt.rs @@ -0,0 +1,121 @@ +//! Task prompt construction helpers. +//! +//! Builds the goal prompt handed to autonomous runs from a [`TaskBoardCard`], +//! and the live-progress instruction that keeps the card current while the +//! run works. 
+ +use crate::openhuman::agent::task_board::TaskBoardCard; + +/// Render a card into the goal prompt handed to the autonomous run. +/// +/// The card's `content`/title is the display form; the prompt leads with the +/// clean `objective`, then any `plan` steps and `acceptance_criteria`, and a +/// pointer to the originating source so the agent can pull related context from +/// memory via its `memory_recall` tool (the GitHub/Notion/… activity for this +/// item is ingested into the summary tree by the memory-sources domain). +pub fn build_task_prompt(card: &TaskBoardCard) -> String { + let mut lines: Vec = Vec::new(); + + let objective = card + .objective + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .unwrap_or_else(|| card.title.trim()); + lines.push(format!( + "You are autonomously executing one task to completion. Objective:\n{objective}" + )); + + if !card.plan.is_empty() { + lines.push("\nPlan:".to_string()); + for (i, step) in card.plan.iter().enumerate() { + lines.push(format!("{}. {}", i + 1, step.trim())); + } + } + + if !card.acceptance_criteria.is_empty() { + lines.push("\nAcceptance criteria (the task is done only when all hold):".to_string()); + for c in &card.acceptance_criteria { + lines.push(format!("- {}", c.trim())); + } + } + + if let Some(meta) = &card.source_metadata { + let provider = meta.get("provider").and_then(|v| v.as_str()); + let repo = meta.get("repo").and_then(|v| v.as_str()); + let external_id = meta.get("external_id").and_then(|v| v.as_str()); + let url = meta.get("url").and_then(|v| v.as_str()); + let mut origin = String::new(); + if let Some(p) = provider { + origin.push_str(p); + } + if let Some(r) = repo { + origin.push_str(&format!(" {r}")); + } + if let Some(id) = external_id { + origin.push_str(&format!("#{id}")); + } + // Gate on a known provider so the origin string is always meaningful + // (an id-only card would render "#123" with a leading space). 
+ if provider.is_some() { + lines.push(format!( + "\nThis task originates from {}. Its activity has been ingested into memory — use \ + your memory_recall tool to pull related context (prior discussion, linked items) \ + before and while you work.", + origin.trim() + )); + } + if let Some(u) = url { + lines.push(format!("Source link: {u}")); + } + // G9b — agent-driven external write-back. When the upstream item is + // addressable (provider + id), instruct the agent to close the loop on + // the source itself via its integration tools. Runs under the + // connection's existing write scope (no extra approval gate); if it + // can't, it reports that instead of failing. + if provider.is_some() && external_id.is_some() { + lines.push(format!( + "\nWhen the task is complete, record the outcome on the upstream source ({}): use \ + your integration tools to add a comment summarising the resolution and, if the \ + work fully addresses it, close/resolve the item. If you lack the permission or \ + connection to do so, say so in your final summary instead of guessing.", + origin.trim() + )); + } + } + + lines.push( + "\nWork the task to completion. Do not pick up unrelated work. When finished, your final \ + message should summarise what you did and the evidence (commits, PRs, results)." + .to_string(), + ); + + lines.join("\n") +} + +/// Instruction appended to the run prompt so the autonomous turn keeps its own +/// task card current via the `update_task` tool while it works. +/// +/// The card is already `in_progress` (the dispatcher claimed it before +/// spawning the run), addressed by the exact card id + board the run owns +/// (without the explicit `threadId` the tool defaults to the `task-sources` +/// board and would miss a `user-tasks` card). Two things this asks for: +/// 1. *progress* updates (notes/evidence) as the run works, and +/// 2. 
an explicit `status: blocked` + `blocker` when the run needs a +/// decision/information from the user or cannot proceed — which +/// [`write_back`] now preserves rather than force-completing, so the task +/// pauses for the user instead of being silently marked done. +pub(super) fn build_progress_instruction(card_id: &str, thread_id: &str) -> String { + format!( + "\n\nThis task is tracked as card `{card_id}` on the `{thread_id}` board. As you work, \ + call the `update_task` tool (id `{card_id}`, threadId `{thread_id}`) to keep the card \ + current — append `notes`/`evidence` as you make progress.\n\nIf you need a decision or \ + information from the user, or you genuinely cannot proceed (missing access, ambiguous \ + requirement, an action that needs the user's confirmation), call `update_task` with \ + `status: blocked` and a `blocker` that states exactly what you need from the user. The \ + task will stay paused in that blocked state until the user responds — do NOT guess, \ + fabricate, or take a risky irreversible action just to avoid blocking. If instead you \ + finish the work, end with a summary of what you did and the evidence; completion is \ + recorded automatically." + ) +} diff --git a/src/openhuman/agent/task_dispatcher/registry.rs b/src/openhuman/agent/task_dispatcher/registry.rs new file mode 100644 index 0000000000..abc522fb4f --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/registry.rs @@ -0,0 +1,74 @@ +//! In-flight autonomous run registry. +//! +//! Tracks active runs by session `thread_id` so the web-channel cancel path +//! can abort them even though they are detached tokio tasks rather than +//! web-channel turns. 
+ +use std::collections::HashMap; +use std::sync::{Mutex, OnceLock}; + +use super::types::ActiveRun; + +static ACTIVE_RUNS: OnceLock>> = OnceLock::new(); + +pub(super) fn active_runs() -> &'static Mutex> { + ACTIVE_RUNS.get_or_init(|| Mutex::new(HashMap::new())) +} + +pub(super) fn register_active_run(thread_id: String, run: ActiveRun) { + active_runs() + .lock() + .expect("active_runs mutex poisoned") + .insert(thread_id, run); +} + +/// Remove and return the active-run entry for `thread_id`. The naturally +/// completing run and a concurrent [`cancel_session`] race on this — whoever +/// gets `Some` "owns" the terminal board write-back, so it happens exactly once. +pub(super) fn take_active_run(thread_id: &str) -> Option { + active_runs() + .lock() + .expect("active_runs mutex poisoned") + .remove(thread_id) +} + +/// Cancel the in-flight autonomous run streaming into session `thread_id`. +/// +/// Aborts the detached run task, stops its heartbeat, marks the card `blocked` +/// (user-cancelled) so it doesn't dangle `in_progress`, and emits the terminal +/// chat event (broadcast as `"system"`) so the session UI stops "processing". +/// Returns `true` if a run was found and cancelled. Wired into the web channel's +/// `channel_web_cancel` as the fallback when the thread has no web-channel turn. +pub async fn cancel_session(thread_id: &str) -> bool { + let Some(run) = take_active_run(thread_id) else { + return false; + }; + run.abort.abort(); + let _ = run.hb_cancel.send(true); + // The aborted task never reaches its own write-back — do it here so the + // card lands in a terminal state instead of a stale `in_progress`. 
+ super::executor::write_back( + &run.location, + &run.card_id, + &run.run_id, + Err("Cancelled by user".to_string()), + ); + crate::openhuman::channels::providers::web::publish_web_channel_event( + crate::core::socketio::WebChannelEvent { + event: "chat_error".to_string(), + client_id: "system".to_string(), + thread_id: thread_id.to_string(), + request_id: run.run_id.clone(), + message: Some("Cancelled".to_string()), + error_type: Some("cancelled".to_string()), + ..Default::default() + }, + ); + tracing::info!( + thread_id = %thread_id, + card_id = %run.card_id, + run_id = %run.run_id, + "[task_dispatcher] cancelled autonomous run via chat cancel" + ); + true +} diff --git a/src/openhuman/agent/task_dispatcher/tests.rs b/src/openhuman/agent/task_dispatcher/tests.rs new file mode 100644 index 0000000000..0b75b15d91 --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/tests.rs @@ -0,0 +1,402 @@ +//! Unit tests for the task dispatcher sub-modules. + +use serde_json::json; + +use crate::openhuman::agent::task_board::{TaskApprovalMode, TaskBoardCard, TaskCardStatus}; +use crate::openhuman::todos::ops::{self, BoardLocation, CardPatch}; + +use super::executor::{truncate_chars, write_back, EVIDENCE_MAX_CHARS}; +use super::poller::{pick_next_todo, requires_plan_approval}; +use super::prompt::{build_progress_instruction, build_task_prompt}; +use super::registry::{register_active_run, take_active_run}; +use super::types::ActiveRun; + +#[tokio::test] +async fn active_run_registry_take_is_once() { + // Race-safety: the completing run and a concurrent cancel both call + // `take_active_run`; exactly one gets `Some` (and owns the write-back). 
+ let (tx, _rx) = tokio::sync::watch::channel(false); + let handle = tokio::spawn(async { std::future::pending::<()>().await }); + let key = "task-cancel-registry-test"; + register_active_run( + key.to_string(), + ActiveRun { + abort: handle.abort_handle(), + hb_cancel: tx, + location: BoardLocation::Scratch, + card_id: "c1".to_string(), + run_id: "r1".to_string(), + }, + ); + assert!(take_active_run(key).is_some(), "first take owns the run"); + assert!( + take_active_run(key).is_none(), + "second take gets nothing — write-back happens exactly once" + ); + handle.abort(); +} + +fn card(objective: Option<&str>) -> TaskBoardCard { + TaskBoardCard { + id: "task-1".into(), + title: "[GitHub] Fix login bug".into(), + status: TaskCardStatus::Todo, + objective: objective.map(str::to_string), + plan: vec![], + assigned_agent: None, + allowed_tools: vec![], + approval_mode: None, + acceptance_criteria: vec![], + evidence: vec![], + notes: None, + blocker: None, + session_thread_id: None, + source_metadata: None, + order: 0, + updated_at: String::new(), + } +} + +#[test] +fn prompt_uses_objective_then_falls_back_to_title() { + let p = build_task_prompt(&card(Some("Fix the login bug"))); + assert!(p.contains("Fix the login bug")); + assert!(!p.contains("[GitHub]")); + + let p2 = build_task_prompt(&card(None)); + assert!(p2.contains("[GitHub] Fix login bug")); +} + +#[test] +fn prompt_includes_plan_and_acceptance_criteria() { + let mut c = card(Some("Do it")); + c.plan = vec!["step one".into(), "step two".into()]; + c.acceptance_criteria = vec!["tests pass".into()]; + let p = build_task_prompt(&c); + assert!(p.contains("Plan:")); + assert!(p.contains("1. step one")); + assert!(p.contains("2. 
step two")); + assert!(p.contains("Acceptance criteria")); + assert!(p.contains("- tests pass")); +} + +#[test] +fn prompt_points_at_source_and_memory_when_metadata_present() { + let mut c = card(Some("Resolve issue")); + c.source_metadata = Some(json!({ + "provider": "github", + "repo": "octo/repo", + "external_id": "123", + "url": "https://github.com/octo/repo/issues/123", + })); + let p = build_task_prompt(&c); + assert!(p.contains("github octo/repo#123")); + assert!(p.contains("memory_recall")); + assert!(p.contains("https://github.com/octo/repo/issues/123")); +} + +#[test] +fn prompt_omits_source_block_without_metadata() { + let p = build_task_prompt(&card(Some("Do it"))); + assert!(!p.contains("memory_recall")); + assert!(!p.contains("record the outcome on the upstream source")); +} + +#[test] +fn prompt_includes_external_writeback_when_addressable() { + let mut c = card(Some("Resolve issue")); + c.source_metadata = Some(json!({ + "provider": "github", + "repo": "octo/repo", + "external_id": "123", + })); + let p = build_task_prompt(&c); + assert!(p.contains("record the outcome on the upstream source")); + assert!(p.contains("close/resolve the item")); +} + +#[test] +fn prompt_omits_writeback_when_not_addressable() { + // Urgency-only metadata (no provider/external_id) can't address an + // upstream item, so no write-back instruction. 
+ let mut c = card(Some("Do it")); + c.source_metadata = Some(json!({ "urgency": 0.5 })); + let p = build_task_prompt(&c); + assert!(!p.contains("record the outcome on the upstream source")); +} + +#[test] +fn truncate_caps_long_strings() { + let s = "x".repeat(5_000); + let out = truncate_chars(&s, EVIDENCE_MAX_CHARS); + assert!(out.chars().count() <= EVIDENCE_MAX_CHARS); + assert!(out.ends_with('…')); +} + +fn card_with(id: &str, status: TaskCardStatus, urgency: Option, order: u32) -> TaskBoardCard { + let mut c = card(Some("obj")); + c.id = id.into(); + c.status = status; + c.order = order; + c.source_metadata = urgency.map(|u| json!({ "urgency": u })); + c +} + +#[test] +fn poller_picks_highest_urgency_todo_skipping_other_statuses() { + let cards = vec![ + card_with("a", TaskCardStatus::Todo, Some(0.3), 0), + card_with("b", TaskCardStatus::Done, Some(0.99), 1), + card_with("c", TaskCardStatus::Todo, Some(0.8), 2), + card_with("d", TaskCardStatus::Todo, None, 3), + ]; + let picked = pick_next_todo(&cards, false).expect("a todo card is available"); + assert_eq!( + picked.id, "c", + "highest-urgency todo wins, done card ignored" + ); +} + +#[test] +fn poller_breaks_urgency_ties_toward_lower_order() { + let cards = vec![ + card_with("late", TaskCardStatus::Todo, Some(0.5), 5), + card_with("early", TaskCardStatus::Todo, Some(0.5), 2), + ]; + assert_eq!(pick_next_todo(&cards, false).unwrap().id, "early"); +} + +#[test] +fn poller_returns_none_when_no_todo_cards() { + let cards = vec![card_with("a", TaskCardStatus::Done, Some(0.9), 0)]; + assert!(pick_next_todo(&cards, false).is_none()); +} + +#[test] +fn poller_dispatches_ready_cards_and_skips_approval_states() { + // Approved `ready` cards are dispatchable; `awaiting_approval` and + // `rejected` are not. 
+ let cards = vec![ + card_with("await", TaskCardStatus::AwaitingApproval, Some(0.99), 0), + card_with("rej", TaskCardStatus::Rejected, Some(0.95), 1), + card_with("ready", TaskCardStatus::Ready, Some(0.5), 2), + ]; + assert_eq!(pick_next_todo(&cards, false).unwrap().id, "ready"); +} + +#[test] +fn poller_prefers_higher_urgency_across_todo_and_ready() { + let cards = vec![ + card_with("ready-low", TaskCardStatus::Ready, Some(0.3), 0), + card_with("todo-high", TaskCardStatus::Todo, Some(0.9), 1), + ]; + assert_eq!(pick_next_todo(&cards, false).unwrap().id, "todo-high"); +} + +#[test] +fn poller_agent_only_skips_unassigned_cards() { + // On the user-tasks board we run only agent-assigned cards. A human's + // manual todo (no assigned_agent) must be skipped even at high urgency. + let mut human = card_with("human", TaskCardStatus::Todo, Some(0.99), 0); + human.assigned_agent = None; + let mut agent = card_with("agent", TaskCardStatus::Todo, Some(0.20), 1); + agent.assigned_agent = Some("orchestrator".into()); + let cards = vec![human, agent]; + + // Agent-only: the lower-urgency assigned card wins; the human card is invisible. + assert_eq!(pick_next_todo(&cards, true).unwrap().id, "agent"); + // Unfiltered (task-sources behaviour): highest urgency wins regardless. + assert_eq!(pick_next_todo(&cards, false).unwrap().id, "human"); +} + +#[test] +fn poller_agent_only_returns_none_when_all_unassigned() { + let mut a = card_with("a", TaskCardStatus::Todo, Some(0.9), 0); + a.assigned_agent = None; + let mut b = card_with("b", TaskCardStatus::Todo, Some(0.5), 1); + b.assigned_agent = Some(" ".into()); // blank handle is not "assigned" + let cards = vec![a, b]; + assert!(pick_next_todo(&cards, true).is_none()); +} + +#[test] +fn approval_gate_respects_global_and_per_card_optout() { + // Global off → never park. 
+ assert!(!requires_plan_approval(false, None)); + assert!(!requires_plan_approval( + false, + Some(&TaskApprovalMode::Required) + )); + // Global on → park, unless the card opts out via NotRequired. + assert!(requires_plan_approval(true, None)); + assert!(requires_plan_approval( + true, + Some(&TaskApprovalMode::Required) + )); + assert!(!requires_plan_approval( + true, + Some(&TaskApprovalMode::NotRequired) + )); +} + +#[test] +fn progress_instruction_names_card_thread_and_tool() { + let s = build_progress_instruction("task-42", "user-tasks"); + assert!(s.contains("task-42")); + assert!(s.contains("user-tasks")); + assert!(s.contains("update_task")); + // It must instruct the agent to self-block (status: blocked + blocker) + // when it needs the user, so write_back can preserve that state. + assert!(s.contains("status: blocked")); + assert!(s.contains("blocker")); +} + +#[test] +fn resolver_defaults_to_orchestrator_for_unset_or_orchestrator_handle() { + use super::executor::resolve_executor; + let dir = tempfile::tempdir().unwrap(); + for handle in [None, Some(""), Some(" "), Some("orchestrator")] { + let r = resolve_executor(dir.path(), handle); + assert_eq!(r.agent_id, "orchestrator"); + assert_eq!(r.label, "default"); + assert!(r.prompt_suffix.is_none()); + } +} + +#[test] +fn resolver_uses_personality_branch_for_builtin_profile() { + use super::executor::resolve_executor; + // `load_profiles` returns built-in profiles for any empty workspace, so + // the personality branch is reachable with no fixture file. "research" + // is a built-in profile backed by the "researcher" agent. 
+ let dir = tempfile::tempdir().unwrap(); + let r = resolve_executor(dir.path(), Some("research")); + assert_eq!(r.label, "personality:research"); + assert_eq!(r.agent_id, "researcher"); + let suffix = r.prompt_suffix.expect("personality preamble present"); + assert!(suffix.contains("acting as the personality `research`")); +} + +#[test] +fn resolver_degrades_to_default_for_unresolved_handle() { + use super::executor::resolve_executor; + let dir = tempfile::tempdir().unwrap(); + let r = resolve_executor(dir.path(), Some("no-such-executor-xyz")); + assert_eq!(r.agent_id, "orchestrator"); + assert_eq!(r.label, "default-fallback"); + assert!(r.prompt_suffix.is_none()); +} + +fn board_loc(dir: &std::path::Path) -> BoardLocation { + BoardLocation::Thread { + workspace_dir: dir.to_path_buf(), + thread_id: "t1".to_string(), + } +} + +#[test] +fn write_back_marks_done_with_evidence_on_success() { + let dir = tempfile::tempdir().unwrap(); + let loc = board_loc(dir.path()); + let id = ops::add(&loc, "do the thing", CardPatch::default()) + .unwrap() + .cards[0] + .id + .clone(); + ops::update_status(&loc, &id, TaskCardStatus::InProgress).unwrap(); + + write_back( + &loc, + &id, + "run-1", + Ok("completed: opened PR #5".to_string()), + ); + + let card = ops::list(&loc) + .unwrap() + .cards + .into_iter() + .find(|c| c.id == id) + .unwrap(); + assert_eq!(card.status, TaskCardStatus::Done); + assert!(card.evidence.iter().any(|e| e.contains("opened PR #5"))); +} + +#[test] +fn write_back_preserves_agent_set_blocked_on_clean_run() { + // The run marked its own card `blocked` (needs user input) via + // update_task, then ended cleanly. write_back must NOT force it to + // `done` — the task stays blocked, with the agent's blocker intact, + // awaiting the user. 
+ let dir = tempfile::tempdir().unwrap(); + let loc = board_loc(dir.path()); + let id = ops::add(&loc, "update alan", CardPatch::default()) + .unwrap() + .cards[0] + .id + .clone(); + ops::update_status(&loc, &id, TaskCardStatus::InProgress).unwrap(); + // Agent self-blocks mid-run, as build_progress_instruction asks it to. + ops::edit( + &loc, + &id, + CardPatch { + status: Some(TaskCardStatus::Blocked), + blocker: Some("Slack isn't connected — confirm how to reach Alan".to_string()), + ..Default::default() + }, + ) + .unwrap(); + + // Run returns Ok (the turn finished) — but the card is self-blocked. + write_back( + &loc, + &id, + "run-2", + Ok("I checked GitHub and memory…".to_string()), + ); + + let card = ops::list(&loc) + .unwrap() + .cards + .into_iter() + .find(|c| c.id == id) + .unwrap(); + assert_eq!( + card.status, + TaskCardStatus::Blocked, + "a clean run over a self-blocked card must stay blocked, not auto-done" + ); + assert_eq!( + card.blocker.as_deref(), + Some("Slack isn't connected — confirm how to reach Alan"), + "the agent's blocker reason is preserved" + ); +} + +#[test] +fn write_back_marks_blocked_with_reason_on_failure() { + let dir = tempfile::tempdir().unwrap(); + let loc = board_loc(dir.path()); + let id = ops::add(&loc, "do the thing", CardPatch::default()) + .unwrap() + .cards[0] + .id + .clone(); + ops::update_status(&loc, &id, TaskCardStatus::InProgress).unwrap(); + + write_back(&loc, &id, "run-1", Err("agent build failed".to_string())); + + let card = ops::list(&loc) + .unwrap() + .cards + .into_iter() + .find(|c| c.id == id) + .unwrap(); + assert_eq!(card.status, TaskCardStatus::Blocked); + assert!(card + .blocker + .as_deref() + .unwrap_or_default() + .contains("agent build failed")); +} diff --git a/src/openhuman/agent/task_dispatcher/types.rs b/src/openhuman/agent/task_dispatcher/types.rs new file mode 100644 index 0000000000..7bd50e1872 --- /dev/null +++ b/src/openhuman/agent/task_dispatcher/types.rs @@ -0,0 +1,48 @@ +//! 
Shared types for the task dispatcher. + +use crate::openhuman::todos::ops::BoardLocation; + +/// Handle to an in-flight autonomous run, keyed by its session `thread_id`. +/// +/// Autonomous runs are detached `tokio` tasks, not web-channel turns, so they +/// are invisible to the web channel's own in-flight registry — which is why the +/// chat **Cancel** button (which calls `channel_web_cancel`) couldn't stop them. +/// Registering the run's [`AbortHandle`](tokio::task::AbortHandle) here lets +/// [`cancel_session`] abort it from that same cancel path. +pub(super) struct ActiveRun { + pub(super) abort: tokio::task::AbortHandle, + pub(super) hb_cancel: tokio::sync::watch::Sender, + pub(super) location: BoardLocation, + pub(super) card_id: String, + pub(super) run_id: String, +} + +/// A resolved executor: which built-in agent definition to build, an optional +/// system-prompt suffix carrying a personality identity or skill guidelines, +/// and a label for logs/telemetry. +#[derive(Debug, Clone, PartialEq)] +pub(super) struct ResolvedExecutor { + pub(super) agent_id: String, + pub(super) prompt_suffix: Option, + pub(super) label: String, +} + +impl ResolvedExecutor { + pub(super) fn default_agent() -> Self { + Self { + agent_id: "orchestrator".to_string(), + prompt_suffix: None, + label: "default".to_string(), + } + } +} + +/// Outcome of a dispatch attempt. +#[derive(Debug)] +pub enum DispatchOutcome { + /// The card was claimed and a detached autonomous run was spawned. + Running { run_id: String }, + /// Plan approval is required; the card was parked at `awaiting_approval` + /// and a `TaskPlanAwaitingApproval` event was emitted. No run was spawned. + AwaitingApproval, +} diff --git a/src/openhuman/channels/controllers/ops.rs b/src/openhuman/channels/controllers/ops.rs deleted file mode 100644 index a5b714023d..0000000000 --- a/src/openhuman/channels/controllers/ops.rs +++ /dev/null @@ -1,1333 +0,0 @@ -//! Channel controller business logic. 
- -use serde::{Deserialize, Serialize}; -use serde_json::{json, Value}; - -use crate::api::config::{app_env_from_env, effective_backend_api_url, is_staging_app_env}; -use crate::api::jwt::get_session_token; -use crate::api::rest::BackendOAuthClient; -use crate::openhuman::channels::providers::yuanbao::sign::SignManager; -use crate::openhuman::channels::providers::yuanbao::YuanbaoConfig; -use crate::openhuman::config::{Config, DiscordConfig, IMessageConfig, TelegramConfig}; -use crate::openhuman::credentials; -use crate::openhuman::memory_store::chunks::store as memory_tree_store; -use crate::openhuman::memory_store::chunks::types::SourceKind; -use crate::rpc::RpcOutcome; - -use super::definitions::{ - all_channel_definitions, find_channel_definition, ChannelAuthMode, ChannelDefinition, -}; - -/// Result returned by `connect_channel`. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChannelConnectionResult { - /// `"connected"` for credential-based modes, `"pending_auth"` for OAuth/managed. - pub status: String, - /// Whether the service must be restarted for the channel to become active. - pub restart_required: bool, - /// For OAuth/managed modes: the action ID the frontend should handle. - #[serde(skip_serializing_if = "Option::is_none")] - pub auth_action: Option, - /// Human-readable status message. - #[serde(skip_serializing_if = "Option::is_none")] - pub message: Option, -} - -/// Single entry returned by `channel_status`. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChannelStatusEntry { - pub channel_id: String, - pub auth_mode: ChannelAuthMode, - pub connected: bool, - pub has_credentials: bool, -} - -/// Result returned by `test_channel`. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChannelTestResult { - pub success: bool, - pub message: String, -} - -/// Credential provider key for channel connections: `"channel:{id}:{mode}"`. 
-fn credential_provider(channel_id: &str, mode: ChannelAuthMode) -> String { - format!("channel:{}:{}", channel_id, mode) -} - -fn channel_config_connected(config: &Config, channel_id: &str, mode: ChannelAuthMode) -> bool { - let channels = &config.channels_config; - match (channel_id, mode) { - ("telegram", ChannelAuthMode::BotToken) => channels.telegram.is_some(), - ("discord", ChannelAuthMode::BotToken) => channels.discord.is_some(), - ("slack", _) => channels.slack.is_some(), - ("mattermost", _) => channels.mattermost.is_some(), - ("imessage", ChannelAuthMode::ManagedDm) => channels.imessage.is_some(), - ("matrix", _) => channels.matrix.is_some(), - ("signal", _) => channels.signal.is_some(), - ("whatsapp", _) => channels.whatsapp.is_some(), - ("linq", _) => channels.linq.is_some(), - ("email", _) => channels.email.is_some(), - ("irc", _) => channels.irc.is_some(), - ("lark", _) => channels.lark.is_some(), - ("dingtalk", _) => channels.dingtalk.is_some(), - ("qq", _) => channels.qq.is_some(), - ("yuanbao", ChannelAuthMode::ApiKey) => channels.yuanbao.is_some(), - _ => false, - } -} - -fn parse_allowed_users(value: Option<&Value>) -> Vec { - let mut out: Vec = Vec::new(); - - let mut push_identity = |raw: &str| { - let trimmed = raw.trim(); - if trimmed.is_empty() { - return; - } - let normalized = trimmed.trim_start_matches('@').trim(); - if normalized.is_empty() { - return; - } - let canonical = normalized.to_lowercase(); - if !out - .iter() - .any(|existing| existing.eq_ignore_ascii_case(&canonical)) - { - out.push(canonical); - } - }; - - match value { - Some(Value::String(s)) => { - for part in s.split([',', '\n', '\r']) { - push_identity(part); - } - } - Some(Value::Array(items)) => { - for item in items { - if let Some(s) = item.as_str() { - for part in s.split([',', '\n', '\r']) { - push_identity(part); - } - } - } - } - _ => {} - } - - out -} - -fn parse_optional_bool(value: Option<&Value>) -> Option { - match value { - Some(Value::Bool(b)) => Some(*b), 
- Some(Value::Number(n)) => n.as_i64().map(|v| v != 0), - Some(Value::String(s)) => { - let normalized = s.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => Some(true), - "0" | "false" | "no" | "off" => Some(false), - _ => None, - } - } - _ => None, - } -} - -/// Read a required non-empty Yuanbao credential field from the connect-channel -/// payload. Returns the trimmed value or an error naming the missing field. -fn require_yuanbao_field( - creds_map: &serde_json::Map, - key: &str, -) -> Result { - creds_map - .get(key) - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .ok_or_else(|| format!("missing required {key}")) -} - -/// Build the **effective** Yuanbao config that will be used for both -/// preflight verification and persistence. -/// -/// Starts from the existing TOML (so manually-installed deployments keep -/// any custom routes), overlays the client-supplied endpoint overrides -/// (`env` / `api_domain` / `ws_domain` / `route_env`), then calls -/// `apply_env_defaults` so the verifier hits the correct cluster — e.g. a -/// user submitting `env = "pre"` is verified against the pre-release -/// sign-token endpoint instead of the default prod one. -/// -/// `app_secret` is intentionally left empty: the runtime loads it from -/// the encrypted credentials store at startup, never from `config.toml`. 
-fn build_effective_yuanbao_config( - base: YuanbaoConfig, - creds_map: &serde_json::Map, - app_key: String, -) -> YuanbaoConfig { - let opt_string = |key: &str| -> Option { - creds_map - .get(key) - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - }; - - let mut cfg = base; - cfg.app_key = app_key; - cfg.app_secret = String::new(); - if let Some(env) = opt_string("env") { - cfg.env = env; - } - if let Some(api_domain) = opt_string("api_domain") { - cfg.api_domain = api_domain; - } - if let Some(ws_domain) = opt_string("ws_domain") { - cfg.ws_domain = ws_domain; - } - if let Some(route_env) = opt_string("route_env") { - cfg.route_env = route_env; - } - cfg.apply_env_defaults(); - cfg -} - -/// Verify Yuanbao credentials against the `sign-token` endpoint before any -/// persistence so invalid `app_key` / `app_secret` surface the upstream API -/// error to the user instead of silently succeeding. -/// -/// Takes the **effective** `YuanbaoConfig` already built from the client's -/// overrides + TOML defaults, so the verifier targets whatever cluster the -/// runtime will use after restart. -async fn verify_yuanbao_credentials( - yb_cfg: &YuanbaoConfig, - app_secret: &str, -) -> Result<(), String> { - SignManager::new(reqwest::Client::new()) - .get_token( - &yb_cfg.app_key, - app_secret, - &yb_cfg.api_domain, - &yb_cfg.route_env, - ) - .await - .map_err(|e| format!("yuanbao credential verification failed: {e}"))?; - Ok(()) -} - -/// List all available channel definitions. -pub async fn list_channels() -> Result>, String> { - Ok(RpcOutcome::new(all_channel_definitions(), vec![])) -} - -/// Describe a single channel by id. -pub async fn describe_channel(channel_id: &str) -> Result, String> { - let def = find_channel_definition(channel_id) - .ok_or_else(|| format!("unknown channel: {channel_id}"))?; - Ok(RpcOutcome::new(def, vec![])) -} - -/// Initiate a channel connection. 
-/// -/// For `BotToken`/`ApiKey` modes: validates fields and stores credentials. -/// For `OAuth`/`ManagedDm` modes: returns the auth action the frontend should handle. -pub async fn connect_channel( - config: &Config, - channel_id: &str, - auth_mode: ChannelAuthMode, - credentials_value: Value, -) -> Result, String> { - let def = find_channel_definition(channel_id) - .ok_or_else(|| format!("unknown channel: {channel_id}"))?; - - let spec = def.auth_mode_spec(auth_mode).ok_or_else(|| { - format!( - "channel '{}' does not support auth mode '{}'", - channel_id, auth_mode - ) - })?; - - // For OAuth/managed modes, return the auth action without storing credentials. - if let Some(action) = spec.auth_action { - return Ok(RpcOutcome::new( - ChannelConnectionResult { - status: "pending_auth".to_string(), - restart_required: false, - auth_action: Some(action.to_string()), - message: Some(format!("Initiate '{}' auth flow on the frontend. Ignore if you are already in the auth flow.", action)), - }, - vec![], - )); - } - - // Credential-based modes: validate required fields. - let creds_map = credentials_value - .as_object() - .ok_or("credentials must be a JSON object")?; - - def.validate_credentials(auth_mode, creds_map)?; - - // Yuanbao: build the effective config (with any client-supplied - // endpoint overrides applied) once, verify against THAT cluster, and - // reuse the same config for persistence below. This prevents the - // verifier from validating against prod while the runtime then - // reconnects to a pre-release cluster after restart. 
- let mut prebuilt_yuanbao_config: Option = None; - if channel_id == "yuanbao" && auth_mode == ChannelAuthMode::ApiKey { - let app_key = require_yuanbao_field(creds_map, "app_key")?; - let app_secret = require_yuanbao_field(creds_map, "app_secret")?; - let base = config.channels_config.yuanbao.clone().unwrap_or_default(); - let effective = build_effective_yuanbao_config(base, creds_map, app_key); - verify_yuanbao_credentials(&effective, &app_secret).await?; - prebuilt_yuanbao_config = Some(effective); - } - - // iMessage is local-only (no credentials): persist channels_config + return connected. - if channel_id == "imessage" && auth_mode == ChannelAuthMode::ManagedDm { - let allowed_contacts = parse_allowed_users(creds_map.get("allowed_contacts")); - let allowed_contacts_count = allowed_contacts.len(); - - let mut persisted = config.clone(); - persisted.channels_config.imessage = Some(IMessageConfig { allowed_contacts }); - - persisted - .save() - .await - .map_err(|e| format!("failed to persist imessage config.toml: {e}"))?; - - tracing::info!( - target: "openhuman::channels", - allowed_contacts_count, - "[imessage] connect_channel: wrote channels_config.imessage; restart core for AppleScript bridge to load" - ); - - return Ok(RpcOutcome::single_log( - ChannelConnectionResult { - status: "connected".to_string(), - restart_required: true, - auth_action: None, - message: Some( - "iMessage channel configured. Grant Full Disk Access and restart the service to activate.".to_string(), - ), - }, - "stored imessage channel config (local-only)".to_string(), - )); - } - - // Store credentials via the credentials domain. - let provider_key = credential_provider(channel_id, auth_mode); - - // Extract the primary token field (bot_token or api_key) if present. - let token = creds_map - .get("bot_token") - .or_else(|| creds_map.get("api_key")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - // Store remaining fields as metadata. 
- let fields = if creds_map.len() > 1 || (creds_map.len() == 1 && token.is_none()) { - Some(Value::Object(creds_map.clone())) - } else { - None - }; - - credentials::ops::store_provider_credentials( - config, - &provider_key, - None, // default profile - token, - fields, - Some(true), - ) - .await - .map_err(|e| format!("failed to store credentials: {e}"))?; - - // Keep runtime channel config in sync so listeners can actually start - // with the credentials just connected from the UI. - if channel_id == "telegram" && auth_mode == ChannelAuthMode::BotToken { - let bot_token = creds_map - .get("bot_token") - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|s| !s.is_empty()) - .ok_or_else(|| "missing required bot_token".to_string())? - .to_string(); - let allowed_users = parse_allowed_users(creds_map.get("allowed_users")); - let allowed_users_count = allowed_users.len(); - - let mut persisted = config.clone(); - let (stream_mode, draft_update_interval_ms, silent_streaming, mention_only) = - if let Some(existing) = persisted.channels_config.telegram.as_ref() { - ( - existing.stream_mode, - existing.draft_update_interval_ms, - existing.silent_streaming, - existing.mention_only, - ) - } else { - ( - crate::openhuman::config::StreamMode::default(), - 1000, - true, - false, - ) - }; - - persisted.channels_config.telegram = Some(TelegramConfig { - bot_token, - allowed_users, - stream_mode, - draft_update_interval_ms, - silent_streaming, - mention_only, - }); - - persisted - .save() - .await - .map_err(|e| format!("failed to persist telegram config.toml: {e}"))?; - - tracing::info!( - target: "openhuman::channels", - allowed_users_count, - mention_only, - "[telegram] connect_channel: wrote channels_config.telegram; restart core for listener to load token" - ); - } else if channel_id == "discord" && auth_mode == ChannelAuthMode::BotToken { - let bot_token = creds_map - .get("bot_token") - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|s| !s.is_empty()) - 
.ok_or_else(|| "missing required bot_token".to_string())? - .to_string(); - - let guild_id = creds_map - .get("guild_id") - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()); - let discord_channel_id = creds_map - .get("channel_id") - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()); - - let mut persisted = config.clone(); - let existing = persisted.channels_config.discord.as_ref(); - let parsed_allowed_users = parse_allowed_users(creds_map.get("allowed_users")); - let allowed_users = if parsed_allowed_users.is_empty() { - existing - .map(|cfg| cfg.allowed_users.clone()) - .unwrap_or_default() - } else { - parsed_allowed_users - }; - let allowed_users_count = allowed_users.len(); - let listen_to_bots = parse_optional_bool(creds_map.get("listen_to_bots")) - .unwrap_or_else(|| existing.map(|cfg| cfg.listen_to_bots).unwrap_or(false)); - let mention_only = parse_optional_bool(creds_map.get("mention_only")) - .unwrap_or_else(|| existing.map(|cfg| cfg.mention_only).unwrap_or(false)); - - persisted.channels_config.discord = Some(DiscordConfig { - bot_token, - guild_id: guild_id.clone(), - channel_id: discord_channel_id.clone(), - allowed_users, - listen_to_bots, - mention_only, - }); - - persisted - .save() - .await - .map_err(|e| format!("failed to persist discord config.toml: {e}"))?; - - tracing::info!( - target: "openhuman::channels", - has_guild_id = guild_id.is_some(), - has_channel_id = discord_channel_id.is_some(), - allowed_users_count, - listen_to_bots, - mention_only, - "[discord] connect_channel: wrote channels_config.discord; restart core for listener to load token" - ); - } else if channel_id == "yuanbao" && auth_mode == ChannelAuthMode::ApiKey { - // Reuse the effective config built above (with `env` / `api_domain` - // / `ws_domain` / `route_env` overrides already applied and - // `app_secret` already cleared) so persistence and verification - // can never 
diverge. - let yb_config = prebuilt_yuanbao_config.take().ok_or_else(|| { - "internal error: yuanbao config not built before persistence".to_string() - })?; - - let mut persisted = config.clone(); - persisted.channels_config.yuanbao = Some(yb_config); - - persisted - .save() - .await - .map_err(|e| format!("failed to persist yuanbao config.toml: {e}"))?; - - tracing::info!( - target: "openhuman::channels", - "[yuanbao] connect_channel: wrote channels_config.yuanbao (secret stored in credentials); restart core for WS listener" - ); - } - - Ok(RpcOutcome::single_log( - ChannelConnectionResult { - status: "connected".to_string(), - restart_required: true, - auth_action: None, - message: Some(format!( - "Channel '{}' credentials stored. Restart the service to activate.", - channel_id - )), - }, - format!("stored credentials for {}", provider_key), - )) -} - -/// Disconnect a channel by removing stored credentials. -pub async fn disconnect_channel( - config: &Config, - channel_id: &str, - auth_mode: ChannelAuthMode, - clear_memory: bool, -) -> Result, String> { - // Verify channel exists. - find_channel_definition(channel_id).ok_or_else(|| format!("unknown channel: {channel_id}"))?; - - let provider_key = credential_provider(channel_id, auth_mode); - - // iMessage has no stored credentials (local-only); skip credential removal. 
- if !(channel_id == "imessage" && auth_mode == ChannelAuthMode::ManagedDm) { - credentials::ops::remove_provider_credentials(config, &provider_key, None) - .await - .map_err(|e| format!("failed to remove credentials: {e}"))?; - } - - if channel_id == "telegram" && auth_mode == ChannelAuthMode::BotToken { - let mut persisted = config.clone(); - if persisted.channels_config.telegram.take().is_some() { - persisted - .save() - .await - .map_err(|e| format!("failed to clear telegram config.toml: {e}"))?; - tracing::info!( - target: "openhuman::channels", - "[telegram] disconnect_channel: cleared channels_config.telegram" - ); - } - } else if channel_id == "discord" && auth_mode == ChannelAuthMode::BotToken { - let mut persisted = config.clone(); - if persisted.channels_config.discord.take().is_some() { - persisted - .save() - .await - .map_err(|e| format!("failed to clear discord config.toml: {e}"))?; - tracing::info!( - target: "openhuman::channels", - "[discord] disconnect_channel: cleared channels_config.discord" - ); - } - } else if channel_id == "imessage" && auth_mode == ChannelAuthMode::ManagedDm { - let mut persisted = config.clone(); - if persisted.channels_config.imessage.take().is_some() { - persisted - .save() - .await - .map_err(|e| format!("failed to clear imessage config.toml: {e}"))?; - tracing::info!( - target: "openhuman::channels", - "[imessage] disconnect_channel: cleared channels_config.imessage" - ); - } - } else if channel_id == "yuanbao" && auth_mode == ChannelAuthMode::ApiKey { - let mut persisted = config.clone(); - if persisted.channels_config.yuanbao.take().is_some() { - persisted - .save() - .await - .map_err(|e| format!("failed to clear yuanbao config.toml: {e}"))?; - tracing::info!( - target: "openhuman::channels", - "[yuanbao] disconnect_channel: cleared channels_config.yuanbao" - ); - } - } - - let memory_chunks_deleted = if clear_memory { - clear_channel_memory(config, channel_id).map_err(|e| { - format!("channel disconnected, but 
failed to clear memory chunks: {e:#}") - })? - } else { - 0 - }; - - Ok(RpcOutcome::single_log( - json!({ - "channel": channel_id, - "auth_mode": auth_mode, - "disconnected": true, - "restart_required": true, - "memory_chunks_deleted": memory_chunks_deleted, - }), - format!("removed credentials for {}", provider_key), - )) -} - -fn clear_channel_memory(config: &Config, channel_id: &str) -> anyhow::Result { - let exact = memory_tree_store::delete_chunks_by_source(config, SourceKind::Chat, channel_id)?; - let prefixed = memory_tree_store::delete_chunks_by_source_prefix( - config, - SourceKind::Chat, - &format!("{channel_id}:"), - )?; - Ok(exact + prefixed) -} - -/// Get connection status for one or all channels. -pub async fn channel_status( - config: &Config, - channel_id: Option<&str>, -) -> Result>, String> { - // List all stored credentials with "channel:" prefix. Uses the - // prefix-match helper because channel credentials are keyed as - // `channel::` and no single literal value matches them - // through `list_provider_credentials`'s exact-match filter. 
- let stored = credentials::ops::list_provider_credentials_by_prefix(config, "channel:") - .await - .map_err(|e| format!("failed to list credentials: {e}"))?; - - let stored_providers: Vec = stored.iter().map(|p| p.provider.clone()).collect(); - - let defs = match channel_id { - Some(id) => { - let def = - find_channel_definition(id).ok_or_else(|| format!("unknown channel: {id}"))?; - vec![def] - } - None => all_channel_definitions(), - }; - - let mut entries = Vec::new(); - for def in &defs { - for spec in &def.auth_modes { - let provider_key = credential_provider(def.id, spec.mode); - let has_creds = stored_providers.iter().any(|p| p == &provider_key); - let has_config = channel_config_connected(config, def.id, spec.mode); - let connected = has_creds || has_config; - entries.push(ChannelStatusEntry { - channel_id: def.id.to_string(), - auth_mode: spec.mode, - connected, - // Reflect actual credential presence, not connection state: - // a config-only channel is `connected` but has no stored - // credentials. Collapsing these misleads callers that branch on - // credential presence (e.g. "needs re-auth" surfaces). - has_credentials: has_creds, - }); - } - } - - Ok(RpcOutcome::new(entries, vec![])) -} - -/// Return the slugs of all messaging channels currently connected, -/// merging the two storage layers OpenHuman uses for connection state. -/// -/// Two equally-authoritative sources exist today: -/// -/// * `config.channels_config.` — the legacy TOML field set by -/// credential-mode connects that need a runtime listener -/// (`bot_token` / `webhook` / `oauth`). These trigger -/// `restart_required = true` on the connect call. -/// * Provider credentials keyed `channel::` — set by the -/// newer managed-DM and OAuth flows that don't materialise a TOML -/// block but do persist a credential marker. -/// -/// Until both stores merge, any caller that only reads one will report -/// stale state to the user (e.g. 
the agent will say "Telegram not -/// connected" right after a managed-DM link succeeds — issue #1149). -/// This helper centralises the merge so every consumer agrees. -pub async fn connected_channel_slugs(config: &Config) -> Result, String> { - use std::collections::BTreeSet; - - let mut slugs: BTreeSet = BTreeSet::new(); - - // Layer 1: credential-mode channels written to TOML config. - let cc = &config.channels_config; - if cc.telegram.is_some() { - slugs.insert("telegram".to_string()); - } - if cc.discord.is_some() { - slugs.insert("discord".to_string()); - } - if cc.slack.is_some() { - slugs.insert("slack".to_string()); - } - if cc.mattermost.is_some() { - slugs.insert("mattermost".to_string()); - } - if cc.email.is_some() { - slugs.insert("email".to_string()); - } - if cc.whatsapp.is_some() { - slugs.insert("whatsapp".to_string()); - } - if cc.signal.is_some() { - slugs.insert("signal".to_string()); - } - if cc.matrix.is_some() { - slugs.insert("matrix".to_string()); - } - if cc.imessage.is_some() { - slugs.insert("imessage".to_string()); - } - if cc.yuanbao.is_some() { - slugs.insert("yuanbao".to_string()); - } - if cc.irc.is_some() { - slugs.insert("irc".to_string()); - } - if cc.lark.is_some() { - slugs.insert("lark".to_string()); - } - if cc.dingtalk.is_some() { - slugs.insert("dingtalk".to_string()); - } - if cc.linq.is_some() { - slugs.insert("linq".to_string()); - } - if cc.qq.is_some() { - slugs.insert("qq".to_string()); - } - - // Layer 2: managed-DM / OAuth channels stored only as credentials - // under `channel::`. - let stored = credentials::ops::list_provider_credentials_by_prefix(config, "channel:") - .await - .map_err(|e| format!("failed to list channel credentials: {e}"))?; - for entry in &stored { - // provider format: "channel::" — extract slug. 
- if let Some(rest) = entry.provider.strip_prefix("channel:") { - if let Some((slug, _mode)) = rest.split_once(':') { - if !slug.is_empty() { - slugs.insert(slug.to_string()); - } - } - } - } - - Ok(slugs.into_iter().collect()) -} - -/// Test a channel connection without persisting credentials. -pub async fn test_channel( - _config: &Config, - channel_id: &str, - auth_mode: ChannelAuthMode, - credentials_value: Value, -) -> Result, String> { - let def = find_channel_definition(channel_id) - .ok_or_else(|| format!("unknown channel: {channel_id}"))?; - - let creds_map = credentials_value - .as_object() - .ok_or("credentials must be a JSON object")?; - - // Validate fields first. - def.validate_credentials(auth_mode, creds_map)?; - - // For now, field validation is the test. A future version can instantiate - // the channel provider and call health_check(). - Ok(RpcOutcome::new( - ChannelTestResult { - success: true, - message: format!( - "Credentials for '{}' ({}) are structurally valid.", - channel_id, auth_mode - ), - }, - vec![], - )) -} - -// --------------------------------------------------------------------------- -// Managed Telegram login flow -// --------------------------------------------------------------------------- - -/// Default managed Telegram bot when `OPENHUMAN_APP_ENV` is staging and no username override is set. -const DEFAULT_TELEGRAM_BOT_USERNAME_STAGING: &str = "alphahumantest_bot"; -/// Default managed Telegram bot when app env is production (or unset) and no username override is set. -const DEFAULT_TELEGRAM_BOT_USERNAME_PRODUCTION: &str = "openhumanaibot"; - -/// Resolve the managed Telegram bot username from env, or from staging vs production defaults using -/// `OPENHUMAN_APP_ENV` / `VITE_OPENHUMAN_APP_ENV` (via `app_env_from_env`). 
-fn telegram_bot_username() -> String { - if let Ok(v) = std::env::var("OPENHUMAN_TELEGRAM_BOT_USERNAME") { - return v; - } - if let Ok(v) = std::env::var("VITE_TELEGRAM_BOT_USERNAME") { - return v; - } - if is_staging_app_env(app_env_from_env().as_deref()) { - return DEFAULT_TELEGRAM_BOT_USERNAME_STAGING.to_string(); - } - DEFAULT_TELEGRAM_BOT_USERNAME_PRODUCTION.to_string() -} - -/// Result from `telegram_login_start`. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TelegramLoginStartResult { - /// The short-lived link token created by the backend. - pub link_token: String, - /// Full Telegram deep link URL the user should open. - pub telegram_url: String, - /// Bot username used. - pub bot_username: String, -} - -/// Result from `telegram_login_check`. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct TelegramLoginCheckResult { - /// Whether the Telegram user has been linked to the app user. - pub linked: bool, - /// Backend-provided status payload (may include telegramUserId, etc.). - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option, -} - -/// Step 1: Create a channel link token for Telegram and return the deep link URL. -/// -/// Requires an active session JWT. -pub async fn telegram_login_start( - config: &Config, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!( - "[telegram-login] creating channel link token via {}", - api_url - ); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let payload = client - .create_channel_link_token("telegram", &jwt) - .await - .map_err(|e| format!("failed to create Telegram link token: {e}"))?; - - // Extract the link token from the backend response. - // Expected shape: { "linkToken": "..." } or { "token": "..." 
} - let link_token = payload - .get("linkToken") - .or_else(|| payload.get("token")) - .and_then(|v| v.as_str()) - .ok_or_else(|| { - format!( - "backend response missing linkToken field: {}", - serde_json::to_string(&payload).unwrap_or_default() - ) - })? - .trim() - .to_string(); - - if link_token.is_empty() { - return Err("backend returned empty link token".to_string()); - } - - let bot_username = telegram_bot_username(); - let telegram_url = format!("https://t.me/{}?start={}", bot_username, link_token); - - log::debug!( - "[telegram-login] link token created, deep link: {}", - telegram_url - ); - - Ok(RpcOutcome::new( - TelegramLoginStartResult { - link_token, - telegram_url, - bot_username, - }, - vec![], - )) -} - -/// Step 2: Check whether the user has completed the Telegram link (clicked /start). -/// -/// Polls `GET /auth/me` and checks whether the user profile now has a `telegramId`. -/// The frontend should poll this until `linked` becomes `true`. -/// On success, stores a `channel:telegram:managed_dm` credential marker locally. -pub async fn telegram_login_check( - config: &Config, - _link_token: &str, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)?.ok_or_else(|| "session JWT required".to_string())?; - - log::debug!("[telegram-login] checking if user profile has telegramId via GET /auth/me"); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let user_payload = client - .fetch_current_user(&jwt) - .await - .map_err(|e| format!("failed to fetch user profile: {e}"))?; - - // Check if the user now has a telegramId set. 
- let telegram_id = user_payload - .get("telegramId") - .and_then(|v| v.as_str()) - .filter(|s| !s.is_empty()) - .or_else(|| { - user_payload - .get("telegram_id") - .and_then(|v| v.as_str()) - .filter(|s| !s.is_empty()) - }); - - let linked = telegram_id.is_some(); - - log::debug!( - "[telegram-login] user profile has_telegram_id={}, linked={}", - telegram_id.is_some(), - linked - ); - - if linked { - // Store a credential marker so `channel_status` reports connected. - let provider_key = credential_provider("telegram", ChannelAuthMode::ManagedDm); - - let telegram_user_id = telegram_id.unwrap_or("").to_string(); - - let mut fields_map = serde_json::Map::new(); - fields_map.insert("linked".to_string(), Value::Bool(true)); - if !telegram_user_id.is_empty() { - fields_map.insert( - "telegram_user_id".to_string(), - Value::String(telegram_user_id), - ); - } - - // Store using a placeholder token (managed mode has no user-visible token). - credentials::ops::store_provider_credentials( - config, - &provider_key, - None, - Some("managed".to_string()), - Some(Value::Object(fields_map)), - Some(true), - ) - .await - .map_err(|e| format!("failed to store managed channel credentials: {e}"))?; - - log::info!( - "[telegram-login] Telegram managed DM linked; credentials stored as {}", - provider_key - ); - } - - Ok(RpcOutcome::new( - TelegramLoginCheckResult { - linked, - details: if linked { Some(user_payload) } else { None }, - }, - vec![], - )) -} - -// --------------------------------------------------------------------------- -// Discord managed link flow -// --------------------------------------------------------------------------- - -/// Result from `discord_link_start`. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct DiscordLinkStartResult { - /// The short-lived link token to paste into Discord. - pub link_token: String, - /// Human-readable instruction shown to the user. 
- pub instructions: String, -} - -/// Result from `discord_link_check`. -#[derive(Debug, Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct DiscordLinkCheckResult { - /// Whether the Discord account has been linked to the app user. - pub linked: bool, - /// Backend-provided status payload (may include discordId, etc.). - #[serde(skip_serializing_if = "Option::is_none")] - pub details: Option, -} - -/// Step 1: Create a Discord channel link token. -/// -/// Returns a short-lived token the user pastes into Discord as `!start `. -/// Requires an active session JWT. -pub async fn discord_link_start( - config: &Config, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!("[discord-link] creating channel link token via {}", api_url); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let payload = client - .create_channel_link_token("discord", &jwt) - .await - .map_err(|e| format!("failed to create Discord link token: {e}"))?; - - let link_token = payload - .get("linkToken") - .or_else(|| payload.get("token")) - .and_then(|v| v.as_str()) - .ok_or_else(|| { - format!( - "backend response missing linkToken field: {}", - serde_json::to_string(&payload).unwrap_or_default() - ) - })? - .trim() - .to_string(); - - if link_token.is_empty() { - return Err("backend returned empty link token".to_string()); - } - - let instructions = - format!("In Discord, send this message to the OpenHuman bot: !start {link_token}"); - - log::debug!( - "[discord-link] link token created, length={}", - link_token.len() - ); - - Ok(RpcOutcome::new( - DiscordLinkStartResult { - link_token, - instructions, - }, - vec![], - )) -} - -/// Step 2: Check whether the user has completed the Discord link. 
-/// -/// Polls `GET /auth/me` and checks whether the user profile now has a `discordId`. -/// On success, stores a `channel:discord:managed_dm` credential marker locally. -pub async fn discord_link_check( - config: &Config, - _link_token: &str, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)?.ok_or_else(|| "session JWT required".to_string())?; - - log::debug!("[discord-link] checking if user profile has discordId via GET /auth/me"); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let user_payload = client - .fetch_current_user(&jwt) - .await - .map_err(|e| format!("failed to fetch user profile: {e}"))?; - - let discord_id = user_payload - .get("discordId") - .and_then(|v| v.as_str()) - .filter(|s| !s.is_empty()) - .or_else(|| { - user_payload - .get("discord_id") - .and_then(|v| v.as_str()) - .filter(|s| !s.is_empty()) - }); - - let linked = discord_id.is_some(); - - log::debug!( - "[discord-link] user profile has_discord_id={}, linked={}", - discord_id.is_some(), - linked - ); - - if linked { - let provider_key = credential_provider("discord", ChannelAuthMode::ManagedDm); - let discord_user_id = discord_id.unwrap_or("").to_string(); - - let mut fields_map = serde_json::Map::new(); - fields_map.insert("linked".to_string(), Value::Bool(true)); - if !discord_user_id.is_empty() { - fields_map.insert( - "discord_user_id".to_string(), - Value::String(discord_user_id), - ); - } - - credentials::ops::store_provider_credentials( - config, - &provider_key, - None, - Some("managed".to_string()), - Some(Value::Object(fields_map)), - Some(true), - ) - .await - .map_err(|e| format!("failed to store Discord managed channel credentials: {e}"))?; - - log::info!( - "[discord-link] Discord managed DM linked; credentials stored as {}", - provider_key - ); - } - - Ok(RpcOutcome::new( - DiscordLinkCheckResult { - linked, - details: if linked { Some(user_payload) } else { None }, 
- }, - vec![], - )) -} - -// --------------------------------------------------------------------------- -// Channel messaging, reactions, and thread management -// --------------------------------------------------------------------------- - -/// Send a rich message to a channel via the backend API. -pub async fn channel_send_message( - config: &Config, - channel: &str, - message: Value, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!( - "[channels] sending message to channel '{}' via {}", - channel, - api_url - ); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let result = client - .send_channel_message(channel, &jwt, message) - .await - .map_err(|e| format!("failed to send channel message: {e}"))?; - - log::debug!("[channels] send_message response: {:?}", result); - - Ok(RpcOutcome::new(result, vec![])) -} - -/// Send a reaction to a message in a channel via the backend API. -pub async fn channel_send_reaction( - config: &Config, - channel: &str, - reaction: Value, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!( - "[channels] sending reaction to channel '{}' via {}", - channel, - api_url - ); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let result = client - .send_channel_reaction(channel, &jwt, reaction) - .await - .map_err(|e| format!("failed to send channel reaction: {e}"))?; - - log::debug!("[channels] send_reaction response: {:?}", result); - - Ok(RpcOutcome::new(result, vec![])) -} - -/// Create a thread in a channel via the backend API. 
-pub async fn channel_create_thread( - config: &Config, - channel: &str, - title: &str, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!( - "[channels] creating thread in channel '{}' title='{}' via {}", - channel, - title, - api_url - ); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let result = client - .create_channel_thread(channel, &jwt, title) - .await - .map_err(|e| format!("failed to create channel thread: {e}"))?; - - log::debug!("[channels] create_thread response: {:?}", result); - - Ok(RpcOutcome::new(result, vec![])) -} - -/// Close or reopen a thread in a channel via the backend API. -pub async fn channel_update_thread( - config: &Config, - channel: &str, - thread_id: &str, - action: &str, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? - .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!( - "[channels] updating thread '{}' in channel '{}' action='{}' via {}", - thread_id, - channel, - action, - api_url - ); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let result = client - .update_channel_thread(channel, &jwt, thread_id, action) - .await - .map_err(|e| format!("failed to update channel thread: {e}"))?; - - log::debug!("[channels] update_thread response: {:?}", result); - - Ok(RpcOutcome::new(result, vec![])) -} - -/// List threads in a channel via the backend API. -pub async fn channel_list_threads( - config: &Config, - channel: &str, - active: Option, -) -> Result, String> { - let api_url = effective_backend_api_url(&config.api_url); - let jwt = get_session_token(config)? 
- .ok_or_else(|| "session JWT required; complete login first".to_string())?; - - log::debug!( - "[channels] listing threads in channel '{}' active={:?} via {}", - channel, - active, - api_url - ); - - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - let result = client - .list_channel_threads(channel, &jwt, active) - .await - .map_err(|e| format!("failed to list channel threads: {e}"))?; - - log::debug!("[channels] list_threads response: {:?}", result); - - Ok(RpcOutcome::new(result, vec![])) -} - -// --------------------------------------------------------------------------- -// Discord guild/channel discovery -// --------------------------------------------------------------------------- - -/// Retrieve the stored Discord bot token from credentials. -async fn discord_bot_token(config: &Config) -> Result { - let provider_key = credential_provider("discord", ChannelAuthMode::BotToken); - let auth = credentials::AuthService::from_config(config); - let profile = auth - .get_profile(&provider_key, None) - .map_err(|e| format!("failed to load Discord credentials: {e}"))? - .ok_or("Discord bot token not configured. Connect Discord first.")?; - - let token = profile.token.unwrap_or_default(); - if token.is_empty() { - return Err("Discord bot token is empty.".to_string()); - } - Ok(token) -} - -/// List Discord guilds (servers) the connected bot is a member of. -pub async fn discord_list_guilds( - config: &Config, -) -> Result< - RpcOutcome>, - String, -> { - use crate::openhuman::channels::providers::discord::api; - - let token = discord_bot_token(config).await?; - let guilds = api::list_bot_guilds(&token) - .await - .map_err(|e| format!("Discord API error: {e}"))?; - Ok(RpcOutcome::single_log(guilds, "discord guilds listed")) -} - -/// List text channels in a Discord guild. 
-pub async fn discord_list_channels( - config: &Config, - guild_id: &str, -) -> Result< - RpcOutcome>, - String, -> { - use crate::openhuman::channels::providers::discord::api; - - if guild_id.is_empty() { - return Err("guild_id is required".to_string()); - } - let token = discord_bot_token(config).await?; - let channels = api::list_guild_channels(&token, guild_id) - .await - .map_err(|e| format!("Discord API error: {e}"))?; - Ok(RpcOutcome::single_log( - channels, - format!("discord channels listed for guild {guild_id}"), - )) -} - -/// Check bot permissions in a Discord channel. -pub async fn discord_check_permissions( - config: &Config, - guild_id: &str, - channel_id: &str, -) -> Result< - RpcOutcome, - String, -> { - use crate::openhuman::channels::providers::discord::api; - - if guild_id.is_empty() || channel_id.is_empty() { - return Err("guild_id and channel_id are required".to_string()); - } - let token = discord_bot_token(config).await?; - let check = api::check_channel_permissions(&token, guild_id, channel_id) - .await - .map_err(|e| format!("Discord API error: {e}"))?; - Ok(RpcOutcome::single_log( - check, - format!("discord permissions checked for channel {channel_id}"), - )) -} - -#[cfg(test)] -#[path = "ops_tests.rs"] -mod tests; diff --git a/src/openhuman/channels/controllers/ops/connect.rs b/src/openhuman/channels/controllers/ops/connect.rs new file mode 100644 index 0000000000..1021dd7d06 --- /dev/null +++ b/src/openhuman/channels/controllers/ops/connect.rs @@ -0,0 +1,649 @@ +//! Core channel connect/disconnect/status operations. 
+ +use serde_json::{json, Value}; + +use crate::openhuman::channels::providers::yuanbao::YuanbaoConfig; +use crate::openhuman::config::{Config, DiscordConfig, IMessageConfig, TelegramConfig}; +use crate::openhuman::credentials; +use crate::openhuman::memory_store::chunks::store as memory_tree_store; +use crate::openhuman::memory_store::chunks::types::SourceKind; +use crate::rpc::RpcOutcome; + +use super::super::definitions::{ + all_channel_definitions, find_channel_definition, ChannelAuthMode, ChannelDefinition, +}; +use super::types::{ChannelConnectionResult, ChannelStatusEntry, ChannelTestResult}; +use super::yuanbao::{ + build_effective_yuanbao_config, require_yuanbao_field, verify_yuanbao_credentials, +}; + +/// Credential provider key for channel connections: `"channel:{id}:{mode}"`. +pub(crate) fn credential_provider(channel_id: &str, mode: ChannelAuthMode) -> String { + format!("channel:{}:{}", channel_id, mode) +} + +pub(crate) fn channel_config_connected( + config: &Config, + channel_id: &str, + mode: ChannelAuthMode, +) -> bool { + let channels = &config.channels_config; + match (channel_id, mode) { + ("telegram", ChannelAuthMode::BotToken) => channels.telegram.is_some(), + ("discord", ChannelAuthMode::BotToken) => channels.discord.is_some(), + ("slack", _) => channels.slack.is_some(), + ("mattermost", _) => channels.mattermost.is_some(), + ("imessage", ChannelAuthMode::ManagedDm) => channels.imessage.is_some(), + ("matrix", _) => channels.matrix.is_some(), + ("signal", _) => channels.signal.is_some(), + ("whatsapp", _) => channels.whatsapp.is_some(), + ("linq", _) => channels.linq.is_some(), + ("email", _) => channels.email.is_some(), + ("irc", _) => channels.irc.is_some(), + ("lark", _) => channels.lark.is_some(), + ("dingtalk", _) => channels.dingtalk.is_some(), + ("qq", _) => channels.qq.is_some(), + ("yuanbao", ChannelAuthMode::ApiKey) => channels.yuanbao.is_some(), + _ => false, + } +} + +pub(crate) fn parse_allowed_users(value: Option<&Value>) -> 
Vec { + let mut out: Vec = Vec::new(); + + let mut push_identity = |raw: &str| { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return; + } + let normalized = trimmed.trim_start_matches('@').trim(); + if normalized.is_empty() { + return; + } + let canonical = normalized.to_lowercase(); + if !out + .iter() + .any(|existing| existing.eq_ignore_ascii_case(&canonical)) + { + out.push(canonical); + } + }; + + match value { + Some(Value::String(s)) => { + for part in s.split([',', '\n', '\r']) { + push_identity(part); + } + } + Some(Value::Array(items)) => { + for item in items { + if let Some(s) = item.as_str() { + for part in s.split([',', '\n', '\r']) { + push_identity(part); + } + } + } + } + _ => {} + } + + out +} + +pub(super) fn parse_optional_bool(value: Option<&Value>) -> Option { + match value { + Some(Value::Bool(b)) => Some(*b), + Some(Value::Number(n)) => n.as_i64().map(|v| v != 0), + Some(Value::String(s)) => { + let normalized = s.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => Some(true), + "0" | "false" | "no" | "off" => Some(false), + _ => None, + } + } + _ => None, + } +} + +fn clear_channel_memory(config: &Config, channel_id: &str) -> anyhow::Result { + let exact = memory_tree_store::delete_chunks_by_source(config, SourceKind::Chat, channel_id)?; + let prefixed = memory_tree_store::delete_chunks_by_source_prefix( + config, + SourceKind::Chat, + &format!("{channel_id}:"), + )?; + Ok(exact + prefixed) +} + +/// List all available channel definitions. +pub async fn list_channels() -> Result>, String> { + Ok(RpcOutcome::new(all_channel_definitions(), vec![])) +} + +/// Describe a single channel by id. +pub async fn describe_channel(channel_id: &str) -> Result, String> { + let def = find_channel_definition(channel_id) + .ok_or_else(|| format!("unknown channel: {channel_id}"))?; + Ok(RpcOutcome::new(def, vec![])) +} + +/// Initiate a channel connection. 
+/// +/// For `BotToken`/`ApiKey` modes: validates fields and stores credentials. +/// For `OAuth`/`ManagedDm` modes: returns the auth action the frontend should handle. +pub async fn connect_channel( + config: &Config, + channel_id: &str, + auth_mode: ChannelAuthMode, + credentials_value: Value, +) -> Result, String> { + let def = find_channel_definition(channel_id) + .ok_or_else(|| format!("unknown channel: {channel_id}"))?; + + let spec = def.auth_mode_spec(auth_mode).ok_or_else(|| { + format!( + "channel '{}' does not support auth mode '{}'", + channel_id, auth_mode + ) + })?; + + // For OAuth/managed modes, return the auth action without storing credentials. + if let Some(action) = spec.auth_action { + return Ok(RpcOutcome::new( + ChannelConnectionResult { + status: "pending_auth".to_string(), + restart_required: false, + auth_action: Some(action.to_string()), + message: Some(format!("Initiate '{}' auth flow on the frontend. Ignore if you are already in the auth flow.", action)), + }, + vec![], + )); + } + + // Credential-based modes: validate required fields. + let creds_map = credentials_value + .as_object() + .ok_or("credentials must be a JSON object")?; + + def.validate_credentials(auth_mode, creds_map)?; + + // Yuanbao: build the effective config (with any client-supplied + // endpoint overrides applied) once, verify against THAT cluster, and + // reuse the same config for persistence below. This prevents the + // verifier from validating against prod while the runtime then + // reconnects to a pre-release cluster after restart. 
+ let mut prebuilt_yuanbao_config: Option = None; + if channel_id == "yuanbao" && auth_mode == ChannelAuthMode::ApiKey { + let app_key = require_yuanbao_field(creds_map, "app_key")?; + let app_secret = require_yuanbao_field(creds_map, "app_secret")?; + let base = config.channels_config.yuanbao.clone().unwrap_or_default(); + let effective = build_effective_yuanbao_config(base, creds_map, app_key); + verify_yuanbao_credentials(&effective, &app_secret).await?; + prebuilt_yuanbao_config = Some(effective); + } + + // iMessage is local-only (no credentials): persist channels_config + return connected. + if channel_id == "imessage" && auth_mode == ChannelAuthMode::ManagedDm { + let allowed_contacts = parse_allowed_users(creds_map.get("allowed_contacts")); + let allowed_contacts_count = allowed_contacts.len(); + + let mut persisted = config.clone(); + persisted.channels_config.imessage = Some(IMessageConfig { allowed_contacts }); + + persisted + .save() + .await + .map_err(|e| format!("failed to persist imessage config.toml: {e}"))?; + + tracing::info!( + target: "openhuman::channels", + allowed_contacts_count, + "[imessage] connect_channel: wrote channels_config.imessage; restart core for AppleScript bridge to load" + ); + + return Ok(RpcOutcome::single_log( + ChannelConnectionResult { + status: "connected".to_string(), + restart_required: true, + auth_action: None, + message: Some( + "iMessage channel configured. Grant Full Disk Access and restart the service to activate.".to_string(), + ), + }, + "stored imessage channel config (local-only)".to_string(), + )); + } + + // Store credentials via the credentials domain. + let provider_key = credential_provider(channel_id, auth_mode); + + // Extract the primary token field (bot_token or api_key) if present. + let token = creds_map + .get("bot_token") + .or_else(|| creds_map.get("api_key")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + // Store remaining fields as metadata. 
+ let fields = if creds_map.len() > 1 || (creds_map.len() == 1 && token.is_none()) { + Some(Value::Object(creds_map.clone())) + } else { + None + }; + + credentials::ops::store_provider_credentials( + config, + &provider_key, + None, // default profile + token, + fields, + Some(true), + ) + .await + .map_err(|e| format!("failed to store credentials: {e}"))?; + + // Keep runtime channel config in sync so listeners can actually start + // with the credentials just connected from the UI. + if channel_id == "telegram" && auth_mode == ChannelAuthMode::BotToken { + let bot_token = creds_map + .get("bot_token") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .ok_or_else(|| "missing required bot_token".to_string())? + .to_string(); + let allowed_users = parse_allowed_users(creds_map.get("allowed_users")); + let allowed_users_count = allowed_users.len(); + + let mut persisted = config.clone(); + let (stream_mode, draft_update_interval_ms, silent_streaming, mention_only) = + if let Some(existing) = persisted.channels_config.telegram.as_ref() { + ( + existing.stream_mode, + existing.draft_update_interval_ms, + existing.silent_streaming, + existing.mention_only, + ) + } else { + ( + crate::openhuman::config::StreamMode::default(), + 1000, + true, + false, + ) + }; + + persisted.channels_config.telegram = Some(TelegramConfig { + bot_token, + allowed_users, + stream_mode, + draft_update_interval_ms, + silent_streaming, + mention_only, + }); + + persisted + .save() + .await + .map_err(|e| format!("failed to persist telegram config.toml: {e}"))?; + + tracing::info!( + target: "openhuman::channels", + allowed_users_count, + mention_only, + "[telegram] connect_channel: wrote channels_config.telegram; restart core for listener to load token" + ); + } else if channel_id == "discord" && auth_mode == ChannelAuthMode::BotToken { + let bot_token = creds_map + .get("bot_token") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + 
.ok_or_else(|| "missing required bot_token".to_string())? + .to_string(); + + let guild_id = creds_map + .get("guild_id") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()); + let discord_channel_id = creds_map + .get("channel_id") + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()); + + let mut persisted = config.clone(); + let existing = persisted.channels_config.discord.as_ref(); + let parsed_allowed_users = parse_allowed_users(creds_map.get("allowed_users")); + let allowed_users = if parsed_allowed_users.is_empty() { + existing + .map(|cfg| cfg.allowed_users.clone()) + .unwrap_or_default() + } else { + parsed_allowed_users + }; + let allowed_users_count = allowed_users.len(); + let listen_to_bots = parse_optional_bool(creds_map.get("listen_to_bots")) + .unwrap_or_else(|| existing.map(|cfg| cfg.listen_to_bots).unwrap_or(false)); + let mention_only = parse_optional_bool(creds_map.get("mention_only")) + .unwrap_or_else(|| existing.map(|cfg| cfg.mention_only).unwrap_or(false)); + + persisted.channels_config.discord = Some(DiscordConfig { + bot_token, + guild_id: guild_id.clone(), + channel_id: discord_channel_id.clone(), + allowed_users, + listen_to_bots, + mention_only, + }); + + persisted + .save() + .await + .map_err(|e| format!("failed to persist discord config.toml: {e}"))?; + + tracing::info!( + target: "openhuman::channels", + has_guild_id = guild_id.is_some(), + has_channel_id = discord_channel_id.is_some(), + allowed_users_count, + listen_to_bots, + mention_only, + "[discord] connect_channel: wrote channels_config.discord; restart core for listener to load token" + ); + } else if channel_id == "yuanbao" && auth_mode == ChannelAuthMode::ApiKey { + // Reuse the effective config built above (with `env` / `api_domain` + // / `ws_domain` / `route_env` overrides already applied and + // `app_secret` already cleared) so persistence and verification + // can never 
diverge. + let yb_config = prebuilt_yuanbao_config.take().ok_or_else(|| { + "internal error: yuanbao config not built before persistence".to_string() + })?; + + let mut persisted = config.clone(); + persisted.channels_config.yuanbao = Some(yb_config); + + persisted + .save() + .await + .map_err(|e| format!("failed to persist yuanbao config.toml: {e}"))?; + + tracing::info!( + target: "openhuman::channels", + "[yuanbao] connect_channel: wrote channels_config.yuanbao (secret stored in credentials); restart core for WS listener" + ); + } + + Ok(RpcOutcome::single_log( + ChannelConnectionResult { + status: "connected".to_string(), + restart_required: true, + auth_action: None, + message: Some(format!( + "Channel '{}' credentials stored. Restart the service to activate.", + channel_id + )), + }, + format!("stored credentials for {}", provider_key), + )) +} + +/// Disconnect a channel by removing stored credentials. +pub async fn disconnect_channel( + config: &Config, + channel_id: &str, + auth_mode: ChannelAuthMode, + clear_memory: bool, +) -> Result, String> { + // Verify channel exists. + find_channel_definition(channel_id).ok_or_else(|| format!("unknown channel: {channel_id}"))?; + + let provider_key = credential_provider(channel_id, auth_mode); + + // iMessage has no stored credentials (local-only); skip credential removal. 
+ if !(channel_id == "imessage" && auth_mode == ChannelAuthMode::ManagedDm) { + credentials::ops::remove_provider_credentials(config, &provider_key, None) + .await + .map_err(|e| format!("failed to remove credentials: {e}"))?; + } + + if channel_id == "telegram" && auth_mode == ChannelAuthMode::BotToken { + let mut persisted = config.clone(); + if persisted.channels_config.telegram.take().is_some() { + persisted + .save() + .await + .map_err(|e| format!("failed to clear telegram config.toml: {e}"))?; + tracing::info!( + target: "openhuman::channels", + "[telegram] disconnect_channel: cleared channels_config.telegram" + ); + } + } else if channel_id == "discord" && auth_mode == ChannelAuthMode::BotToken { + let mut persisted = config.clone(); + if persisted.channels_config.discord.take().is_some() { + persisted + .save() + .await + .map_err(|e| format!("failed to clear discord config.toml: {e}"))?; + tracing::info!( + target: "openhuman::channels", + "[discord] disconnect_channel: cleared channels_config.discord" + ); + } + } else if channel_id == "imessage" && auth_mode == ChannelAuthMode::ManagedDm { + let mut persisted = config.clone(); + if persisted.channels_config.imessage.take().is_some() { + persisted + .save() + .await + .map_err(|e| format!("failed to clear imessage config.toml: {e}"))?; + tracing::info!( + target: "openhuman::channels", + "[imessage] disconnect_channel: cleared channels_config.imessage" + ); + } + } else if channel_id == "yuanbao" && auth_mode == ChannelAuthMode::ApiKey { + let mut persisted = config.clone(); + if persisted.channels_config.yuanbao.take().is_some() { + persisted + .save() + .await + .map_err(|e| format!("failed to clear yuanbao config.toml: {e}"))?; + tracing::info!( + target: "openhuman::channels", + "[yuanbao] disconnect_channel: cleared channels_config.yuanbao" + ); + } + } + + let memory_chunks_deleted = if clear_memory { + clear_channel_memory(config, channel_id).map_err(|e| { + format!("channel disconnected, but 
failed to clear memory chunks: {e:#}") + })? + } else { + 0 + }; + + Ok(RpcOutcome::single_log( + json!({ + "channel": channel_id, + "auth_mode": auth_mode, + "disconnected": true, + "restart_required": true, + "memory_chunks_deleted": memory_chunks_deleted, + }), + format!("removed credentials for {}", provider_key), + )) +} + +/// Get connection status for one or all channels. +pub async fn channel_status( + config: &Config, + channel_id: Option<&str>, +) -> Result>, String> { + // List all stored credentials with "channel:" prefix. Uses the + // prefix-match helper because channel credentials are keyed as + // `channel::` and no single literal value matches them + // through `list_provider_credentials`'s exact-match filter. + let stored = credentials::ops::list_provider_credentials_by_prefix(config, "channel:") + .await + .map_err(|e| format!("failed to list credentials: {e}"))?; + + let stored_providers: Vec = stored.iter().map(|p| p.provider.clone()).collect(); + + let defs = match channel_id { + Some(id) => { + let def = + find_channel_definition(id).ok_or_else(|| format!("unknown channel: {id}"))?; + vec![def] + } + None => all_channel_definitions(), + }; + + let mut entries = Vec::new(); + for def in &defs { + for spec in &def.auth_modes { + let provider_key = credential_provider(def.id, spec.mode); + let has_creds = stored_providers.iter().any(|p| p == &provider_key); + let has_config = channel_config_connected(config, def.id, spec.mode); + let connected = has_creds || has_config; + entries.push(ChannelStatusEntry { + channel_id: def.id.to_string(), + auth_mode: spec.mode, + connected, + // Reflect actual credential presence, not connection state: + // a config-only channel is `connected` but has no stored + // credentials. Collapsing these misleads callers that branch on + // credential presence (e.g. "needs re-auth" surfaces). 
+ has_credentials: has_creds, + }); + } + } + + Ok(RpcOutcome::new(entries, vec![])) +} + +/// Return the slugs of all messaging channels currently connected, +/// merging the two storage layers OpenHuman uses for connection state. +/// +/// Two equally-authoritative sources exist today: +/// +/// * `config.channels_config.` — the legacy TOML field set by +/// credential-mode connects that need a runtime listener +/// (`bot_token` / `webhook` / `oauth`). These trigger +/// `restart_required = true` on the connect call. +/// * Provider credentials keyed `channel::` — set by the +/// newer managed-DM and OAuth flows that don't materialise a TOML +/// block but do persist a credential marker. +/// +/// Until both stores merge, any caller that only reads one will report +/// stale state to the user (e.g. the agent will say "Telegram not +/// connected" right after a managed-DM link succeeds — issue #1149). +/// This helper centralises the merge so every consumer agrees. +pub async fn connected_channel_slugs(config: &Config) -> Result, String> { + use std::collections::BTreeSet; + + let mut slugs: BTreeSet = BTreeSet::new(); + + // Layer 1: credential-mode channels written to TOML config. 
+ let cc = &config.channels_config; + if cc.telegram.is_some() { + slugs.insert("telegram".to_string()); + } + if cc.discord.is_some() { + slugs.insert("discord".to_string()); + } + if cc.slack.is_some() { + slugs.insert("slack".to_string()); + } + if cc.mattermost.is_some() { + slugs.insert("mattermost".to_string()); + } + if cc.email.is_some() { + slugs.insert("email".to_string()); + } + if cc.whatsapp.is_some() { + slugs.insert("whatsapp".to_string()); + } + if cc.signal.is_some() { + slugs.insert("signal".to_string()); + } + if cc.matrix.is_some() { + slugs.insert("matrix".to_string()); + } + if cc.imessage.is_some() { + slugs.insert("imessage".to_string()); + } + if cc.yuanbao.is_some() { + slugs.insert("yuanbao".to_string()); + } + if cc.irc.is_some() { + slugs.insert("irc".to_string()); + } + if cc.lark.is_some() { + slugs.insert("lark".to_string()); + } + if cc.dingtalk.is_some() { + slugs.insert("dingtalk".to_string()); + } + if cc.linq.is_some() { + slugs.insert("linq".to_string()); + } + if cc.qq.is_some() { + slugs.insert("qq".to_string()); + } + + // Layer 2: managed-DM / OAuth channels stored only as credentials + // under `channel::`. + let stored = credentials::ops::list_provider_credentials_by_prefix(config, "channel:") + .await + .map_err(|e| format!("failed to list channel credentials: {e}"))?; + for entry in &stored { + // provider format: "channel::" — extract slug. + if let Some(rest) = entry.provider.strip_prefix("channel:") { + if let Some((slug, _mode)) = rest.split_once(':') { + if !slug.is_empty() { + slugs.insert(slug.to_string()); + } + } + } + } + + Ok(slugs.into_iter().collect()) +} + +/// Test a channel connection without persisting credentials. 
+pub async fn test_channel( + _config: &Config, + channel_id: &str, + auth_mode: ChannelAuthMode, + credentials_value: Value, +) -> Result, String> { + let def = find_channel_definition(channel_id) + .ok_or_else(|| format!("unknown channel: {channel_id}"))?; + + let creds_map = credentials_value + .as_object() + .ok_or("credentials must be a JSON object")?; + + // Validate fields first. + def.validate_credentials(auth_mode, creds_map)?; + + // For now, field validation is the test. A future version can instantiate + // the channel provider and call health_check(). + Ok(RpcOutcome::new( + ChannelTestResult { + success: true, + message: format!( + "Credentials for '{}' ({}) are structurally valid.", + channel_id, auth_mode + ), + }, + vec![], + )) +} diff --git a/src/openhuman/channels/controllers/ops/discord.rs b/src/openhuman/channels/controllers/ops/discord.rs new file mode 100644 index 0000000000..0629b00fbb --- /dev/null +++ b/src/openhuman/channels/controllers/ops/discord.rs @@ -0,0 +1,231 @@ +//! Discord managed link flow and guild/channel discovery. + +use serde_json::Value; + +use crate::api::config::effective_backend_api_url; +use crate::api::jwt::get_session_token; +use crate::api::rest::BackendOAuthClient; +use crate::openhuman::config::Config; +use crate::openhuman::credentials; +use crate::rpc::RpcOutcome; + +use super::super::definitions::ChannelAuthMode; +use super::connect::credential_provider; +use super::types::{DiscordLinkCheckResult, DiscordLinkStartResult}; + +// --------------------------------------------------------------------------- +// Discord managed link flow +// --------------------------------------------------------------------------- + +/// Step 1: Create a Discord channel link token. +/// +/// Returns a short-lived token the user pastes into Discord as `!start `. +/// Requires an active session JWT. 
+pub async fn discord_link_start( + config: &Config, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? + .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!("[discord-link] creating channel link token via {}", api_url); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let payload = client + .create_channel_link_token("discord", &jwt) + .await + .map_err(|e| format!("failed to create Discord link token: {e}"))?; + + let link_token = payload + .get("linkToken") + .or_else(|| payload.get("token")) + .and_then(|v| v.as_str()) + .ok_or_else(|| { + format!( + "backend response missing linkToken field: {}", + serde_json::to_string(&payload).unwrap_or_default() + ) + })? + .trim() + .to_string(); + + if link_token.is_empty() { + return Err("backend returned empty link token".to_string()); + } + + let instructions = + format!("In Discord, send this message to the OpenHuman bot: !start {link_token}"); + + log::debug!( + "[discord-link] link token created, length={}", + link_token.len() + ); + + Ok(RpcOutcome::new( + DiscordLinkStartResult { + link_token, + instructions, + }, + vec![], + )) +} + +/// Step 2: Check whether the user has completed the Discord link. +/// +/// Polls `GET /auth/me` and checks whether the user profile now has a `discordId`. +/// On success, stores a `channel:discord:managed_dm` credential marker locally. 
+pub async fn discord_link_check( + config: &Config, + _link_token: &str, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)?.ok_or_else(|| "session JWT required".to_string())?; + + log::debug!("[discord-link] checking if user profile has discordId via GET /auth/me"); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let user_payload = client + .fetch_current_user(&jwt) + .await + .map_err(|e| format!("failed to fetch user profile: {e}"))?; + + let discord_id = user_payload + .get("discordId") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .or_else(|| { + user_payload + .get("discord_id") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + }); + + let linked = discord_id.is_some(); + + log::debug!( + "[discord-link] user profile has_discord_id={}, linked={}", + discord_id.is_some(), + linked + ); + + if linked { + let provider_key = credential_provider("discord", ChannelAuthMode::ManagedDm); + let discord_user_id = discord_id.unwrap_or("").to_string(); + + let mut fields_map = serde_json::Map::new(); + fields_map.insert("linked".to_string(), Value::Bool(true)); + if !discord_user_id.is_empty() { + fields_map.insert( + "discord_user_id".to_string(), + Value::String(discord_user_id), + ); + } + + credentials::ops::store_provider_credentials( + config, + &provider_key, + None, + Some("managed".to_string()), + Some(Value::Object(fields_map)), + Some(true), + ) + .await + .map_err(|e| format!("failed to store Discord managed channel credentials: {e}"))?; + + log::info!( + "[discord-link] Discord managed DM linked; credentials stored as {}", + provider_key + ); + } + + Ok(RpcOutcome::new( + DiscordLinkCheckResult { + linked, + details: if linked { Some(user_payload) } else { None }, + }, + vec![], + )) +} + +// --------------------------------------------------------------------------- +// Discord guild/channel discovery +// 
--------------------------------------------------------------------------- + +/// Retrieve the stored Discord bot token from credentials. +async fn discord_bot_token(config: &Config) -> Result { + let provider_key = credential_provider("discord", ChannelAuthMode::BotToken); + let auth = credentials::AuthService::from_config(config); + let profile = auth + .get_profile(&provider_key, None) + .map_err(|e| format!("failed to load Discord credentials: {e}"))? + .ok_or("Discord bot token not configured. Connect Discord first.")?; + + let token = profile.token.unwrap_or_default(); + if token.is_empty() { + return Err("Discord bot token is empty.".to_string()); + } + Ok(token) +} + +/// List Discord guilds (servers) the connected bot is a member of. +pub async fn discord_list_guilds( + config: &Config, +) -> Result< + RpcOutcome>, + String, +> { + use crate::openhuman::channels::providers::discord::api; + + let token = discord_bot_token(config).await?; + let guilds = api::list_bot_guilds(&token) + .await + .map_err(|e| format!("Discord API error: {e}"))?; + Ok(RpcOutcome::single_log(guilds, "discord guilds listed")) +} + +/// List text channels in a Discord guild. +pub async fn discord_list_channels( + config: &Config, + guild_id: &str, +) -> Result< + RpcOutcome>, + String, +> { + use crate::openhuman::channels::providers::discord::api; + + if guild_id.is_empty() { + return Err("guild_id is required".to_string()); + } + let token = discord_bot_token(config).await?; + let channels = api::list_guild_channels(&token, guild_id) + .await + .map_err(|e| format!("Discord API error: {e}"))?; + Ok(RpcOutcome::single_log( + channels, + format!("discord channels listed for guild {guild_id}"), + )) +} + +/// Check bot permissions in a Discord channel. 
+pub async fn discord_check_permissions( + config: &Config, + guild_id: &str, + channel_id: &str, +) -> Result< + RpcOutcome, + String, +> { + use crate::openhuman::channels::providers::discord::api; + + if guild_id.is_empty() || channel_id.is_empty() { + return Err("guild_id and channel_id are required".to_string()); + } + let token = discord_bot_token(config).await?; + let check = api::check_channel_permissions(&token, guild_id, channel_id) + .await + .map_err(|e| format!("Discord API error: {e}"))?; + Ok(RpcOutcome::single_log( + check, + format!("discord permissions checked for channel {channel_id}"), + )) +} diff --git a/src/openhuman/channels/controllers/ops/messaging.rs b/src/openhuman/channels/controllers/ops/messaging.rs new file mode 100644 index 0000000000..72567e3389 --- /dev/null +++ b/src/openhuman/channels/controllers/ops/messaging.rs @@ -0,0 +1,149 @@ +//! Channel messaging, reactions, and thread management. + +use serde_json::Value; + +use crate::api::config::effective_backend_api_url; +use crate::api::jwt::get_session_token; +use crate::api::rest::BackendOAuthClient; +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +/// Send a rich message to a channel via the backend API. +pub async fn channel_send_message( + config: &Config, + channel: &str, + message: Value, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? 
+ .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!( + "[channels] sending message to channel '{}' via {}", + channel, + api_url + ); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let result = client + .send_channel_message(channel, &jwt, message) + .await + .map_err(|e| format!("failed to send channel message: {e}"))?; + + log::debug!("[channels] send_message response: {:?}", result); + + Ok(RpcOutcome::new(result, vec![])) +} + +/// Send a reaction to a message in a channel via the backend API. +pub async fn channel_send_reaction( + config: &Config, + channel: &str, + reaction: Value, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? + .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!( + "[channels] sending reaction to channel '{}' via {}", + channel, + api_url + ); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let result = client + .send_channel_reaction(channel, &jwt, reaction) + .await + .map_err(|e| format!("failed to send channel reaction: {e}"))?; + + log::debug!("[channels] send_reaction response: {:?}", result); + + Ok(RpcOutcome::new(result, vec![])) +} + +/// Create a thread in a channel via the backend API. +pub async fn channel_create_thread( + config: &Config, + channel: &str, + title: &str, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? 
+ .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!( + "[channels] creating thread in channel '{}' title='{}' via {}", + channel, + title, + api_url + ); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let result = client + .create_channel_thread(channel, &jwt, title) + .await + .map_err(|e| format!("failed to create channel thread: {e}"))?; + + log::debug!("[channels] create_thread response: {:?}", result); + + Ok(RpcOutcome::new(result, vec![])) +} + +/// Close or reopen a thread in a channel via the backend API. +pub async fn channel_update_thread( + config: &Config, + channel: &str, + thread_id: &str, + action: &str, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? + .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!( + "[channels] updating thread '{}' in channel '{}' action='{}' via {}", + thread_id, + channel, + action, + api_url + ); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let result = client + .update_channel_thread(channel, &jwt, thread_id, action) + .await + .map_err(|e| format!("failed to update channel thread: {e}"))?; + + log::debug!("[channels] update_thread response: {:?}", result); + + Ok(RpcOutcome::new(result, vec![])) +} + +/// List threads in a channel via the backend API. +pub async fn channel_list_threads( + config: &Config, + channel: &str, + active: Option, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? 
+ .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!( + "[channels] listing threads in channel '{}' active={:?} via {}", + channel, + active, + api_url + ); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let result = client + .list_channel_threads(channel, &jwt, active) + .await + .map_err(|e| format!("failed to list channel threads: {e}"))?; + + log::debug!("[channels] list_threads response: {:?}", result); + + Ok(RpcOutcome::new(result, vec![])) +} diff --git a/src/openhuman/channels/controllers/ops/mod.rs b/src/openhuman/channels/controllers/ops/mod.rs new file mode 100644 index 0000000000..d8f024e18f --- /dev/null +++ b/src/openhuman/channels/controllers/ops/mod.rs @@ -0,0 +1,45 @@ +//! Channel controller business logic. + +mod connect; +mod discord; +mod messaging; +mod telegram; +mod types; +mod yuanbao; + +// Re-export public types. +pub use types::{ + ChannelConnectionResult, ChannelStatusEntry, ChannelTestResult, DiscordLinkCheckResult, + DiscordLinkStartResult, TelegramLoginCheckResult, TelegramLoginStartResult, +}; + +// Re-export types needed by tests. +#[cfg(test)] +pub(crate) use crate::openhuman::channels::controllers::{ChannelAuthMode, ChannelDefinition}; +#[cfg(test)] +pub(crate) use crate::openhuman::config::Config; +#[cfg(test)] +pub(crate) use connect::channel_config_connected; +#[cfg(test)] +pub(crate) use connect::credential_provider; +#[cfg(test)] +pub(crate) use connect::parse_allowed_users; + +// Re-export public ops functions. 
+pub use connect::{ + channel_status, connect_channel, connected_channel_slugs, describe_channel, disconnect_channel, + list_channels, test_channel, +}; +pub use discord::{ + discord_check_permissions, discord_link_check, discord_link_start, discord_list_channels, + discord_list_guilds, +}; +pub use messaging::{ + channel_create_thread, channel_list_threads, channel_send_message, channel_send_reaction, + channel_update_thread, +}; +pub use telegram::{telegram_login_check, telegram_login_start}; + +#[cfg(test)] +#[path = "../ops_tests.rs"] +mod tests; diff --git a/src/openhuman/channels/controllers/ops/telegram.rs b/src/openhuman/channels/controllers/ops/telegram.rs new file mode 100644 index 0000000000..9cd6550319 --- /dev/null +++ b/src/openhuman/channels/controllers/ops/telegram.rs @@ -0,0 +1,174 @@ +//! Managed Telegram login flow. + +use serde_json::Value; + +use crate::api::config::{app_env_from_env, effective_backend_api_url, is_staging_app_env}; +use crate::api::jwt::get_session_token; +use crate::api::rest::BackendOAuthClient; +use crate::openhuman::config::Config; +use crate::openhuman::credentials; +use crate::rpc::RpcOutcome; + +use super::super::definitions::ChannelAuthMode; +use super::connect::credential_provider; +use super::types::{TelegramLoginCheckResult, TelegramLoginStartResult}; + +/// Default managed Telegram bot when `OPENHUMAN_APP_ENV` is staging and no username override is set. +const DEFAULT_TELEGRAM_BOT_USERNAME_STAGING: &str = "alphahumantest_bot"; +/// Default managed Telegram bot when app env is production (or unset) and no username override is set. +const DEFAULT_TELEGRAM_BOT_USERNAME_PRODUCTION: &str = "openhumanaibot"; + +/// Resolve the managed Telegram bot username from env, or from staging vs production defaults using +/// `OPENHUMAN_APP_ENV` / `VITE_OPENHUMAN_APP_ENV` (via `app_env_from_env`). 
+fn telegram_bot_username() -> String { + if let Ok(v) = std::env::var("OPENHUMAN_TELEGRAM_BOT_USERNAME") { + return v; + } + if let Ok(v) = std::env::var("VITE_TELEGRAM_BOT_USERNAME") { + return v; + } + if is_staging_app_env(app_env_from_env().as_deref()) { + return DEFAULT_TELEGRAM_BOT_USERNAME_STAGING.to_string(); + } + DEFAULT_TELEGRAM_BOT_USERNAME_PRODUCTION.to_string() +} + +/// Step 1: Create a channel link token for Telegram and return the deep link URL. +/// +/// Requires an active session JWT. +pub async fn telegram_login_start( + config: &Config, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)? + .ok_or_else(|| "session JWT required; complete login first".to_string())?; + + log::debug!( + "[telegram-login] creating channel link token via {}", + api_url + ); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let payload = client + .create_channel_link_token("telegram", &jwt) + .await + .map_err(|e| format!("failed to create Telegram link token: {e}"))?; + + // Extract the link token from the backend response. + // Expected shape: { "linkToken": "..." } or { "token": "..." } + let link_token = payload + .get("linkToken") + .or_else(|| payload.get("token")) + .and_then(|v| v.as_str()) + .ok_or_else(|| { + format!( + "backend response missing linkToken field: {}", + serde_json::to_string(&payload).unwrap_or_default() + ) + })? 
+ .trim() + .to_string(); + + if link_token.is_empty() { + return Err("backend returned empty link token".to_string()); + } + + let bot_username = telegram_bot_username(); + let telegram_url = format!("https://t.me/{}?start={}", bot_username, link_token); + + log::debug!( + "[telegram-login] link token created, deep link: {}", + telegram_url + ); + + Ok(RpcOutcome::new( + TelegramLoginStartResult { + link_token, + telegram_url, + bot_username, + }, + vec![], + )) +} + +/// Step 2: Check whether the user has completed the Telegram link (clicked /start). +/// +/// Polls `GET /auth/me` and checks whether the user profile now has a `telegramId`. +/// The frontend should poll this until `linked` becomes `true`. +/// On success, stores a `channel:telegram:managed_dm` credential marker locally. +pub async fn telegram_login_check( + config: &Config, + _link_token: &str, +) -> Result, String> { + let api_url = effective_backend_api_url(&config.api_url); + let jwt = get_session_token(config)?.ok_or_else(|| "session JWT required".to_string())?; + + log::debug!("[telegram-login] checking if user profile has telegramId via GET /auth/me"); + + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + let user_payload = client + .fetch_current_user(&jwt) + .await + .map_err(|e| format!("failed to fetch user profile: {e}"))?; + + // Check if the user now has a telegramId set. + let telegram_id = user_payload + .get("telegramId") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + .or_else(|| { + user_payload + .get("telegram_id") + .and_then(|v| v.as_str()) + .filter(|s| !s.is_empty()) + }); + + let linked = telegram_id.is_some(); + + log::debug!( + "[telegram-login] user profile has_telegram_id={}, linked={}", + telegram_id.is_some(), + linked + ); + + if linked { + // Store a credential marker so `channel_status` reports connected. 
+ let provider_key = credential_provider("telegram", ChannelAuthMode::ManagedDm); + + let telegram_user_id = telegram_id.unwrap_or("").to_string(); + + let mut fields_map = serde_json::Map::new(); + fields_map.insert("linked".to_string(), Value::Bool(true)); + if !telegram_user_id.is_empty() { + fields_map.insert( + "telegram_user_id".to_string(), + Value::String(telegram_user_id), + ); + } + + // Store using a placeholder token (managed mode has no user-visible token). + credentials::ops::store_provider_credentials( + config, + &provider_key, + None, + Some("managed".to_string()), + Some(Value::Object(fields_map)), + Some(true), + ) + .await + .map_err(|e| format!("failed to store managed channel credentials: {e}"))?; + + log::info!( + "[telegram-login] Telegram managed DM linked; credentials stored as {}", + provider_key + ); + } + + Ok(RpcOutcome::new( + TelegramLoginCheckResult { + linked, + details: if linked { Some(user_payload) } else { None }, + }, + vec![], + )) +} diff --git a/src/openhuman/channels/controllers/ops/types.rs b/src/openhuman/channels/controllers/ops/types.rs new file mode 100644 index 0000000000..ce4a65e989 --- /dev/null +++ b/src/openhuman/channels/controllers/ops/types.rs @@ -0,0 +1,79 @@ +//! Shared response types for channel controller operations. + +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +/// Result returned by `connect_channel`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChannelConnectionResult { + /// `"connected"` for credential-based modes, `"pending_auth"` for OAuth/managed. + pub status: String, + /// Whether the service must be restarted for the channel to become active. + pub restart_required: bool, + /// For OAuth/managed modes: the action ID the frontend should handle. + #[serde(skip_serializing_if = "Option::is_none")] + pub auth_action: Option, + /// Human-readable status message. 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub message: Option, +} + +/// Single entry returned by `channel_status`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChannelStatusEntry { + pub channel_id: String, + pub auth_mode: super::super::definitions::ChannelAuthMode, + pub connected: bool, + pub has_credentials: bool, +} + +/// Result returned by `test_channel`. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChannelTestResult { + pub success: bool, + pub message: String, +} + +/// Result from `telegram_login_start`. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TelegramLoginStartResult { + /// The short-lived link token created by the backend. + pub link_token: String, + /// Full Telegram deep link URL the user should open. + pub telegram_url: String, + /// Bot username used. + pub bot_username: String, +} + +/// Result from `telegram_login_check`. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct TelegramLoginCheckResult { + /// Whether the Telegram user has been linked to the app user. + pub linked: bool, + /// Backend-provided status payload (may include telegramUserId, etc.). + #[serde(skip_serializing_if = "Option::is_none")] + pub details: Option, +} + +/// Result from `discord_link_start`. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct DiscordLinkStartResult { + /// The short-lived link token to paste into Discord. + pub link_token: String, + /// Human-readable instruction shown to the user. + pub instructions: String, +} + +/// Result from `discord_link_check`. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(rename_all = "camelCase")] +pub struct DiscordLinkCheckResult { + /// Whether the Discord account has been linked to the app user. + pub linked: bool, + /// Backend-provided status payload (may include discordId, etc.). 
+ #[serde(skip_serializing_if = "Option::is_none")] + pub details: Option, +} diff --git a/src/openhuman/channels/controllers/ops/yuanbao.rs b/src/openhuman/channels/controllers/ops/yuanbao.rs new file mode 100644 index 0000000000..1e3dba8d4b --- /dev/null +++ b/src/openhuman/channels/controllers/ops/yuanbao.rs @@ -0,0 +1,89 @@ +//! Yuanbao-specific credential helpers. + +use serde_json::Value; + +use crate::openhuman::channels::providers::yuanbao::sign::SignManager; +use crate::openhuman::channels::providers::yuanbao::YuanbaoConfig; + +/// Read a required non-empty Yuanbao credential field from the connect-channel +/// payload. Returns the trimmed value or an error naming the missing field. +pub(super) fn require_yuanbao_field( + creds_map: &serde_json::Map, + key: &str, +) -> Result { + creds_map + .get(key) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .ok_or_else(|| format!("missing required {key}")) +} + +/// Build the **effective** Yuanbao config that will be used for both +/// preflight verification and persistence. +/// +/// Starts from the existing TOML (so manually-installed deployments keep +/// any custom routes), overlays the client-supplied endpoint overrides +/// (`env` / `api_domain` / `ws_domain` / `route_env`), then calls +/// `apply_env_defaults` so the verifier hits the correct cluster — e.g. a +/// user submitting `env = "pre"` is verified against the pre-release +/// sign-token endpoint instead of the default prod one. +/// +/// `app_secret` is intentionally left empty: the runtime loads it from +/// the encrypted credentials store at startup, never from `config.toml`. 
+pub(super) fn build_effective_yuanbao_config(
+    base: YuanbaoConfig,
+    creds_map: &serde_json::Map<String, Value>,
+    app_key: String,
+) -> YuanbaoConfig {
+    let opt_string = |key: &str| -> Option<String> {
+        creds_map // an override only counts when it is a non-blank string
+            .get(key)
+            .and_then(|v| v.as_str())
+            .map(str::trim)
+            .filter(|s| !s.is_empty())
+            .map(|s| s.to_string())
+    };
+
+    let mut cfg = base;
+    cfg.app_key = app_key;
+    cfg.app_secret = String::new(); // the secret is deliberately not carried in this config
+    if let Some(env) = opt_string("env") {
+        cfg.env = env;
+    }
+    if let Some(api_domain) = opt_string("api_domain") {
+        cfg.api_domain = api_domain;
+    }
+    if let Some(ws_domain) = opt_string("ws_domain") {
+        cfg.ws_domain = ws_domain;
+    }
+    if let Some(route_env) = opt_string("route_env") {
+        cfg.route_env = route_env;
+    }
+    cfg.apply_env_defaults(); // called after the overlays so it sees the client-supplied values
+    cfg
+}
+
+/// Verify Yuanbao credentials against the `sign-token` endpoint before any
+/// persistence so invalid `app_key` / `app_secret` surface the upstream API
+/// error to the user instead of silently succeeding.
+///
+/// Takes the **effective** `YuanbaoConfig` already built from the client's
+/// overrides + TOML defaults, so the verifier targets whatever cluster the
+/// runtime will use after restart.
+pub(super) async fn verify_yuanbao_credentials(
+    yb_cfg: &YuanbaoConfig,
+    app_secret: &str,
+) -> Result<(), String> {
+    // Only the outcome of the token request matters here: the `Ok` value is
+    // dropped, and a failure is returned as a message that embeds the
+    // underlying error text.
+    let YuanbaoConfig { app_key, api_domain, route_env, .. } = yb_cfg;
+
+    SignManager::new(reqwest::Client::new())
+        .get_token(app_key, app_secret, api_domain, route_env)
+        .await
+        .map(|_token| ())
+        .map_err(|e| format!("yuanbao credential verification failed: {e}"))
+}
diff --git a/src/openhuman/channels/providers/web.rs b/src/openhuman/channels/providers/web.rs deleted file mode 100644 index d2563dd7d2..0000000000 --- a/src/openhuman/channels/providers/web.rs +++ /dev/null @@ -1,2894 +0,0 @@ -use async_trait::async_trait; -use once_cell::sync::Lazy; -use serde::Deserialize; -use serde_json::{json, Map, Value}; -use std::collections::{HashMap, HashSet}; -use std::sync::{Arc, OnceLock}; -use tokio::sync::{broadcast, Mutex}; -use uuid::Uuid; - -use crate::core::all::{ControllerFuture, RegisteredController}; -use crate::core::event_bus::{DomainEvent, EventHandler, SubscriptionHandle}; -use crate::core::socketio::{SubagentProgressDetail, WebChannelEvent}; -use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::openhuman::agent::profiles::{AgentProfile, AgentProfileStore, DEFAULT_PROFILE_ID}; -use crate::openhuman::agent::Agent; -use crate::openhuman::config::rpc as config_rpc; -use crate::openhuman::config::Config; -use crate::openhuman::prompt_injection::{ - enforce_prompt_input, PromptEnforcementAction, PromptEnforcementContext, -}; -use crate::openhuman::threads::turn_state::{TurnStateMirror, TurnStateStore}; -use crate::rpc::RpcOutcome; - -use super::presentation; - -static EVENT_BUS: Lazy> = Lazy::new(|| { - let (tx, _rx) = broadcast::channel(512); - tx -}); - -pub fn subscribe_web_channel_events() -> broadcast::Receiver { - EVENT_BUS.subscribe() -} - -pub fn publish_web_channel_event(event: WebChannelEvent) { - let _ = EVENT_BUS.send(event); -} - -static APPROVAL_SURFACE_HANDLE: OnceLock = OnceLock::new(); - -/// Bridge a parked `ApprovalGate` request onto the
web channel. When the gate -/// publishes `ApprovalRequested` carrying a chat thread/client (set via the -/// per-turn `ApprovalChatContext`), surface the "run X? (yes/no)" question as an -/// `approval_request` event on that thread so the user can answer in chat. -/// Idempotent. No-op for non-chat approvals (thread/client id absent). -pub fn register_approval_surface_subscriber() { - if APPROVAL_SURFACE_HANDLE.get().is_some() { - return; - } - match crate::core::event_bus::subscribe_global(Arc::new(ApprovalSurfaceSubscriber)) { - Some(handle) => { - let _ = APPROVAL_SURFACE_HANDLE.set(handle); - log::info!( - "[web-channel] approval-surface subscriber registered (domain=approval) — will bridge ApprovalRequested → approval_request socket event" - ); - } - None => { - log::warn!( - "[web-channel] failed to register approval-surface subscriber — bus not initialized" - ); - } - } -} - -/// Handle for the artifact-surface subscriber. Set once on -/// [`register_artifact_surface_subscriber`]; subsequent calls no-op. -static ARTIFACT_SURFACE_HANDLE: OnceLock = OnceLock::new(); - -/// Bridge artifact lifecycle events onto the web channel. -/// `DomainEvent::ArtifactPending` / `ArtifactReady` / `ArtifactFailed` -/// (published by `artifacts::store::{create,finalize,fail}_artifact`) -/// carry the thread_id + client_id when the producing turn ran under an -/// `APPROVAL_CHAT_CONTEXT`. When present, fan out as an -/// `artifact_pending` / `artifact_ready` / `artifact_failed` socket -/// event so the frontend `chatRuntimeSlice` can upsert the snapshot and -/// the `ArtifactCard` can render in the message timeline: -/// -/// - `artifact_pending` → render an in-progress "Generating…" card the -/// moment the producing tool dispatches (#3162). -/// - `artifact_ready` → swap the same card to a download surface when -/// the file lands (#2779). -/// - `artifact_failed` → swap to a retry-hint card on producer error. 
-/// -/// The card is keyed on `artifact_id`, so the Pending → Ready/Failed -/// transition reuses the same surface instead of flickering a new one. -/// Idempotent. No-op for non-chat events (thread/client id absent). -pub fn register_artifact_surface_subscriber() { - if ARTIFACT_SURFACE_HANDLE.get().is_some() { - return; - } - match crate::core::event_bus::subscribe_global(Arc::new(ArtifactSurfaceSubscriber)) { - Some(handle) => { - let _ = ARTIFACT_SURFACE_HANDLE.set(handle); - log::info!( - "[web-channel] artifact-surface subscriber registered (domain=artifact) — will bridge ArtifactPending/Ready/Failed → artifact_pending/artifact_ready/artifact_failed socket events" - ); - } - None => { - log::warn!( - "[web-channel] failed to register artifact-surface subscriber — bus not initialized" - ); - } - } -} - -struct ArtifactSurfaceSubscriber; - -#[async_trait] -impl EventHandler for ArtifactSurfaceSubscriber { - fn name(&self) -> &str { - "channels::web::artifact_surface" - } - - fn domains(&self) -> Option<&[&str]> { - Some(&["artifact"]) - } - - async fn handle(&self, event: &DomainEvent) { - match event { - DomainEvent::ArtifactReady { - artifact_id, - kind, - title, - workspace_dir, - path, - size_bytes, - thread_id, - client_id, - } => { - let (Some(thread_id), Some(client_id)) = (thread_id, client_id) else { - log::debug!( - "[web-channel] artifact-surface skip ArtifactReady id={artifact_id}: no chat context" - ); - return; - }; - log::info!( - "[web-channel] artifact-surface emitting artifact_ready id={artifact_id} kind={kind} thread_id={thread_id} client_id={client_id}" - ); - publish_web_channel_event(WebChannelEvent { - event: "artifact_ready".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - args: Some(serde_json::json!({ - "artifact_id": artifact_id, - "kind": kind, - "title": title, - "workspace_dir": workspace_dir, - "path": path, - "size_bytes": size_bytes, - })), - ..Default::default() - }); - } - 
DomainEvent::ArtifactFailed { - artifact_id, - kind, - title, - workspace_dir, - error, - thread_id, - client_id, - } => { - let (Some(thread_id), Some(client_id)) = (thread_id, client_id) else { - log::debug!( - "[web-channel] artifact-surface skip ArtifactFailed id={artifact_id}: no chat context" - ); - return; - }; - log::warn!( - "[web-channel] artifact-surface emitting artifact_failed id={artifact_id} kind={kind} thread_id={thread_id} client_id={client_id} error_len={}", - error.len() - ); - publish_web_channel_event(WebChannelEvent { - event: "artifact_failed".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - args: Some(serde_json::json!({ - "artifact_id": artifact_id, - "kind": kind, - "title": title, - "workspace_dir": workspace_dir, - "error": error, - })), - ..Default::default() - }); - } - DomainEvent::ArtifactPending { - artifact_id, - kind, - title, - workspace_dir, - path, - thread_id, - client_id, - } => { - let (Some(thread_id), Some(client_id)) = (thread_id, client_id) else { - log::debug!( - "[web-channel] artifact-surface skip ArtifactPending id={artifact_id}: no chat context" - ); - return; - }; - log::info!( - "[web-channel] artifact-surface emitting artifact_pending id={artifact_id} kind={kind} thread_id={thread_id} client_id={client_id}" - ); - publish_web_channel_event(WebChannelEvent { - event: "artifact_pending".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - args: Some(serde_json::json!({ - "artifact_id": artifact_id, - "kind": kind, - "title": title, - "workspace_dir": workspace_dir, - "path": path, - })), - ..Default::default() - }); - } - _ => {} - } - } -} - -struct ApprovalSurfaceSubscriber; - -#[async_trait] -impl EventHandler for ApprovalSurfaceSubscriber { - fn name(&self) -> &str { - "channels::web::approval_surface" - } - - fn domains(&self) -> Option<&[&str]> { - Some(&["approval"]) - } - - async fn handle(&self, event: &DomainEvent) { - if let 
DomainEvent::ApprovalRequested { - request_id, - tool_name, - action_summary, - args_redacted, - thread_id, - client_id, - .. - } = event - { - match (thread_id, client_id) { - (Some(thread_id), Some(client_id)) => { - // Short, neutral description — the card renders the exact - // command/args (from `args` below) and has Approve/Deny - // buttons, so no "reply yes/no" instruction here. - let question = format!("Run `{tool_name}` — {action_summary}"); - log::info!( - "[web-channel] approval-surface emitting approval_request request_id={request_id} thread_id={thread_id} client_id={client_id} tool={tool_name}" - ); - publish_web_channel_event(WebChannelEvent { - event: "approval_request".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - tool_name: Some(tool_name.clone()), - message: Some(question), - // The exact (redacted) command/args being requested, so - // the card can show precisely what will run. - args: Some(args_redacted.clone()), - ..Default::default() - }); - } - _ => { - log::warn!( - "[web-channel] approval-surface received ApprovalRequested request_id={request_id} tool={tool_name} but thread_id/client_id absent (thread={}, client={}) — NOT surfacing", - thread_id.is_some(), - client_id.is_some() - ); - } - } - } - } -} - -/// All inputs that the cached `SessionEntry`'s `Agent` was built from, -/// captured at build time. The cache-hit predicate is a single -/// `entry.fingerprint == current_fingerprint` comparison — pulling the -/// fields into a named struct (instead of inlining four `&&`s) makes -/// the predicate testable in isolation and makes "what invalidates the -/// cache?" answerable in one place. -/// -/// Adding a new dimension that should force a rebuild = add a field -/// here and populate it both at insert time and at the call-site -/// fingerprint construction. 
-#[derive(PartialEq, Debug, Clone)] -struct SessionCacheFingerprint { - /// Per-message `model_override` (clients can override the model - /// for an individual chat call). - model_override: Option, - /// Per-message `temperature` override (same channel as - /// `model_override`). - temperature: Option, - /// Which agent definition was used to build `agent`. Tracked so cache - /// invalidation can detect when the target changes between turns. - target_agent_id: String, - /// Bound provider string at build time for the selected workload - /// role (`chat`, `reasoning`, `agentic`, `coding`, `summarization`). - /// - /// Web-chat sessions cache a fully constructed `Agent`, which in - /// turn holds a concrete provider instance chosen up front by the - /// session builder. If the bound provider string changes in - /// Settings, the cache must invalidate so the next turn rebuilds - /// against the updated provider rather than silently reusing the - /// stale instance. - provider_binding: String, - /// Signature of the autonomy/access config (`[autonomy]`) at build time. - /// The cached `Agent` holds tools that each captured a `SecurityPolicy` - /// snapshot at construction, so a change to the agent-access tier - /// (`config.update_autonomy_settings` → Settings → Agent access) must - /// invalidate the cache — otherwise the next turn silently reuses tools - /// gated by the OLD policy and the setting appears to do nothing. Derived - /// from the on-disk autonomy block (read fresh each turn), so it flips the - /// moment a new tier is saved. - autonomy_signature: String, -} - -struct SessionEntry { - agent: Agent, - fingerprint: SessionCacheFingerprint, -} - -/// Deterministic signature of the autonomy/access config for the session cache -/// fingerprint. 
Serializing the whole `[autonomy]` block (serde emits fields in -/// stable declaration order) captures every knob that feeds `SecurityPolicy` — -/// `level`, `workspace_only`, `trusted_roots`, `allow_tool_install`, -/// `allowed_commands`, … — so saving any agent-access change flips the -/// signature and forces a rebuild. On the practically-impossible serialize -/// error we return an empty string, which just means "treat as changed". -fn autonomy_signature(config: &Config) -> String { - serde_json::to_string(&config.autonomy).unwrap_or_default() -} - -/// Decide which agent definition this turn should run with. -/// -/// All new chat turns route to the `orchestrator` agent directly. -/// The welcome agent has been removed; the Joyride walkthrough in the -/// frontend handles onboarding UI instead. -fn pick_target_agent_id(_config: &Config, profile: &AgentProfile) -> String { - if profile.id == DEFAULT_PROFILE_ID { - "orchestrator".to_string() - } else { - profile.agent_id.clone() - } -} - -#[derive(Debug)] -struct InFlightEntry { - request_id: String, - handle: tokio::task::JoinHandle<()>, - run_queue: Arc, -} - -#[derive(Debug, Clone)] -struct WebChatTaskResult { - full_response: String, - citations: Vec, -} - -static THREAD_SESSIONS: Lazy>> = - Lazy::new(|| Mutex::new(HashMap::new())); - -static IN_FLIGHT: Lazy>> = - Lazy::new(|| Mutex::new(HashMap::new())); -#[cfg(any(test, debug_assertions))] -static TEST_FORCED_RUN_CHAT_TASK_ERROR: Lazy>> = - Lazy::new(|| Mutex::new(None)); -/// Key for the per-thread runtime maps (`THREAD_SESSIONS`, `IN_FLIGHT`). -/// -/// Keyed by `thread_id` ALONE — the stable, persistent identity of a -/// conversation — NOT by the Socket.IO `client_id`, which is regenerated on -/// every reconnect. Keying these maps by `client_id` previously orphaned a -/// thread's cached session (conversation amnesia) and its in-flight task handle -/// (Cancel became a no-op) whenever the socket reconnected with a new id. 
Event -/// delivery still routes by `client_id` (the live socket); only the -/// thread-owned runtime state keys off `thread_id`. -fn key_for(thread_id: &str) -> String { - thread_id.to_string() -} - -fn event_session_id_for(client_id: &str, thread_id: &str) -> String { - json!({ - "client_id": client_id, - "thread_id": thread_id, - }) - .to_string() -} - -#[path = "web_errors.rs"] -mod web_errors; -pub(crate) use web_errors::{ - classify_inference_error, inference_budget_exceeded_user_message, - is_inference_budget_exceeded_error, -}; -#[cfg(any(test, debug_assertions))] -#[allow(unused_imports)] -pub(crate) use web_errors::{ - extract_provider_error_detail, extract_provider_name, generic_inference_error_user_message, - is_action_budget_exhausted, is_fallback_chain_exhausted, is_non_retryable_rate_limit_text, - parse_retry_after_secs_from_str, retry_after_hint, with_provider_detail, ClassifiedError, -}; - -#[cfg(any(test, debug_assertions))] -pub mod test_support { - #[derive(Debug, Clone, PartialEq, Eq)] - pub struct ClassifiedErrorSnapshot { - pub error_type: &'static str, - pub message: String, - pub source: &'static str, - pub retryable: bool, - pub retry_after_ms: Option, - pub provider: Option, - pub fallback_available: Option, - } - - pub fn classify_error_for_test(err: &str) -> ClassifiedErrorSnapshot { - let classified = super::classify_inference_error(err); - ClassifiedErrorSnapshot { - error_type: classified.error_type, - message: classified.message, - source: classified.source, - retryable: classified.retryable, - retry_after_ms: classified.retry_after_ms, - provider: classified.provider, - fallback_available: classified.fallback_available, - } - } - - pub fn extracted_provider_detail_for_test(err: &str) -> Option { - super::extract_provider_error_detail(err) - } - - pub fn retry_after_secs_for_test(err: &str) -> Option { - super::parse_retry_after_secs_from_str(err) - } - - pub fn is_non_retryable_rate_limit_for_test(lower: &str) -> bool { - 
super::is_non_retryable_rate_limit_text(lower) - } - - pub fn key_for_test(thread_id: &str) -> String { - super::key_for(thread_id) - } - - pub fn event_session_id_for_test(client_id: &str, thread_id: &str) -> String { - super::event_session_id_for(client_id, thread_id) - } - - pub async fn set_forced_run_chat_task_error_for_test(message: Option<&str>) { - super::set_test_forced_run_chat_task_error(message).await; - } -} - -fn prompt_guard_user_message(action: PromptEnforcementAction) -> &'static str { - match action { - PromptEnforcementAction::Allow => "Message accepted.", - PromptEnforcementAction::Blocked => { - "Your message was blocked by a security policy. Please rephrase and remove instruction-override or secret-exfiltration requests." - } - PromptEnforcementAction::ReviewBlocked => { - "Your message was flagged for security review and was not processed. Please rephrase the request in a direct, task-focused way." - } - } -} - -#[cfg(any(test, debug_assertions))] -pub(super) async fn set_test_forced_run_chat_task_error(message: Option<&str>) { - let mut slot = TEST_FORCED_RUN_CHAT_TASK_ERROR.lock().await; - *slot = message.map(str::to_string); -} - -pub async fn start_chat( - client_id: &str, - thread_id: &str, - message: &str, - model_override: Option, - temperature: Option, - profile_id: Option, - locale: Option, - queue_mode: Option, - metadata: ChatRequestMetadata, -) -> Result { - let client_id = client_id.trim().to_string(); - let thread_id = thread_id.trim().to_string(); - let message = message.trim().to_string(); - - if client_id.is_empty() { - return Err("client_id is required".to_string()); - } - if thread_id.is_empty() { - return Err("thread_id is required".to_string()); - } - if message.is_empty() { - return Err("message is required".to_string()); - } - - let request_id = Uuid::new_v4().to_string(); - let prompt_decision = enforce_prompt_input( - &message, - PromptEnforcementContext { - source: "channels.providers.web.start_chat", - request_id: 
Some(&request_id), - user_id: Some(&client_id), - session_id: Some(&thread_id), - }, - ); - if !matches!(prompt_decision.action, PromptEnforcementAction::Allow) { - log::warn!( - "[web-channel] prompt rejected client_id={} thread_id={} request_id={} action={} score={:.2} reasons={} hash={} chars={}", - client_id, - thread_id, - request_id, - match prompt_decision.action { - PromptEnforcementAction::Allow => "allow", - PromptEnforcementAction::Blocked => "block", - PromptEnforcementAction::ReviewBlocked => "review_blocked", - }, - prompt_decision.score, - prompt_decision - .reasons - .iter() - .map(|r| r.code.as_str()) - .collect::>() - .join(","), - prompt_decision.prompt_hash, - prompt_decision.prompt_chars, - ); - return Err(prompt_guard_user_message(prompt_decision.action).to_string()); - } - - // Chat-native approval: if this thread has a parked approval and the message - // is a yes/no reply, route it to the gate (resuming the parked turn) rather - // than starting a new turn — which would cancel the parked approval. Any - // other text falls through to the normal path below, which cancels the - // in-flight turn and dispatches the message fresh (the intended "redirect"). - if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() { - if let Some(request_id) = gate.pending_for_thread(&thread_id) { - if let Some(decision) = crate::openhuman::approval::parse_approval_reply(&message) { - match gate.decide(&request_id, decision) { - Ok(Some(_)) => { - log::info!( - "[web-channel] routed chat reply to approval gate thread_id={} request_id={} decision={}", - thread_id, - request_id, - decision.as_str() - ); - return Ok(request_id); - } - Ok(None) => { - // `decide` returns `Ok(None)` when the request is already - // gone / already decided — the parked turn was NOT resumed - // by this call. Don't ACK it as applied; fall through so the - // reply is dispatched as a fresh turn. 
- log::warn!( - "[web-channel] approval reply targeted a non-pending/already-decided request thread_id={} request_id={} decision={} — dispatching as fresh turn", - thread_id, - request_id, - decision.as_str() - ); - } - Err(err) => { - // Don't claim success: the parked turn is still waiting on - // its oneshot. Log and fall through so the reply is - // dispatched as a fresh turn rather than silently dropped - // (the stale parked request will TTL out). - log::warn!( - "[web-channel] failed to route chat reply to approval gate thread_id={} request_id={} decision={} err={}", - thread_id, - request_id, - decision.as_str(), - err - ); - } - } - } - } - } - - let map_key = key_for(&thread_id); - - let parsed_mode = match queue_mode.as_deref() { - Some("steer") => crate::openhuman::agent::harness::run_queue::QueueMode::Steer, - Some("followup") => crate::openhuman::agent::harness::run_queue::QueueMode::Followup, - Some("collect") => crate::openhuman::agent::harness::run_queue::QueueMode::Collect, - _ => crate::openhuman::agent::harness::run_queue::QueueMode::Interrupt, - }; - - // Non-interrupt modes: push into the running turn's queue and return. 
- if !matches!( - parsed_mode, - crate::openhuman::agent::harness::run_queue::QueueMode::Interrupt - ) { - let in_flight = IN_FLIGHT.lock().await; - if let Some(existing) = in_flight.get(&map_key) { - let queued_msg = crate::openhuman::agent::harness::run_queue::QueuedMessage { - text: message.clone(), - mode: parsed_mode, - client_id: client_id.clone(), - thread_id: thread_id.clone(), - queued_at_ms: std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap_or_default() - .as_millis() as u64, - model_override: model_override.clone(), - temperature, - profile_id: profile_id.clone(), - locale: locale.clone(), - }; - existing.run_queue.push(queued_msg).await; - let status = existing.run_queue.status().await; - log::info!( - "[web-channel] queued {} message thread_id={} request_id={} queue_depth={}", - parsed_mode, - thread_id, - request_id, - status.total - ); - crate::core::event_bus::publish_global(DomainEvent::RunQueueMessageQueued { - thread_id: thread_id.clone(), - mode: parsed_mode.to_string(), - queue_depth: status.total, - }); - return Ok(json!({ - "queued": true, - "queue_mode": parsed_mode.to_string(), - "client_id": client_id, - "thread_id": thread_id, - "request_id": request_id, - "queue_depth": status.total, - }) - .to_string()); - } - // No in-flight turn — fall through to start a fresh turn. - log::info!( - "[web-channel] no in-flight turn for {} mode thread_id={} — starting fresh", - parsed_mode, - thread_id - ); - } - - { - let mut in_flight = IN_FLIGHT.lock().await; - - // Interrupt path: abort any in-flight turn (existing behavior). 
- if let Some(existing) = in_flight.remove(&map_key) { - let cancelled_id = existing.request_id.clone(); - existing.handle.abort(); - log::info!( - "[web-channel] interrupted in-flight turn thread_id={} cancelled_request_id={}", - thread_id, - existing.request_id - ); - crate::core::event_bus::publish_global(DomainEvent::RunQueueInterrupted { - thread_id: thread_id.clone(), - cancelled_request_id: existing.request_id.clone(), - }); - publish_web_channel_event(WebChannelEvent { - event: "chat_error".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: cancelled_id, - full_response: None, - message: Some("Cancelled by newer request".to_string()), - error_type: Some("cancelled".to_string()), - error_source: None, - error_retryable: None, - error_retry_after_ms: None, - error_provider: None, - error_fallback_available: None, - tool_name: None, - skill_id: None, - args: None, - output: None, - success: None, - round: None, - reaction_emoji: None, - segment_index: None, - segment_total: None, - delta: None, - delta_kind: None, - tool_call_id: None, - citations: None, - subagent: None, - task_board: None, - }); - } - } - - let turn_run_queue = crate::openhuman::agent::harness::run_queue::RunQueue::new(); - let turn_run_queue_task = turn_run_queue.clone(); - - let client_id_task = client_id.clone(); - let thread_id_task = thread_id.clone(); - let request_id_task = request_id.clone(); - let map_key_task = map_key.clone(); - - let user_message = message.clone(); - let handle = tokio::spawn(async move { - // Scope the per-turn approval chat context so a parked `ApprovalGate` - // request (raised deep in the tool loop, which runs inline in this same - // task) carries the thread/client id — letting a yes/no chat reply be - // routed back to `approval_decide`. No sub-task is spawned between here - // and `intercept`, so the task-local propagates. 
- let approval_ctx = crate::openhuman::approval::ApprovalChatContext { - thread_id: thread_id_task.clone(), - client_id: client_id_task.clone(), - }; - // Scope the matching `AgentTurnOrigin::WebChat` alongside the chat - // context so the approval gate's origin-aware decision tree sees a - // web-routable turn. Both task-locals must wrap the same future — - // tokio task-locals do not cross `tokio::spawn`, and `intercept` - // runs inline within this task. - let origin = crate::openhuman::agent::turn_origin::AgentTurnOrigin::WebChat { - thread_id: thread_id_task.clone(), - client_id: client_id_task.clone(), - }; - let result = crate::openhuman::agent::turn_origin::with_origin( - origin, - crate::openhuman::approval::APPROVAL_CHAT_CONTEXT.scope( - approval_ctx, - run_chat_task( - &client_id_task, - &thread_id_task, - &request_id_task, - &user_message, - model_override, - temperature, - profile_id, - locale, - turn_run_queue_task, - metadata, - ), - ), - ) - .await; - - match result { - Ok(chat_result) => { - // ── Presentation layer (local model, fire-and-forget) ───── - // Segment the response into human-readable bubbles and - // decide whether to react — both run via local Ollama if - // available, zero cloud cost. 
- presentation::deliver_response( - &client_id_task, - &thread_id_task, - &request_id_task, - &chat_result.full_response, - &user_message, - &chat_result.citations, - ) - .await; - } - Err(err) => { - log::warn!( - "[web-channel] run_chat_task failed client_id={} thread_id={} request_id={} error={}", - client_id_task, - thread_id_task, - request_id_task, - err - ); - let detailed = format!( - "run_chat_task failed client_id={} thread_id={} request_id={} error={}", - client_id_task, thread_id_task, request_id_task, err - ); - let classified = classify_inference_error(&err); - let classified_type = classified.error_type; - let classified_type_string = classified_type.to_string(); - // Max-tool-iterations cap is a deterministic agent-state - // outcome surfaced to the user via the existing - // `WebChannelEvent::chat_error` event below. Skip the - // Sentry funnel entirely for that variant - // (OPENHUMAN-TAURI-98). Substring match is required here - // because the typed `AgentError` was flattened to a - // `String` at the native-bus boundary. - // - // Other errors flow through `report_error_or_expected` - // so transport-level transient failures (DNS/TCP/TLS - // handshake, ISP blocks — OPENHUMAN-TAURI-32 for the RU - // user who couldn't reach api.tinyhumans.ai at all) get - // logged as warn-level breadcrumbs instead of error - // events. Sentry has no signal to act on those — no - // status, no trace, no payload — and every retry - // exhaustion produces another noisy event. 
- if crate::openhuman::agent::error::is_max_iterations_error(&detailed) { - log::info!( - target: "web_channel", - "[web_channel.run_chat_task] suppressed Sentry emission for max-iteration \ - cap client_id={} thread_id={} request_id={} error_type={} message={}", - client_id_task, - thread_id_task, - request_id_task, - classified_type, - detailed - ); - } else { - crate::core::observability::report_error_or_expected( - detailed.as_str(), - "web_channel", - "run_chat_task", - &[ - ("channel", "web"), - ("error_type", classified_type), - ("thread_id", thread_id_task.as_str()), - ("request_id", request_id_task.as_str()), - ], - ); - } - publish_web_channel_event(WebChannelEvent { - event: "chat_error".to_string(), - client_id: client_id_task.clone(), - thread_id: thread_id_task.clone(), - request_id: request_id_task.clone(), - full_response: None, - message: Some(classified.message), - error_type: Some(classified_type_string), - error_source: Some(classified.source.to_string()), - error_retryable: Some(classified.retryable), - error_retry_after_ms: classified.retry_after_ms, - error_provider: classified.provider, - error_fallback_available: classified.fallback_available, - tool_name: None, - skill_id: None, - args: None, - output: None, - success: None, - round: None, - reaction_emoji: None, - segment_index: None, - segment_total: None, - delta: None, - delta_kind: None, - tool_call_id: None, - citations: None, - subagent: None, - task_board: None, - }); - } - } - - // Drain followup messages queued during this turn. 
- let followups = { - let mut in_flight = IN_FLIGHT.lock().await; - let followups = if let Some(current) = in_flight.get(&map_key_task) { - if current.request_id == request_id_task { - let fups = current.run_queue.drain_followups().await; - in_flight.remove(&map_key_task); - fups - } else { - Vec::new() - } - } else { - Vec::new() - }; - followups - }; - if !followups.is_empty() { - log::info!( - "[web-channel] dispatching {} followup(s) thread_id={}", - followups.len(), - thread_id_task - ); - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::RunQueueFollowupDispatched { - thread_id: thread_id_task.clone(), - followup_count: followups.len(), - }, - ); - // Dispatch each followup as a fresh turn on a new task to avoid - // Send issues with the nested async closure. - dispatch_followups(followups); - } - }); - - { - let mut in_flight = IN_FLIGHT.lock().await; - in_flight.insert( - map_key, - InFlightEntry { - request_id: request_id.clone(), - handle, - run_queue: turn_run_queue, - }, - ); - } - - Ok(request_id) -} - -fn dispatch_followups(followups: Vec) { - for fup in followups { - tokio::spawn(async move { - if let Err(err) = start_chat( - &fup.client_id, - &fup.thread_id, - &fup.text, - fup.model_override, - fup.temperature, - fup.profile_id, - fup.locale, - Some("followup".to_string()), - ChatRequestMetadata::default(), - ) - .await - { - log::warn!( - "[web-channel] failed to dispatch followup thread_id={} err={}", - fup.thread_id, - err - ); - } - }); - } -} - -/// Invalidate all cached agent sessions for the given thread ID. -/// Called when a thread is deleted so stale sessions don't leak -/// into reused thread IDs. 
-pub async fn invalidate_thread_sessions(thread_id: &str) { - let mut sessions = THREAD_SESSIONS.lock().await; - let keys_to_remove: Vec = sessions - .keys() - .filter(|k| k.as_str() == thread_id || k.ends_with(&format!("::{thread_id}"))) - .cloned() - .collect(); - for key in &keys_to_remove { - sessions.remove(key); - } - if !keys_to_remove.is_empty() { - log::debug!( - "[web-channel] invalidated {} cached session(s) for thread_id={}", - keys_to_remove.len(), - thread_id - ); - } -} - -/// Snapshot the IN_FLIGHT map for the test-support introspection RPC. -/// -/// Returned as `(map_key, request_id)` pairs. Not intended for any -/// production caller — release builds reach this via the bearer-gated -/// `/rpc` endpoint only, and the per-launch token file is debug-only. -pub async fn in_flight_entries_for_test() -> Vec<(String, String)> { - let guard = IN_FLIGHT.lock().await; - guard - .iter() - .map(|(k, v)| (k.clone(), v.request_id.clone())) - .collect() -} - -pub async fn cancel_chat(client_id: &str, thread_id: &str) -> Result, String> { - let client_id = client_id.trim(); - let thread_id = thread_id.trim(); - - if client_id.is_empty() { - return Err("client_id is required".to_string()); - } - if thread_id.is_empty() { - return Err("thread_id is required".to_string()); - } - - let map_key = key_for(thread_id); - let mut removed_request_id: Option = None; - - { - let mut in_flight = IN_FLIGHT.lock().await; - if let Some(existing) = in_flight.remove(&map_key) { - removed_request_id = Some(existing.request_id.clone()); - existing.handle.abort(); - } - } - - if let Some(request_id) = removed_request_id.clone() { - publish_web_channel_event(WebChannelEvent { - event: "chat_error".to_string(), - client_id: client_id.to_string(), - thread_id: thread_id.to_string(), - request_id, - full_response: None, - message: Some("Cancelled".to_string()), - error_type: Some("cancelled".to_string()), - error_source: None, - error_retryable: None, - error_retry_after_ms: None, - 
error_provider: None, - error_fallback_available: None, - tool_name: None, - skill_id: None, - args: None, - output: None, - success: None, - round: None, - reaction_emoji: None, - segment_index: None, - segment_total: None, - delta: None, - delta_kind: None, - tool_call_id: None, - citations: None, - subagent: None, - task_board: None, - }); - } - - Ok(removed_request_id) -} - -async fn run_chat_task( - client_id: &str, - thread_id: &str, - request_id: &str, - message: &str, - model_override: Option, - temperature: Option, - profile_id: Option, - locale: Option, - run_queue: Arc, - metadata: ChatRequestMetadata, -) -> Result { - #[cfg(any(test, debug_assertions))] - { - let mut slot = TEST_FORCED_RUN_CHAT_TASK_ERROR.lock().await; - if let Some(forced) = slot.take() { - log::debug!( - "[web-channel][test] forced run_chat_task failure client_id={} thread_id={} request_id={}", - client_id, - thread_id, - request_id - ); - return Err(forced); - } - } - - let config = config_rpc::load_config_with_timeout().await?; - let (_profiles_state, profile) = - AgentProfileStore::new(config.workspace_dir.clone()).resolve(profile_id.as_deref())?; - let map_key = key_for(thread_id); - let model_override = normalize_model_override(profile.model_override.clone()) - .or_else(|| normalize_model_override(model_override)); - let temperature = profile.temperature.or(temperature); - // Compute the routing decision up front so the cache lookup can - // detect when it has changed. This also keeps non-default profile - // switches from reusing a cached agent built for another target. 
- let target_agent_id = pick_target_agent_id(&config, &profile); - let provider_role = provider_role_for_model_override(model_override.as_deref()); - let current_fp = SessionCacheFingerprint { - model_override: model_override.clone(), - temperature, - target_agent_id: target_agent_id.clone(), - provider_binding: crate::openhuman::inference::provider::provider_for_role( - provider_role, - &config, - ), - autonomy_signature: autonomy_signature(&config), - }; - - let prior = { - let mut sessions = THREAD_SESSIONS.lock().await; - sessions.remove(&map_key) - }; - - let (mut agent, was_built_fresh) = match prior { - Some(entry) if entry.fingerprint == current_fp => { - log::info!( - "[web-channel] reusing cached session agent id={} for client={} thread={}", - target_agent_id, - client_id, - thread_id - ); - (entry.agent, false) - } - Some(prior_entry) => { - log::info!( - "[web-channel] cache miss — rebuilding session agent \ - (was id={}, now id={}; prior_provider_binding={}, now={}) \ - for client={} thread={}", - prior_entry.fingerprint.target_agent_id, - target_agent_id, - prior_entry.fingerprint.provider_binding, - current_fp.provider_binding, - client_id, - thread_id - ); - ( - build_session_agent( - &config, - client_id, - thread_id, - &target_agent_id, - &profile, - model_override.clone(), - temperature, - locale.as_deref(), - )?, - true, - ) - } - None => ( - build_session_agent( - &config, - client_id, - thread_id, - &target_agent_id, - &profile, - model_override.clone(), - temperature, - locale.as_deref(), - )?, - true, - ), - }; - - // Cold-boot resume from the conversation JSONL. - // - // The agent's `try_load_session_transcript` mechanism only fires - // when a transcript file matches `agent_definition_name` — it - // misses on cold boot if the previous process wrote transcripts - // under a different name (the `set_agent_definition_name` / - // `session_key` rename bug fixed in this PR). 
The conversation - // JSONL store is the authoritative per-thread message log either - // way, so seed from it whenever we just built a fresh agent. The - // method is a no-op if the agent already has a cached transcript - // or non-empty history, so this is cheap on the warm path too. - if was_built_fresh { - match crate::openhuman::memory_conversations::get_messages( - config.workspace_dir.clone(), - thread_id, - ) { - Ok(prior_messages) if !prior_messages.is_empty() => { - let pairs: Vec<(String, String)> = prior_messages - .into_iter() - .map(|m| (m.sender, m.content)) - .collect(); - if let Err(err) = agent.seed_resume_from_messages(pairs, message) { - log::warn!( - "[web-channel] failed to seed agent resume from conversation log \ - thread={} err={}", - thread_id, - err - ); - } - } - Ok(_) => { - log::debug!( - "[web-channel] no prior messages to seed for thread={} — first turn", - thread_id - ); - } - Err(err) => { - log::warn!( - "[web-channel] failed to read conversation log for resume thread={} err={}", - thread_id, - err - ); - } - } - } - - // Wire up a real-time progress channel so tool calls, iterations, - // and sub-agent events are emitted to the web channel as they happen - // (instead of retroactively after the loop finishes). - let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); - agent.set_on_progress(Some(progress_tx)); - agent.set_run_queue(Some(run_queue)); - let turn_state_store = TurnStateStore::new(config.workspace_dir.clone()); - spawn_progress_bridge( - progress_rx, - client_id.to_string(), - thread_id.to_string(), - request_id.to_string(), - turn_state_store, - metadata.clone(), - config.clone(), - ); - - // Make `thread_id` ambient for any outbound provider call inside - // the agent loop. The OpenAI-compatible provider reads it via - // `thread_context::current_thread_id()` and forwards it on - // `/openai/v1/chat/completions` so the backend can group - // InferenceLog entries and reuse the KV cache for this thread. 
- let result = match crate::openhuman::inference::provider::thread_context::with_thread_id( - thread_id.to_string(), - agent.run_single(message), - ) - .await - { - Ok(response) => { - let citations = agent.take_last_turn_citations(); - Ok(WebChatTaskResult { - full_response: response, - citations, - }) - } - Err(err) => { - let err_message = err.to_string(); - if is_inference_budget_exceeded_error(&err_message) { - log::warn!( - "[web-channel] inference budget exhausted for client={} thread={} request_id={} error_category=budget_exhausted", - client_id, - thread_id, - request_id - ); - Ok(WebChatTaskResult { - full_response: inference_budget_exceeded_user_message().to_string(), - citations: Vec::new(), - }) - } else { - Err(err_message) - } - } - }; - - // Voice / PTT integration (#3090 Task 4). When the chat was sent with - // `speak_reply: true`, drive the agent's full reply through - // `voice::reply_speech::synthesize_reply` so the renderer can play it. - // When the call originated as a PTT session, also publish - // `PttTranscriptCommitted` so screen-intelligence (and any future bus - // subscriber) can react to a completed PTT turn. - // - // Why here (not in the progress bridge): the bridge sees `TextDelta`s - // only when the inference provider streams. The non-streaming fallback - // (and the JSON-RPC E2E mocks) produce a single final response with no - // deltas — so buffering deltas alone loses the reply text in those - // paths. The full response is available right here, regardless of - // streaming mode, which makes this the most reliable hook point. - // - // Failures are non-fatal (TTS / observability are best-effort side - // channels). 
- if let Ok(ref task_result) = result { - let speak_reply = matches!(metadata.speak_reply, Some(true)); - let trimmed_response = task_result.full_response.trim(); - if speak_reply && !trimmed_response.is_empty() { - let opts = crate::openhuman::voice::reply_speech::ReplySpeechOptions::default(); - match crate::openhuman::voice::reply_speech::synthesize_reply( - &config, - &task_result.full_response, - &opts, - ) - .await - { - Ok(_) => log::debug!( - "[web_channel] reply_speech dispatched chars={} client_id={} thread_id={} request_id={}", - task_result.full_response.len(), - client_id, - thread_id, - request_id, - ), - Err(err) => log::warn!( - "[web_channel] reply_speech failed: {err} client_id={} thread_id={} request_id={}", - client_id, - thread_id, - request_id, - ), - } - } - if metadata.source.as_deref() == Some("ptt") { - if let Some(session_id) = metadata.session_id { - // TODO(#3090 T11): held_ms will be supplied by the renderer once the PTT - // watchdog reports actual hold duration. 0 is a placeholder until then. - crate::openhuman::voice::publish_ptt_transcript_committed( - thread_id.to_string(), - session_id, - task_result.full_response.chars().count(), - 0, - false, - ); - } - } - } - - // Clear the sender so it doesn't hold the channel open across sessions. - agent.set_on_progress(None); - - { - let mut sessions = THREAD_SESSIONS.lock().await; - sessions.insert( - map_key, - SessionEntry { - agent, - fingerprint: current_fp, - }, - ); - } - - result -} - -/// Spawn a background task that reads [`AgentProgress`] events from the -/// agent turn loop and translates them into [`WebChannelEvent`]s tagged -/// with the correct client/thread/request IDs. The task runs until the -/// sender is dropped (i.e. when the agent turn finishes). -/// -/// `metadata` is logged on the bridge's diagnostic lines so PTT turns are -/// easy to correlate across the stream of progress events. 
The -/// authoritative TTS / PTT-commit dispatch (`speak_reply` → -/// `voice::reply_speech::synthesize_reply`, `source == "ptt"` → -/// `publish_ptt_transcript_committed`) is owned by `run_chat_task`, which -/// sees the full assistant response even when the provider falls back to -/// non-streaming. -pub(crate) fn spawn_progress_bridge( - mut rx: tokio::sync::mpsc::Receiver, - client_id: String, - thread_id: String, - request_id: String, - turn_state_store: TurnStateStore, - metadata: ChatRequestMetadata, - config: crate::openhuman::config::Config, -) { - use crate::openhuman::agent::progress::AgentProgress; - use crate::openhuman::session_db::run_ledger::{ - AgentRunKind, AgentRunStatus, AgentRunUpsert, RunEventAppend, RunTelemetryUpsert, - }; - use std::collections::HashMap; - - tokio::spawn(async move { - log::debug!( - "[web_channel][bridge] spawned client_id={} thread_id={} request_id={} speak_reply={:?} source={:?} session_id={:?}", - client_id, - thread_id, - request_id, - metadata.speak_reply, - metadata.source, - metadata.session_id, - ); - let mut round: u32 = 0; - let mut events_seen: u64 = 0; - let mut parent_completed = false; - let mut parent_tool_count: u64 = 0; - let mut child_tool_counts: HashMap = HashMap::new(); - let mut turn_state = - TurnStateMirror::new(turn_state_store, thread_id.clone(), request_id.clone()); - while let Some(event) = rx.recv().await { - events_seen += 1; - turn_state.observe(&event); - // Per-variant trace so branch decisions are visible in - // terminal output when correlating progress over Socket.IO. - // Kept at trace-level for high-volume deltas and debug for - // lifecycle transitions. 
- match &event { - AgentProgress::TextDelta { delta, iteration } => { - log::trace!( - "[web_channel][bridge] text_delta round={} chars={} request_id={}", - iteration, - delta.len(), - request_id, - ); - } - AgentProgress::ThinkingDelta { delta, iteration } => { - log::trace!( - "[web_channel][bridge] thinking_delta round={} chars={} request_id={}", - iteration, - delta.len(), - request_id, - ); - } - AgentProgress::ToolCallArgsDelta { - call_id, - tool_name, - delta, - iteration, - } => { - log::trace!( - "[web_channel][bridge] tool_args_delta round={} tool={} call_id={} chars={} request_id={}", - iteration, - tool_name, - call_id, - delta.len(), - request_id, - ); - } - AgentProgress::ToolCallStarted { - call_id, - tool_name, - iteration, - .. - } => { - log::debug!( - "[web_channel][bridge] tool_call round={} tool={} call_id={} request_id={}", - iteration, - tool_name, - call_id, - request_id, - ); - } - AgentProgress::ToolCallCompleted { - call_id, - tool_name, - success, - iteration, - .. - } => { - log::debug!( - "[web_channel][bridge] tool_result round={} tool={} call_id={} success={} request_id={}", - iteration, - tool_name, - call_id, - success, - request_id, - ); - } - AgentProgress::SubagentFailed { - agent_id, error, .. 
- } => { - log::warn!( - "[web_channel][bridge] subagent_failed agent_id={} err={} client_id={} thread_id={} request_id={}", - agent_id, - error, - client_id, - thread_id, - request_id, - ); - } - other => { - log::debug!( - "[web_channel][bridge] lifecycle event={:?} request_id={}", - std::mem::discriminant(other), - request_id, - ); - } - } - match event { - AgentProgress::TurnStarted => { - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: request_id.clone(), - kind: AgentRunKind::BackgroundAgent, - parent_run_id: None, - parent_thread_id: Some(thread_id.clone()), - agent_id: Some("orchestrator".to_string()), - status: AgentRunStatus::Running, - prompt_ref: Some(format!("thread:{thread_id}:request:{request_id}")), - worker_thread_id: None, - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: None, - checkpoint: None, - summary: None, - error: None, - metadata: json!({ - "clientId": client_id, - "source": "web_channel", - "schemaVersion": 1 - }), - started_at: None, - completed_at: None, - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: request_id.clone(), - event_type: "turn_started".to_string(), - payload: json!({ "threadId": thread_id, "clientId": client_id }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "inference_start".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - full_response: None, - message: None, - error_type: None, - error_source: None, - error_retryable: None, - error_retry_after_ms: None, - error_provider: None, - error_fallback_available: None, - tool_name: None, - skill_id: None, - args: None, - output: None, - success: None, - round: None, - reaction_emoji: None, - segment_index: None, - segment_total: None, - delta: None, - delta_kind: None, - tool_call_id: None, - citations: None, - subagent: None, - task_board: None, - }); - } - AgentProgress::IterationStarted { - iteration, - max_iterations, - 
} => { - round = iteration; - publish_web_channel_event(WebChannelEvent { - event: "iteration_start".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - full_response: None, - message: Some(format!("Iteration {iteration}/{max_iterations}")), - error_type: None, - error_source: None, - error_retryable: None, - error_retry_after_ms: None, - error_provider: None, - error_fallback_available: None, - tool_name: None, - skill_id: None, - args: None, - output: None, - success: None, - round: Some(iteration), - reaction_emoji: None, - segment_index: None, - segment_total: None, - delta: None, - delta_kind: None, - tool_call_id: None, - citations: None, - subagent: None, - task_board: None, - }); - } - AgentProgress::ToolCallStarted { - call_id, - tool_name, - arguments, - iteration, - } => { - parent_tool_count += 1; - ledger_append_event( - &config, - RunEventAppend { - run_id: request_id.clone(), - event_type: "tool_call_started".to_string(), - payload: json!({ - "callId": call_id, - "toolName": tool_name, - "iteration": iteration - }), - }, - ); - ledger_upsert_telemetry( - &config, - RunTelemetryUpsert { - run_id: request_id.clone(), - tool_count: Some(parent_tool_count), - ..Default::default() - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "tool_call".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - tool_name: Some(tool_name), - skill_id: Some("web_channel".to_string()), - args: Some(arguments), - round: Some(iteration), - tool_call_id: Some(call_id), - ..Default::default() - }); - } - AgentProgress::ToolCallCompleted { - call_id, - tool_name, - success, - output_chars, - elapsed_ms, - iteration, - } => { - ledger_append_event( - &config, - RunEventAppend { - run_id: request_id.clone(), - event_type: "tool_call_completed".to_string(), - payload: json!({ - "callId": call_id, - "toolName": tool_name, - "success": success, - 
"outputChars": output_chars, - "elapsedMs": elapsed_ms, - "iteration": iteration - }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "tool_result".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - tool_name: Some(tool_name), - skill_id: Some("web_channel".to_string()), - output: Some( - json!({"output_chars": output_chars, "elapsed_ms": elapsed_ms}) - .to_string(), - ), - success: Some(success), - round: Some(iteration), - tool_call_id: Some(call_id), - ..Default::default() - }); - } - AgentProgress::SubagentSpawned { - agent_id, - task_id, - mode, - dedicated_thread, - prompt_chars, - worker_thread_id, - display_name, - } => { - let label = display_name.as_deref().unwrap_or(&agent_id); - let kind = if worker_thread_id.is_some() { - AgentRunKind::WorkerThread - } else { - AgentRunKind::Subagent - }; - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: task_id.clone(), - kind, - parent_run_id: Some(request_id.clone()), - parent_thread_id: Some(thread_id.clone()), - agent_id: Some(agent_id.clone()), - status: AgentRunStatus::Running, - prompt_ref: worker_thread_id - .as_ref() - .map(|id| format!("thread:{id}:message:seed")), - worker_thread_id: worker_thread_id.clone(), - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: None, - checkpoint: None, - summary: None, - error: None, - metadata: json!({ - "mode": mode, - "dedicatedThread": dedicated_thread, - "promptChars": prompt_chars, - "displayName": display_name, - "source": "agent_progress", - "schemaVersion": 1 - }), - started_at: None, - completed_at: None, - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: task_id.clone(), - event_type: "subagent_spawned".to_string(), - payload: json!({ - "agentId": agent_id, - "parentRunId": request_id, - "threadId": thread_id, - "workerThreadId": worker_thread_id, - "mode": mode, - "dedicatedThread": dedicated_thread, - "promptChars": 
prompt_chars, - "displayName": display_name - }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "subagent_spawned".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - message: Some(format!("Sub-agent '{label}' spawned")), - tool_name: Some(agent_id), - skill_id: Some(task_id), - round: Some(round), - subagent: Some(SubagentProgressDetail { - mode: Some(mode), - dedicated_thread: Some(dedicated_thread), - prompt_chars: Some(prompt_chars as u64), - worker_thread_id, - display_name, - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentCompleted { - agent_id, - task_id, - elapsed_ms, - iterations, - output_chars, - } => { - let completed_at = chrono::Utc::now(); - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: task_id.clone(), - kind: AgentRunKind::Subagent, - parent_run_id: Some(request_id.clone()), - parent_thread_id: Some(thread_id.clone()), - agent_id: Some(agent_id.clone()), - status: AgentRunStatus::Completed, - prompt_ref: None, - worker_thread_id: None, - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: None, - checkpoint: None, - summary: Some(format!( - "Completed in {iterations} iteration(s), {output_chars} output chars" - )), - error: None, - metadata: json!({}), - started_at: None, - completed_at: Some(completed_at), - }, - ); - ledger_upsert_telemetry( - &config, - RunTelemetryUpsert { - run_id: task_id.clone(), - elapsed_ms: Some(elapsed_ms), - tool_count: child_tool_counts.get(&task_id).copied(), - ..Default::default() - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: task_id.clone(), - event_type: "subagent_completed".to_string(), - payload: json!({ - "agentId": agent_id, - "elapsedMs": elapsed_ms, - "iterations": iterations, - "outputChars": output_chars - }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "subagent_completed".to_string(), - client_id: 
client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - message: Some(format!( - "Sub-agent '{agent_id}' completed in {elapsed_ms}ms" - )), - tool_name: Some(agent_id), - skill_id: Some(task_id), - success: Some(true), - round: Some(round), - subagent: Some(SubagentProgressDetail { - elapsed_ms: Some(elapsed_ms), - iterations: Some(iterations), - output_chars: Some(output_chars as u64), - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentFailed { - agent_id, - task_id, - error, - } => { - let completed_at = chrono::Utc::now(); - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: task_id.clone(), - kind: AgentRunKind::Subagent, - parent_run_id: Some(request_id.clone()), - parent_thread_id: Some(thread_id.clone()), - agent_id: Some(agent_id.clone()), - status: AgentRunStatus::Failed, - prompt_ref: None, - worker_thread_id: None, - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: None, - checkpoint: None, - summary: None, - error: Some(error.clone()), - metadata: json!({}), - started_at: None, - completed_at: Some(completed_at), - }, - ); - ledger_upsert_telemetry( - &config, - RunTelemetryUpsert { - run_id: task_id.clone(), - tool_count: child_tool_counts.get(&task_id).copied(), - error: Some(error.clone()), - ..Default::default() - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: task_id.clone(), - event_type: "subagent_failed".to_string(), - payload: json!({ "agentId": agent_id, "error": error }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "subagent_failed".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - message: Some(error), - tool_name: Some(agent_id), - skill_id: Some(task_id), - success: Some(false), - round: Some(round), - ..Default::default() - }); - } - AgentProgress::SubagentAwaitingUser { - agent_id, - task_id, - question, - worker_thread_id, 
- } => { - log::debug!( - "[web_channel][bridge] subagent_awaiting_user agent_id={} task_id={} client_id={} thread_id={} request_id={}", - agent_id, - task_id, - client_id, - thread_id, - request_id, - ); - let checkpoint_path = config - .workspace_dir - .join(".openhuman/subagent_checkpoints") - .join(format!("{task_id}.json")); - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: task_id.clone(), - kind: if worker_thread_id.is_some() { - AgentRunKind::WorkerThread - } else { - AgentRunKind::Subagent - }, - parent_run_id: Some(request_id.clone()), - parent_thread_id: Some(thread_id.clone()), - agent_id: Some(agent_id.clone()), - status: AgentRunStatus::AwaitingUser, - prompt_ref: None, - worker_thread_id: worker_thread_id.clone(), - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: Some(checkpoint_path.to_string_lossy().to_string()), - checkpoint: Some(json!({ - "resumeTool": "continue_subagent", - "taskId": task_id, - "agentId": agent_id, - "question": question, - "workerThreadId": worker_thread_id - })), - summary: Some(question.clone()), - error: None, - metadata: json!({}), - started_at: None, - completed_at: None, - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: task_id.clone(), - event_type: "subagent_awaiting_user".to_string(), - payload: json!({ - "agentId": agent_id, - "question": question, - "workerThreadId": worker_thread_id - }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "subagent_awaiting_user".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - message: Some(question), - tool_name: Some(agent_id), - skill_id: Some(task_id), - success: Some(true), - round: Some(round), - subagent: Some(SubagentProgressDetail { - worker_thread_id, - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentIterationStarted { - agent_id, - task_id, - iteration, - max_iterations, - extended_policy, 
- } => { - publish_web_channel_event(WebChannelEvent { - event: "subagent_iteration_start".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - message: Some(if extended_policy { - format!("Sub-agent '{agent_id}' step {iteration}") - } else { - format!("Sub-agent '{agent_id}' iteration {iteration}/{max_iterations}") - }), - tool_name: Some(agent_id), - skill_id: Some(task_id), - round: Some(round), - subagent: Some(SubagentProgressDetail { - child_iteration: Some(iteration), - child_max_iterations: if extended_policy { - None - } else { - Some(max_iterations) - }, - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentToolCallStarted { - agent_id, - task_id, - call_id, - tool_name, - iteration, - } => { - let count = child_tool_counts.entry(task_id.clone()).or_insert(0); - *count += 1; - ledger_upsert_telemetry( - &config, - RunTelemetryUpsert { - run_id: task_id.clone(), - tool_count: Some(*count), - ..Default::default() - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: task_id.clone(), - event_type: "subagent_tool_call_started".to_string(), - payload: json!({ - "agentId": agent_id, - "callId": call_id, - "toolName": tool_name, - "iteration": iteration - }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "subagent_tool_call".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - tool_name: Some(tool_name), - skill_id: Some(task_id.clone()), - round: Some(round), - tool_call_id: Some(call_id), - subagent: Some(SubagentProgressDetail { - child_iteration: Some(iteration), - agent_id: Some(agent_id), - task_id: Some(task_id), - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentToolCallCompleted { - agent_id, - task_id, - call_id, - tool_name, - success, - output_chars, - elapsed_ms, - iteration, - } => { - ledger_append_event( - &config, - 
RunEventAppend { - run_id: task_id.clone(), - event_type: "subagent_tool_call_completed".to_string(), - payload: json!({ - "agentId": agent_id, - "callId": call_id, - "toolName": tool_name, - "success": success, - "outputChars": output_chars, - "elapsedMs": elapsed_ms, - "iteration": iteration - }), - }, - ); - publish_web_channel_event(WebChannelEvent { - event: "subagent_tool_result".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - tool_name: Some(tool_name), - skill_id: Some(task_id.clone()), - success: Some(success), - round: Some(round), - tool_call_id: Some(call_id), - output: Some( - json!({"output_chars": output_chars, "elapsed_ms": elapsed_ms}) - .to_string(), - ), - subagent: Some(SubagentProgressDetail { - child_iteration: Some(iteration), - agent_id: Some(agent_id), - task_id: Some(task_id), - elapsed_ms: Some(elapsed_ms), - output_chars: Some(output_chars as u64), - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentTextDelta { - agent_id, - task_id, - delta, - iteration, - } => { - publish_web_channel_event(WebChannelEvent { - event: "subagent_text_delta".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - round: Some(round), - delta: Some(delta), - delta_kind: Some("text".to_string()), - skill_id: Some(task_id.clone()), - subagent: Some(SubagentProgressDetail { - child_iteration: Some(iteration), - agent_id: Some(agent_id), - task_id: Some(task_id), - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::SubagentThinkingDelta { - agent_id, - task_id, - delta, - iteration, - } => { - publish_web_channel_event(WebChannelEvent { - event: "subagent_thinking_delta".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - round: Some(round), - delta: Some(delta), - delta_kind: Some("thinking".to_string()), - skill_id: 
Some(task_id.clone()), - subagent: Some(SubagentProgressDetail { - child_iteration: Some(iteration), - agent_id: Some(agent_id), - task_id: Some(task_id), - ..Default::default() - }), - ..Default::default() - }); - } - AgentProgress::TaskBoardUpdated { board } => { - log::debug!( - "[web_channel][bridge] task_board_updated client_id={} thread_id={} request_id={} cards={}", - client_id, - thread_id, - request_id, - board.cards.len() - ); - publish_web_channel_event(WebChannelEvent { - event: "task_board_updated".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - task_board: Some(serde_json::to_value(board).unwrap_or_else( - |_| serde_json::json!({ "threadId": thread_id, "cards": [] }), - )), - ..Default::default() - }); - } - AgentProgress::TextDelta { delta, iteration } => { - publish_web_channel_event(WebChannelEvent { - event: "text_delta".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - round: Some(iteration), - delta: Some(delta), - delta_kind: Some("text".to_string()), - ..Default::default() - }); - } - AgentProgress::ThinkingDelta { delta, iteration } => { - publish_web_channel_event(WebChannelEvent { - event: "thinking_delta".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - round: Some(iteration), - delta: Some(delta), - delta_kind: Some("thinking".to_string()), - ..Default::default() - }); - } - AgentProgress::ToolCallArgsDelta { - call_id, - tool_name, - delta, - iteration, - } => { - publish_web_channel_event(WebChannelEvent { - event: "tool_args_delta".to_string(), - client_id: client_id.clone(), - thread_id: thread_id.clone(), - request_id: request_id.clone(), - tool_name: if tool_name.is_empty() { - None - } else { - Some(tool_name) - }, - skill_id: Some("web_channel".to_string()), - round: Some(iteration), - delta: Some(delta), - delta_kind: 
Some("tool_args".to_string()), - tool_call_id: Some(call_id), - ..Default::default() - }); - } - AgentProgress::TurnCompleted { iterations } => { - parent_completed = true; - let completed_at = chrono::Utc::now(); - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: request_id.clone(), - kind: AgentRunKind::BackgroundAgent, - parent_run_id: None, - parent_thread_id: Some(thread_id.clone()), - agent_id: Some("orchestrator".to_string()), - status: AgentRunStatus::Completed, - prompt_ref: Some(format!("thread:{thread_id}:request:{request_id}")), - worker_thread_id: None, - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: None, - checkpoint: None, - summary: Some(format!("Completed in {iterations} iteration(s)")), - error: None, - metadata: json!({}), - started_at: None, - completed_at: Some(completed_at), - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: request_id.clone(), - event_type: "turn_completed".to_string(), - payload: json!({ "iterations": iterations }), - }, - ); - log::debug!( - "[web_channel] turn completed after {iterations} iteration(s) \ - client_id={client_id} thread_id={thread_id} request_id={request_id} \ - speak_reply={:?} source={:?} session_id={:?}", - metadata.speak_reply, - metadata.source, - metadata.session_id, - ); - } - AgentProgress::TurnCostUpdated { - model, - iteration, - input_tokens, - output_tokens, - cached_input_tokens, - total_usd, - } => { - ledger_upsert_telemetry( - &config, - RunTelemetryUpsert { - run_id: request_id.clone(), - input_tokens: Some(input_tokens), - output_tokens: Some(output_tokens), - cached_input_tokens: Some(cached_input_tokens), - cost_usd: Some(total_usd), - model: Some(model.clone()), - ..Default::default() - }, - ); - // Cost telemetry — not surfaced to the UI yet, but - // logged at debug for now and ready for a future - // socket payload. 
- log::debug!( - "[web_channel] turn cost update model={model} iter={iteration} \ - in={input_tokens} out={output_tokens} cached_in={cached_input_tokens} \ - total_usd={total_usd:.4} client_id={client_id} thread_id={thread_id}" - ); - } - } - } - turn_state.finish(); - if !parent_completed { - ledger_upsert_agent_run( - &config, - AgentRunUpsert { - id: request_id.clone(), - kind: AgentRunKind::BackgroundAgent, - parent_run_id: None, - parent_thread_id: Some(thread_id.clone()), - agent_id: Some("orchestrator".to_string()), - status: AgentRunStatus::Interrupted, - prompt_ref: Some(format!("thread:{thread_id}:request:{request_id}")), - worker_thread_id: None, - task_board_id: Some(thread_id.clone()), - task_card_id: None, - checkpoint_path: None, - checkpoint: None, - summary: None, - error: Some("progress bridge exited before turn completion".to_string()), - metadata: json!({}), - started_at: None, - completed_at: Some(chrono::Utc::now()), - }, - ); - ledger_append_event( - &config, - RunEventAppend { - run_id: request_id.clone(), - event_type: "turn_interrupted".to_string(), - payload: json!({ "eventsSeen": events_seen }), - }, - ); - } - log::debug!( - "[web_channel][bridge] exit client_id={} thread_id={} request_id={} round={} events_seen={}", - client_id, - thread_id, - request_id, - round, - events_seen, - ); - }); -} - -fn ledger_upsert_agent_run( - config: &crate::openhuman::config::Config, - upsert: crate::openhuman::session_db::run_ledger::AgentRunUpsert, -) { - if let Err(err) = crate::openhuman::session_db::run_ledger::upsert_agent_run(config, upsert) { - log::warn!("[run_ledger][web_channel] failed to upsert run: {err}"); - } -} - -fn ledger_append_event( - config: &crate::openhuman::config::Config, - event: crate::openhuman::session_db::run_ledger::RunEventAppend, -) { - if let Err(err) = crate::openhuman::session_db::run_ledger::append_run_event(config, event) { - log::warn!("[run_ledger][web_channel] failed to append event: {err}"); - } -} - -fn 
ledger_upsert_telemetry( - config: &crate::openhuman::config::Config, - telemetry: crate::openhuman::session_db::run_ledger::RunTelemetryUpsert, -) { - if let Err(err) = - crate::openhuman::session_db::run_ledger::upsert_run_telemetry(config, telemetry) - { - log::warn!("[run_ledger][web_channel] failed to upsert telemetry: {err}"); - } -} - -fn normalize_model_override(model_override: Option) -> Option { - model_override - .map(|model| model.trim().to_string()) - .filter(|model| !model.is_empty()) -} - -fn provider_role_for_model_override(model_override: Option<&str>) -> &'static str { - match model_override.map(str::trim) { - Some("hint:agentic") | Some("agentic-v1") => "agentic", - Some("hint:coding") | Some("coding-v1") => "coding", - Some("hint:summarization") | Some("summarization-v1") => "summarization", - Some("hint:reasoning") => "reasoning", - _ => "chat", - } -} - -fn build_session_agent( - config: &Config, - client_id: &str, - thread_id: &str, - target_agent_id: &str, - profile: &AgentProfile, - model_override: Option, - temperature: Option, - locale: Option<&str>, -) -> Result { - let mut effective = config.clone(); - if let Some(model) = model_override { - effective.default_model = Some(model); - } - let provider_role = provider_role_for_model_override(effective.default_model.as_deref()); - if let Some(temp) = temperature { - effective.default_temperature = temp; - } - - // All chat turns route directly to the orchestrator agent (or to the - // profile-specific agent for non-default profiles). The welcome agent - // has been removed; onboarding UI is handled by the Joyride walkthrough - // in the frontend. 
- log::info!( - "[web-channel] routing chat turn to '{}' via profile '{}' provider_role='{}' (client_id={}, thread_id={})", - target_agent_id, - profile.id, - provider_role, - client_id, - thread_id - ); - - // (#623) If this thread was spawned from a subconscious reflection, - // load the pre-resolved `source_chunks` snapshot and route through - // the chunks-aware constructor so the orchestrator's system prompt - // carries the same memory context the reflection-LLM cited. For - // regular threads this is a no-op (chunks=None, normal path). - let reflection_chunks = load_reflection_chunks_for_thread(&effective.workspace_dir, thread_id); - - if let Some(chunks) = reflection_chunks - .as_ref() - .filter(|chunks| !chunks.is_empty()) - { - log::info!( - "[web-channel] thread={} spawned from reflection — injecting {} memory chunks into system prompt", - thread_id, - chunks.len() - ); - } - - // Compose the locale-directive (e.g. "Respond in Arabic") with the - // profile's own suffix so the agent always reads the user's - // preferred reply language alongside any profile-level rules. The - // directive is emitted only for non-English locales — English - // matches the agent's default, so injecting it would just be noise - // for the LLM and a regression risk for cached/seeded transcripts. 
- let locale_directive = locale.and_then(locale_reply_directive); - let composed_suffix = compose_system_prompt_suffix( - locale_directive.as_deref(), - profile.system_prompt_suffix.as_deref(), - ); - if let Some(s) = locale_directive.as_deref() { - log::info!( - "[web-channel] injecting locale directive client={} thread={} locale={} directive={:?}", - client_id, - thread_id, - locale.unwrap_or(""), - s - ); - } - - let agent_result = Agent::from_config_for_agent_with_profile( - &effective, - target_agent_id, - reflection_chunks, - composed_suffix, - ); - - agent_result - .map(|mut agent| { - if let Some(allowed_tools) = profile - .allowed_tools - .as_ref() - .filter(|tools| !tools.is_empty()) - { - agent.set_visible_tool_names( - allowed_tools - .iter() - .map(|tool| tool.trim().to_string()) - .filter(|tool| !tool.is_empty()) - .collect::>(), - ); - } - agent.set_event_context(event_session_id_for(client_id, thread_id), "web_channel"); - // Scope session transcripts per thread so each conversation - // gets its own transcript file instead of sharing one by - // agent type. Without this, new threads load the latest - // transcript for the agent name and inherit prior messages. - let short_thread = if thread_id.len() > 12 { - &thread_id[..12] - } else { - thread_id - }; - agent.set_agent_definition_name(format!("{target_agent_id}_{short_thread}")); - agent - }) - .map_err(|e| e.to_string()) -} - -/// Look up reflection-spawned-thread metadata for a chat thread (#623). -/// -/// Reads the thread's first message; if it was seeded by `reflections_act` -/// — `extra_metadata.origin == "subconscious_reflection"` with a -/// `reflection_id` — fetches the reflection row and returns its -/// pre-resolved `source_chunks` snapshot. Returns `None` for ordinary -/// chat threads (no reflection origin) and on any error so a missing -/// reflection never breaks the chat path. 
-fn load_reflection_chunks_for_thread( - workspace_dir: &std::path::Path, - thread_id: &str, -) -> Option> { - let messages = crate::openhuman::memory_conversations::get_messages( - workspace_dir.to_path_buf(), - thread_id, - ) - .ok()?; - let first = messages.first()?; - let origin = first - .extra_metadata - .get("origin") - .and_then(|v| v.as_str())?; - if origin != "subconscious_reflection" { - return None; - } - let reflection_id = first - .extra_metadata - .get("reflection_id") - .and_then(|v| v.as_str())? - .to_string(); - let reflection = - crate::openhuman::subconscious::store::with_connection(workspace_dir, |conn| { - crate::openhuman::subconscious::reflection_store::get_reflection(conn, &reflection_id) - }) - .ok() - .flatten()?; - Some(reflection.source_chunks) -} - -#[derive(Debug, Deserialize)] -struct WebChatParams { - client_id: String, - thread_id: String, - message: String, - model_override: Option, - temperature: Option, - profile_id: Option, - /// BCP-47 locale of the frontend UI (e.g. `ar`, `zh-CN`). When set - /// and not English, the system prompt is augmented to ask the - /// agent to reply in that language. `None` keeps the agent's - /// default language (English) so existing integrations don't - /// silently change behaviour. - locale: Option, - /// When `true`, the agent's final reply should be spoken via TTS - /// (for PTT and similar background voice flows). Accepted and - /// stored here; wired to TTS in Task 4. - #[serde(default)] - speak_reply: Option, - /// Origin of the message: `"ptt"` | `"dictation"` | `"type"` | other. - /// Used for analytics and downstream metadata. - #[serde(default)] - source: Option, - /// Optional caller-provided correlation id (PTT session id). - #[serde(default)] - session_id: Option, - /// Queue mode for concurrent messages: `interrupt` (default), `steer`, - /// `followup`, or `collect`. - #[serde(default)] - queue_mode: Option, -} - -/// Per-request metadata carried alongside a chat send. 
Currently used by the -/// PTT flow (Task 4 wires it to `voice::reply_speech`); other voice surfaces -/// can populate it the same way. -#[derive(Debug, Default, Clone)] -pub struct ChatRequestMetadata { - pub speak_reply: Option, - pub source: Option, - pub session_id: Option, -} - -#[derive(Debug, Deserialize)] -struct WebQueueParams { - thread_id: String, -} - -#[derive(Debug, Deserialize)] -struct WebCancelParams { - client_id: String, - thread_id: String, -} - -pub async fn channel_web_chat( - client_id: &str, - thread_id: &str, - message: &str, - model_override: Option, - temperature: Option, - profile_id: Option, - locale: Option, - queue_mode: Option, - metadata: ChatRequestMetadata, -) -> Result, String> { - let result = start_chat( - client_id, - thread_id, - message, - model_override, - temperature, - profile_id, - locale, - queue_mode, - metadata, - ) - .await?; - - // start_chat returns either a plain request_id string or a JSON string - // (for queued messages). Try to parse as JSON first. 
- if let Ok(parsed) = serde_json::from_str::(&result) { - return Ok(RpcOutcome::single_log(parsed, "web channel message queued")); - } - - Ok(RpcOutcome::single_log( - json!({ - "accepted": true, - "client_id": client_id.trim(), - "thread_id": thread_id.trim(), - "request_id": result, - }), - "web channel request accepted", - )) -} - -pub async fn channel_web_queue_status(thread_id: &str) -> Result, String> { - let map_key = key_for(thread_id); - let in_flight = IN_FLIGHT.lock().await; - if let Some(entry) = in_flight.get(&map_key) { - let status = entry.run_queue.status().await; - Ok(RpcOutcome::single_log( - json!({ - "thread_id": thread_id.trim(), - "active": true, - "request_id": entry.request_id, - "steers": status.steers, - "followups": status.followups, - "collects": status.collects, - "total": status.total, - }), - "queue status retrieved", - )) - } else { - Ok(RpcOutcome::single_log( - json!({ - "thread_id": thread_id.trim(), - "active": false, - "steers": 0, - "followups": 0, - "collects": 0, - "total": 0, - }), - "no active turn for thread", - )) - } -} - -pub async fn channel_web_queue_clear(thread_id: &str) -> Result, String> { - let map_key = key_for(thread_id); - let in_flight = IN_FLIGHT.lock().await; - if let Some(entry) = in_flight.get(&map_key) { - let dropped = entry.run_queue.clear().await; - log::info!( - "[web-channel] cleared queue thread_id={} dropped={}", - thread_id, - dropped - ); - Ok(RpcOutcome::single_log( - json!({ - "thread_id": thread_id.trim(), - "cleared": true, - "dropped": dropped, - }), - "queue cleared", - )) - } else { - Ok(RpcOutcome::single_log( - json!({ - "thread_id": thread_id.trim(), - "cleared": false, - "dropped": 0, - }), - "no active turn for thread", - )) - } -} - -pub async fn channel_web_cancel( - client_id: &str, - thread_id: &str, -) -> Result, String> { - let cancelled_request_id = cancel_chat(client_id, thread_id).await?; - - // No web-channel turn for this thread → it may be an autonomous task run - // 
streaming into a task session. Those are detached dispatcher tasks (not in - // IN_FLIGHT), so cancel them via the dispatcher's registry instead — this is - // what makes the chat Cancel button work on task threads. - let cancelled = if cancelled_request_id.is_some() { - true - } else { - crate::openhuman::agent::task_dispatcher::cancel_session(thread_id.trim()).await - }; - - Ok(RpcOutcome::single_log( - json!({ - "cancelled": cancelled, - "client_id": client_id.trim(), - "thread_id": thread_id.trim(), - "request_id": cancelled_request_id, - }), - "web channel cancellation processed", - )) -} - -pub fn all_web_channel_controller_schemas() -> Vec { - vec![ - schemas("chat"), - schemas("cancel"), - schemas("queue_status"), - schemas("queue_clear"), - ] -} - -pub fn all_web_channel_registered_controllers() -> Vec { - vec![ - RegisteredController { - schema: schemas("chat"), - handler: handle_chat, - }, - RegisteredController { - schema: schemas("cancel"), - handler: handle_cancel, - }, - RegisteredController { - schema: schemas("queue_status"), - handler: handle_queue_status, - }, - RegisteredController { - schema: schemas("queue_clear"), - handler: handle_queue_clear, - }, - ] -} - -pub fn schemas(function: &str) -> ControllerSchema { - match function { - "chat" => ControllerSchema { - namespace: "channel", - function: "web_chat", - description: "Send a web channel message through the agent loop.", - inputs: vec![ - required_string("client_id", "Client stream identifier."), - required_string("thread_id", "Thread identifier."), - required_string("message", "User message."), - optional_string("model_override", "Optional model override."), - optional_f64("temperature", "Optional temperature override."), - optional_string("profile_id", "Optional agent profile id."), - optional_string( - "locale", - "Optional BCP-47 UI locale (e.g. 'ar', 'zh-CN'). 
Drives the \"reply in this language\" system-prompt directive.", - ), - optional_bool("speak_reply", "When true, the agent's final reply is spoken via TTS (for PTT and similar background voice flows)."), - optional_string("source", "Origin of the message: \"ptt\" | \"dictation\" | \"type\" | other. Used for analytics + downstream metadata."), - optional_u64("session_id", "Optional caller-provided correlation id (PTT session id)."), - optional_string( - "queue_mode", - "Queue mode: 'interrupt' (default), 'steer', 'followup', or 'collect'.", - ), - ], - outputs: vec![json_output("ack", "Acceptance payload.")], - }, - "cancel" => ControllerSchema { - namespace: "channel", - function: "web_cancel", - description: "Cancel in-flight web channel request for a thread.", - inputs: vec![ - required_string("client_id", "Client stream identifier."), - required_string("thread_id", "Thread identifier."), - ], - outputs: vec![json_output("ack", "Cancellation payload.")], - }, - "queue_status" => ControllerSchema { - namespace: "channel", - function: "web_queue_status", - description: "Get the run queue status for a thread.", - inputs: vec![required_string("thread_id", "Thread identifier.")], - outputs: vec![json_output("status", "Queue status payload.")], - }, - "queue_clear" => ControllerSchema { - namespace: "channel", - function: "web_queue_clear", - description: "Clear the run queue for a thread.", - inputs: vec![required_string("thread_id", "Thread identifier.")], - outputs: vec![json_output("result", "Queue clear result.")], - }, - _ => ControllerSchema { - namespace: "channel", - function: "unknown", - description: "Unknown web channel controller function.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "error", - ty: TypeSchema::String, - comment: "Lookup error details.", - required: true, - }], - }, - } -} - -fn handle_chat(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - to_json( - channel_web_chat( - 
&p.client_id, - &p.thread_id, - &p.message, - p.model_override, - p.temperature, - p.profile_id, - p.locale, - p.queue_mode, - ChatRequestMetadata { - speak_reply: p.speak_reply, - source: p.source, - session_id: p.session_id, - }, - ) - .await?, - ) - }) -} - -fn handle_queue_status(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - to_json(channel_web_queue_status(&p.thread_id).await?) - }) -} - -fn handle_queue_clear(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - to_json(channel_web_queue_clear(&p.thread_id).await?) - }) -} - -/// Map a frontend BCP-47 locale tag to a system-prompt directive -/// instructing the agent to reply in that language. Returns `None` -/// for English (the agent's default — adding "Respond in English" -/// is a no-op for the LLM but risks invalidating cached prefixes) -/// and for unknown tags so the agent falls through to its default -/// behaviour instead of seeing a half-built directive. -pub(crate) fn locale_reply_directive(locale: &str) -> Option { - let language = match locale.trim() { - // Keep this table in lockstep with `Locale` in - // `app/src/lib/i18n/types.ts` — every locale the frontend can - // ship should resolve to a language name here. - "ar" => "Arabic", - "bn" => "Bengali", - "es" => "Spanish", - "fr" => "French", - "hi" => "Hindi", - "id" => "Indonesian", - "it" => "Italian", - "pt" => "Portuguese", - "ru" => "Russian", - "zh-CN" | "zh" => "Simplified Chinese", - // English (and any unrecognised tag) → no directive. - _ => return None, - }; - Some(format!( - "User language: the user's interface is set to {language}. \ - Respond in {language} unless the user explicitly asks for a different language. \ - Keep proper nouns, code, and command names untranslated." - )) -} - -/// Stitch the locale directive (if any) onto the profile's own -/// system-prompt suffix. 
The directive comes first so it shows up -/// near the top of the appended block — easier for the LLM to honour -/// than language guidance buried after profile-specific rules. -pub(crate) fn compose_system_prompt_suffix( - locale_directive: Option<&str>, - profile_suffix: Option<&str>, -) -> Option { - match (locale_directive, profile_suffix) { - (None, None) => None, - (Some(d), None) => Some(d.to_string()), - (None, Some(p)) => Some(p.to_string()), - (Some(d), Some(p)) => Some(format!("{d}\n\n{p}")), - } -} - -fn handle_cancel(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - to_json(channel_web_cancel(&p.client_id, &p.thread_id).await?) - }) -} - -fn deserialize_params( - params: Map, -) -> Result { - serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) -} - -fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::String, - comment, - required: true, - } -} - -fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment, - required: false, - } -} - -fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment, - required: false, - } -} - -fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), - comment, - required: false, - } -} - -fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment, - required: false, - } -} - -fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Json, - comment, - required: true, - } -} - -fn to_json(outcome: 
RpcOutcome) -> Result { - outcome.into_cli_compatible_json() -} - -#[cfg(test)] -#[path = "web_tests.rs"] -mod tests; diff --git a/src/openhuman/channels/providers/web/event_bus.rs b/src/openhuman/channels/providers/web/event_bus.rs new file mode 100644 index 0000000000..43ed696847 --- /dev/null +++ b/src/openhuman/channels/providers/web/event_bus.rs @@ -0,0 +1,232 @@ +use async_trait::async_trait; +use once_cell::sync::Lazy; +use std::sync::{Arc, OnceLock}; +use tokio::sync::broadcast; + +use crate::core::event_bus::{DomainEvent, EventHandler, SubscriptionHandle}; +use crate::core::socketio::WebChannelEvent; + +static EVENT_BUS: Lazy> = Lazy::new(|| { + let (tx, _rx) = broadcast::channel(512); + tx +}); + +pub fn subscribe_web_channel_events() -> broadcast::Receiver { + EVENT_BUS.subscribe() +} + +pub fn publish_web_channel_event(event: WebChannelEvent) { + let _ = EVENT_BUS.send(event); +} + +static APPROVAL_SURFACE_HANDLE: OnceLock = OnceLock::new(); + +pub fn register_approval_surface_subscriber() { + if APPROVAL_SURFACE_HANDLE.get().is_some() { + return; + } + match crate::core::event_bus::subscribe_global(Arc::new(ApprovalSurfaceSubscriber)) { + Some(handle) => { + let _ = APPROVAL_SURFACE_HANDLE.set(handle); + log::info!( + "[web-channel] approval-surface subscriber registered (domain=approval) — will bridge ApprovalRequested → approval_request socket event" + ); + } + None => { + log::warn!( + "[web-channel] failed to register approval-surface subscriber — bus not initialized" + ); + } + } +} + +static ARTIFACT_SURFACE_HANDLE: OnceLock = OnceLock::new(); + +pub fn register_artifact_surface_subscriber() { + if ARTIFACT_SURFACE_HANDLE.get().is_some() { + return; + } + match crate::core::event_bus::subscribe_global(Arc::new(ArtifactSurfaceSubscriber)) { + Some(handle) => { + let _ = ARTIFACT_SURFACE_HANDLE.set(handle); + log::info!( + "[web-channel] artifact-surface subscriber registered (domain=artifact) — will bridge ArtifactPending/Ready/Failed → 
artifact_pending/artifact_ready/artifact_failed socket events" + ); + } + None => { + log::warn!( + "[web-channel] failed to register artifact-surface subscriber — bus not initialized" + ); + } + } +} + +struct ArtifactSurfaceSubscriber; + +#[async_trait] +impl EventHandler for ArtifactSurfaceSubscriber { + fn name(&self) -> &str { + "channels::web::artifact_surface" + } + + fn domains(&self) -> Option<&[&str]> { + Some(&["artifact"]) + } + + async fn handle(&self, event: &DomainEvent) { + match event { + DomainEvent::ArtifactReady { + artifact_id, + kind, + title, + workspace_dir, + path, + size_bytes, + thread_id, + client_id, + } => { + let (Some(thread_id), Some(client_id)) = (thread_id, client_id) else { + log::debug!( + "[web-channel] artifact-surface skip ArtifactReady id={artifact_id}: no chat context" + ); + return; + }; + log::info!( + "[web-channel] artifact-surface emitting artifact_ready id={artifact_id} kind={kind} thread_id={thread_id} client_id={client_id}" + ); + publish_web_channel_event(WebChannelEvent { + event: "artifact_ready".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + args: Some(serde_json::json!({ + "artifact_id": artifact_id, + "kind": kind, + "title": title, + "workspace_dir": workspace_dir, + "path": path, + "size_bytes": size_bytes, + })), + ..Default::default() + }); + } + DomainEvent::ArtifactFailed { + artifact_id, + kind, + title, + workspace_dir, + error, + thread_id, + client_id, + } => { + let (Some(thread_id), Some(client_id)) = (thread_id, client_id) else { + log::debug!( + "[web-channel] artifact-surface skip ArtifactFailed id={artifact_id}: no chat context" + ); + return; + }; + log::warn!( + "[web-channel] artifact-surface emitting artifact_failed id={artifact_id} kind={kind} thread_id={thread_id} client_id={client_id} error_len={}", + error.len() + ); + publish_web_channel_event(WebChannelEvent { + event: "artifact_failed".to_string(), + client_id: client_id.clone(), + thread_id: 
thread_id.clone(), + args: Some(serde_json::json!({ + "artifact_id": artifact_id, + "kind": kind, + "title": title, + "workspace_dir": workspace_dir, + "error": error, + })), + ..Default::default() + }); + } + DomainEvent::ArtifactPending { + artifact_id, + kind, + title, + workspace_dir, + path, + thread_id, + client_id, + } => { + let (Some(thread_id), Some(client_id)) = (thread_id, client_id) else { + log::debug!( + "[web-channel] artifact-surface skip ArtifactPending id={artifact_id}: no chat context" + ); + return; + }; + log::info!( + "[web-channel] artifact-surface emitting artifact_pending id={artifact_id} kind={kind} thread_id={thread_id} client_id={client_id}" + ); + publish_web_channel_event(WebChannelEvent { + event: "artifact_pending".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + args: Some(serde_json::json!({ + "artifact_id": artifact_id, + "kind": kind, + "title": title, + "workspace_dir": workspace_dir, + "path": path, + })), + ..Default::default() + }); + } + _ => {} + } + } +} + +struct ApprovalSurfaceSubscriber; + +#[async_trait] +impl EventHandler for ApprovalSurfaceSubscriber { + fn name(&self) -> &str { + "channels::web::approval_surface" + } + + fn domains(&self) -> Option<&[&str]> { + Some(&["approval"]) + } + + async fn handle(&self, event: &DomainEvent) { + if let DomainEvent::ApprovalRequested { + request_id, + tool_name, + action_summary, + args_redacted, + thread_id, + client_id, + .. 
+ } = event + { + match (thread_id, client_id) { + (Some(thread_id), Some(client_id)) => { + let question = format!("Run `{tool_name}` — {action_summary}"); + log::info!( + "[web-channel] approval-surface emitting approval_request request_id={request_id} thread_id={thread_id} client_id={client_id} tool={tool_name}" + ); + publish_web_channel_event(WebChannelEvent { + event: "approval_request".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + tool_name: Some(tool_name.clone()), + message: Some(question), + args: Some(args_redacted.clone()), + ..Default::default() + }); + } + _ => { + log::warn!( + "[web-channel] approval-surface received ApprovalRequested request_id={request_id} tool={tool_name} but thread_id/client_id absent (thread={}, client={}) — NOT surfacing", + thread_id.is_some(), + client_id.is_some() + ); + } + } + } + } +} diff --git a/src/openhuman/channels/providers/web/mod.rs b/src/openhuman/channels/providers/web/mod.rs new file mode 100644 index 0000000000..a57721867d --- /dev/null +++ b/src/openhuman/channels/providers/web/mod.rs @@ -0,0 +1,118 @@ +mod event_bus; +mod ops; +mod progress_bridge; +mod run_task; +mod schemas; +mod session; +mod types; + +#[path = "../web_errors.rs"] +mod web_errors; +pub(crate) use web_errors::{ + classify_inference_error, inference_budget_exceeded_user_message, + is_inference_budget_exceeded_error, +}; +#[cfg(any(test, debug_assertions))] +#[allow(unused_imports)] +pub(crate) use web_errors::{ + extract_provider_error_detail, extract_provider_name, generic_inference_error_user_message, + is_action_budget_exhausted, is_fallback_chain_exhausted, is_non_retryable_rate_limit_text, + parse_retry_after_secs_from_str, retry_after_hint, with_provider_detail, ClassifiedError, +}; + +// Public API — event bus +pub use event_bus::{ + publish_web_channel_event, register_approval_surface_subscriber, + register_artifact_surface_subscriber, subscribe_web_channel_events, 
+}; + +// Public API — operations +pub use ops::{ + cancel_chat, channel_web_cancel, channel_web_chat, channel_web_queue_clear, + channel_web_queue_status, in_flight_entries_for_test, invalidate_thread_sessions, start_chat, +}; +pub use types::ChatRequestMetadata; + +// Public API — schemas / controllers +pub use schemas::{ + all_web_channel_controller_schemas, all_web_channel_registered_controllers, schemas, +}; + +// Helpers re-exported for tests +pub(crate) use ops::{event_session_id_for, key_for}; +pub(crate) use progress_bridge::spawn_progress_bridge; +pub(crate) use session::{compose_system_prompt_suffix, locale_reply_directive}; + +// Schema field helpers re-exported for tests +pub(crate) use schemas::{ + json_output, optional_bool, optional_f64, optional_string, optional_u64, required_string, +}; + +// Test helpers (debug/test builds only) +#[cfg(any(test, debug_assertions))] +pub use ops::set_test_forced_run_chat_task_error; + +#[cfg(any(test, debug_assertions))] +pub(crate) use ops::THREAD_SESSIONS; +#[cfg(any(test, debug_assertions))] +pub(crate) use session::{normalize_model_override, provider_role_for_model_override}; +#[cfg(any(test, debug_assertions))] +pub(crate) use types::WebChatParams; + +#[cfg(any(test, debug_assertions))] +pub mod test_support { + #[derive(Debug, Clone, PartialEq, Eq)] + pub struct ClassifiedErrorSnapshot { + pub error_type: &'static str, + pub message: String, + pub source: &'static str, + pub retryable: bool, + pub retry_after_ms: Option, + pub provider: Option, + pub fallback_available: Option, + } + + pub fn classify_error_for_test(err: &str) -> ClassifiedErrorSnapshot { + let classified = super::classify_inference_error(err); + ClassifiedErrorSnapshot { + error_type: classified.error_type, + message: classified.message, + source: classified.source, + retryable: classified.retryable, + retry_after_ms: classified.retry_after_ms, + provider: classified.provider, + fallback_available: classified.fallback_available, + } + } + + 
pub fn extracted_provider_detail_for_test(err: &str) -> Option { + super::extract_provider_error_detail(err) + } + + pub fn retry_after_secs_for_test(err: &str) -> Option { + super::parse_retry_after_secs_from_str(err) + } + + pub fn is_non_retryable_rate_limit_for_test(lower: &str) -> bool { + super::is_non_retryable_rate_limit_text(lower) + } + + pub fn key_for_test(thread_id: &str) -> String { + super::key_for(thread_id) + } + + pub fn event_session_id_for_test(client_id: &str, thread_id: &str) -> String { + super::event_session_id_for(client_id, thread_id) + } + + pub async fn set_forced_run_chat_task_error_for_test(message: Option<&str>) { + super::set_test_forced_run_chat_task_error(message).await; + } +} + +#[cfg(test)] +pub(crate) use types::SessionCacheFingerprint; + +#[cfg(test)] +#[path = "../web_tests.rs"] +mod tests; diff --git a/src/openhuman/channels/providers/web/ops.rs b/src/openhuman/channels/providers/web/ops.rs new file mode 100644 index 0000000000..ddf77b42e9 --- /dev/null +++ b/src/openhuman/channels/providers/web/ops.rs @@ -0,0 +1,609 @@ +use std::collections::HashMap; + +use once_cell::sync::Lazy; +use serde_json::{json, Value}; +use tokio::sync::Mutex; +use uuid::Uuid; + +use crate::core::event_bus::DomainEvent; +use crate::core::socketio::WebChannelEvent; +use crate::openhuman::prompt_injection::{ + enforce_prompt_input, PromptEnforcementAction, PromptEnforcementContext, +}; +use crate::rpc::RpcOutcome; + +use super::event_bus::publish_web_channel_event; +use super::run_task::run_chat_task; +use super::types::{ChatRequestMetadata, InFlightEntry, SessionEntry}; +use super::web_errors::classify_inference_error; + +pub(crate) static THREAD_SESSIONS: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); + +pub(super) static IN_FLIGHT: Lazy>> = + Lazy::new(|| Mutex::new(HashMap::new())); + +#[cfg(any(test, debug_assertions))] +pub(super) static TEST_FORCED_RUN_CHAT_TASK_ERROR: Lazy>> = + Lazy::new(|| Mutex::new(None)); + +pub(crate) fn 
key_for(thread_id: &str) -> String { + thread_id.to_string() +} + +pub(crate) fn event_session_id_for(client_id: &str, thread_id: &str) -> String { + json!({ + "client_id": client_id, + "thread_id": thread_id, + }) + .to_string() +} + +fn prompt_guard_user_message(action: PromptEnforcementAction) -> &'static str { + match action { + PromptEnforcementAction::Allow => "Message accepted.", + PromptEnforcementAction::Blocked => { + "Your message was blocked by a security policy. Please rephrase and remove instruction-override or secret-exfiltration requests." + } + PromptEnforcementAction::ReviewBlocked => { + "Your message was flagged for security review and was not processed. Please rephrase the request in a direct, task-focused way." + } + } +} + +#[cfg(any(test, debug_assertions))] +pub async fn set_test_forced_run_chat_task_error(message: Option<&str>) { + let mut slot = TEST_FORCED_RUN_CHAT_TASK_ERROR.lock().await; + *slot = message.map(str::to_string); +} + +pub async fn start_chat( + client_id: &str, + thread_id: &str, + message: &str, + model_override: Option, + temperature: Option, + profile_id: Option, + locale: Option, + queue_mode: Option, + metadata: ChatRequestMetadata, +) -> Result { + let client_id = client_id.trim().to_string(); + let thread_id = thread_id.trim().to_string(); + let message = message.trim().to_string(); + + if client_id.is_empty() { + return Err("client_id is required".to_string()); + } + if thread_id.is_empty() { + return Err("thread_id is required".to_string()); + } + if message.is_empty() { + return Err("message is required".to_string()); + } + + let request_id = Uuid::new_v4().to_string(); + let prompt_decision = enforce_prompt_input( + &message, + PromptEnforcementContext { + source: "channels.providers.web.start_chat", + request_id: Some(&request_id), + user_id: Some(&client_id), + session_id: Some(&thread_id), + }, + ); + if !matches!(prompt_decision.action, PromptEnforcementAction::Allow) { + log::warn!( + "[web-channel] prompt 
rejected client_id={} thread_id={} request_id={} action={} score={:.2} reasons={} hash={} chars={}", + client_id, + thread_id, + request_id, + match prompt_decision.action { + PromptEnforcementAction::Allow => "allow", + PromptEnforcementAction::Blocked => "block", + PromptEnforcementAction::ReviewBlocked => "review_blocked", + }, + prompt_decision.score, + prompt_decision + .reasons + .iter() + .map(|r| r.code.as_str()) + .collect::>() + .join(","), + prompt_decision.prompt_hash, + prompt_decision.prompt_chars, + ); + return Err(prompt_guard_user_message(prompt_decision.action).to_string()); + } + + // Chat-native approval: if this thread has a parked approval and the message + // is a yes/no reply, route it to the gate rather than starting a new turn. + if let Some(gate) = crate::openhuman::approval::ApprovalGate::try_global() { + if let Some(request_id) = gate.pending_for_thread(&thread_id) { + if let Some(decision) = crate::openhuman::approval::parse_approval_reply(&message) { + match gate.decide(&request_id, decision) { + Ok(Some(_)) => { + log::info!( + "[web-channel] routed chat reply to approval gate thread_id={} request_id={} decision={}", + thread_id, + request_id, + decision.as_str() + ); + return Ok(request_id); + } + Ok(None) => { + log::warn!( + "[web-channel] approval reply targeted a non-pending/already-decided request thread_id={} request_id={} decision={} — dispatching as fresh turn", + thread_id, + request_id, + decision.as_str() + ); + } + Err(err) => { + log::warn!( + "[web-channel] failed to route chat reply to approval gate thread_id={} request_id={} decision={} err={}", + thread_id, + request_id, + decision.as_str(), + err + ); + } + } + } + } + } + + let map_key = key_for(&thread_id); + + let parsed_mode = match queue_mode.as_deref() { + Some("steer") => crate::openhuman::agent::harness::run_queue::QueueMode::Steer, + Some("followup") => crate::openhuman::agent::harness::run_queue::QueueMode::Followup, + Some("collect") => 
crate::openhuman::agent::harness::run_queue::QueueMode::Collect, + _ => crate::openhuman::agent::harness::run_queue::QueueMode::Interrupt, + }; + + // Non-interrupt modes: push into the running turn's queue and return. + if !matches!( + parsed_mode, + crate::openhuman::agent::harness::run_queue::QueueMode::Interrupt + ) { + let in_flight = IN_FLIGHT.lock().await; + if let Some(existing) = in_flight.get(&map_key) { + let queued_msg = crate::openhuman::agent::harness::run_queue::QueuedMessage { + text: message.clone(), + mode: parsed_mode, + client_id: client_id.clone(), + thread_id: thread_id.clone(), + queued_at_ms: std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap_or_default() + .as_millis() as u64, + model_override: model_override.clone(), + temperature, + profile_id: profile_id.clone(), + locale: locale.clone(), + }; + existing.run_queue.push(queued_msg).await; + let status = existing.run_queue.status().await; + log::info!( + "[web-channel] queued {} message thread_id={} request_id={} queue_depth={}", + parsed_mode, + thread_id, + request_id, + status.total + ); + crate::core::event_bus::publish_global(DomainEvent::RunQueueMessageQueued { + thread_id: thread_id.clone(), + mode: parsed_mode.to_string(), + queue_depth: status.total, + }); + return Ok(json!({ + "queued": true, + "queue_mode": parsed_mode.to_string(), + "client_id": client_id, + "thread_id": thread_id, + "request_id": request_id, + "queue_depth": status.total, + }) + .to_string()); + } + log::info!( + "[web-channel] no in-flight turn for {} mode thread_id={} — starting fresh", + parsed_mode, + thread_id + ); + } + + { + let mut in_flight = IN_FLIGHT.lock().await; + + if let Some(existing) = in_flight.remove(&map_key) { + let cancelled_id = existing.request_id.clone(); + existing.handle.abort(); + log::info!( + "[web-channel] interrupted in-flight turn thread_id={} cancelled_request_id={}", + thread_id, + existing.request_id + ); + 
crate::core::event_bus::publish_global(DomainEvent::RunQueueInterrupted { + thread_id: thread_id.clone(), + cancelled_request_id: existing.request_id.clone(), + }); + publish_web_channel_event(WebChannelEvent { + event: "chat_error".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: cancelled_id, + message: Some("Cancelled by newer request".to_string()), + error_type: Some("cancelled".to_string()), + ..Default::default() + }); + } + } + + let turn_run_queue = crate::openhuman::agent::harness::run_queue::RunQueue::new(); + let turn_run_queue_task = turn_run_queue.clone(); + + let client_id_task = client_id.clone(); + let thread_id_task = thread_id.clone(); + let request_id_task = request_id.clone(); + let map_key_task = map_key.clone(); + + let user_message = message.clone(); + let handle = tokio::spawn(async move { + let approval_ctx = crate::openhuman::approval::ApprovalChatContext { + thread_id: thread_id_task.clone(), + client_id: client_id_task.clone(), + }; + let origin = crate::openhuman::agent::turn_origin::AgentTurnOrigin::WebChat { + thread_id: thread_id_task.clone(), + client_id: client_id_task.clone(), + }; + let result = crate::openhuman::agent::turn_origin::with_origin( + origin, + crate::openhuman::approval::APPROVAL_CHAT_CONTEXT.scope( + approval_ctx, + run_chat_task( + &client_id_task, + &thread_id_task, + &request_id_task, + &user_message, + model_override, + temperature, + profile_id, + locale, + turn_run_queue_task, + metadata, + ), + ), + ) + .await; + + match result { + Ok(chat_result) => { + crate::openhuman::channels::providers::presentation::deliver_response( + &client_id_task, + &thread_id_task, + &request_id_task, + &chat_result.full_response, + &user_message, + &chat_result.citations, + ) + .await; + } + Err(err) => { + log::warn!( + "[web-channel] run_chat_task failed client_id={} thread_id={} request_id={} error={}", + client_id_task, + thread_id_task, + request_id_task, + err + ); + let detailed 
= format!( + "run_chat_task failed client_id={} thread_id={} request_id={} error={}", + client_id_task, thread_id_task, request_id_task, err + ); + let classified = classify_inference_error(&err); + let classified_type = classified.error_type; + let classified_type_string = classified_type.to_string(); + if crate::openhuman::agent::error::is_max_iterations_error(&detailed) { + log::info!( + target: "web_channel", + "[web_channel.run_chat_task] suppressed Sentry emission for max-iteration \ + cap client_id={} thread_id={} request_id={} error_type={} message={}", + client_id_task, + thread_id_task, + request_id_task, + classified_type, + detailed + ); + } else { + crate::core::observability::report_error_or_expected( + detailed.as_str(), + "web_channel", + "run_chat_task", + &[ + ("channel", "web"), + ("error_type", classified_type), + ("thread_id", thread_id_task.as_str()), + ("request_id", request_id_task.as_str()), + ], + ); + } + publish_web_channel_event(WebChannelEvent { + event: "chat_error".to_string(), + client_id: client_id_task.clone(), + thread_id: thread_id_task.clone(), + request_id: request_id_task.clone(), + message: Some(classified.message), + error_type: Some(classified_type_string), + error_source: Some(classified.source.to_string()), + error_retryable: Some(classified.retryable), + error_retry_after_ms: classified.retry_after_ms, + error_provider: classified.provider, + error_fallback_available: classified.fallback_available, + ..Default::default() + }); + } + } + + // Drain followup messages queued during this turn. 
+ let followups = { + let mut in_flight = IN_FLIGHT.lock().await; + let followups = if let Some(current) = in_flight.get(&map_key_task) { + if current.request_id == request_id_task { + let fups = current.run_queue.drain_followups().await; + in_flight.remove(&map_key_task); + fups + } else { + Vec::new() + } + } else { + Vec::new() + }; + followups + }; + if !followups.is_empty() { + log::info!( + "[web-channel] dispatching {} followup(s) thread_id={}", + followups.len(), + thread_id_task + ); + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::RunQueueFollowupDispatched { + thread_id: thread_id_task.clone(), + followup_count: followups.len(), + }, + ); + dispatch_followups(followups); + } + }); + + { + let mut in_flight = IN_FLIGHT.lock().await; + in_flight.insert( + map_key, + InFlightEntry { + request_id: request_id.clone(), + handle, + run_queue: turn_run_queue, + }, + ); + } + + Ok(request_id) +} + +fn dispatch_followups(followups: Vec) { + for fup in followups { + tokio::spawn(async move { + if let Err(err) = start_chat( + &fup.client_id, + &fup.thread_id, + &fup.text, + fup.model_override, + fup.temperature, + fup.profile_id, + fup.locale, + Some("followup".to_string()), + ChatRequestMetadata::default(), + ) + .await + { + log::warn!( + "[web-channel] failed to dispatch followup thread_id={} err={}", + fup.thread_id, + err + ); + } + }); + } +} + +pub async fn invalidate_thread_sessions(thread_id: &str) { + let mut sessions = THREAD_SESSIONS.lock().await; + let keys_to_remove: Vec = sessions + .keys() + .filter(|k| k.as_str() == thread_id || k.ends_with(&format!("::{thread_id}"))) + .cloned() + .collect(); + for key in &keys_to_remove { + sessions.remove(key); + } + if !keys_to_remove.is_empty() { + log::debug!( + "[web-channel] invalidated {} cached session(s) for thread_id={}", + keys_to_remove.len(), + thread_id + ); + } +} + +pub async fn in_flight_entries_for_test() -> Vec<(String, String)> { + let guard = 
IN_FLIGHT.lock().await; + guard + .iter() + .map(|(k, v)| (k.clone(), v.request_id.clone())) + .collect() +} + +pub async fn cancel_chat(client_id: &str, thread_id: &str) -> Result, String> { + let client_id = client_id.trim(); + let thread_id = thread_id.trim(); + + if client_id.is_empty() { + return Err("client_id is required".to_string()); + } + if thread_id.is_empty() { + return Err("thread_id is required".to_string()); + } + + let map_key = key_for(thread_id); + let mut removed_request_id: Option = None; + + { + let mut in_flight = IN_FLIGHT.lock().await; + if let Some(existing) = in_flight.remove(&map_key) { + removed_request_id = Some(existing.request_id.clone()); + existing.handle.abort(); + } + } + + if let Some(request_id) = removed_request_id.clone() { + publish_web_channel_event(WebChannelEvent { + event: "chat_error".to_string(), + client_id: client_id.to_string(), + thread_id: thread_id.to_string(), + request_id, + message: Some("Cancelled".to_string()), + error_type: Some("cancelled".to_string()), + ..Default::default() + }); + } + + Ok(removed_request_id) +} + +pub async fn channel_web_chat( + client_id: &str, + thread_id: &str, + message: &str, + model_override: Option, + temperature: Option, + profile_id: Option, + locale: Option, + queue_mode: Option, + metadata: ChatRequestMetadata, +) -> Result, String> { + let result = start_chat( + client_id, + thread_id, + message, + model_override, + temperature, + profile_id, + locale, + queue_mode, + metadata, + ) + .await?; + + if let Ok(parsed) = serde_json::from_str::(&result) { + return Ok(RpcOutcome::single_log(parsed, "web channel message queued")); + } + + Ok(RpcOutcome::single_log( + json!({ + "accepted": true, + "client_id": client_id.trim(), + "thread_id": thread_id.trim(), + "request_id": result, + }), + "web channel request accepted", + )) +} + +pub async fn channel_web_queue_status(thread_id: &str) -> Result, String> { + let map_key = key_for(thread_id); + let in_flight = 
IN_FLIGHT.lock().await; + if let Some(entry) = in_flight.get(&map_key) { + let status = entry.run_queue.status().await; + Ok(RpcOutcome::single_log( + json!({ + "thread_id": thread_id.trim(), + "active": true, + "request_id": entry.request_id, + "steers": status.steers, + "followups": status.followups, + "collects": status.collects, + "total": status.total, + }), + "queue status retrieved", + )) + } else { + Ok(RpcOutcome::single_log( + json!({ + "thread_id": thread_id.trim(), + "active": false, + "steers": 0, + "followups": 0, + "collects": 0, + "total": 0, + }), + "no active turn for thread", + )) + } +} + +pub async fn channel_web_queue_clear(thread_id: &str) -> Result, String> { + let map_key = key_for(thread_id); + let in_flight = IN_FLIGHT.lock().await; + if let Some(entry) = in_flight.get(&map_key) { + let dropped = entry.run_queue.clear().await; + log::info!( + "[web-channel] cleared queue thread_id={} dropped={}", + thread_id, + dropped + ); + Ok(RpcOutcome::single_log( + json!({ + "thread_id": thread_id.trim(), + "cleared": true, + "dropped": dropped, + }), + "queue cleared", + )) + } else { + Ok(RpcOutcome::single_log( + json!({ + "thread_id": thread_id.trim(), + "cleared": false, + "dropped": 0, + }), + "no active turn for thread", + )) + } +} + +pub async fn channel_web_cancel( + client_id: &str, + thread_id: &str, +) -> Result, String> { + let cancelled_request_id = cancel_chat(client_id, thread_id).await?; + + let cancelled = if cancelled_request_id.is_some() { + true + } else { + crate::openhuman::agent::task_dispatcher::cancel_session(thread_id.trim()).await + }; + + Ok(RpcOutcome::single_log( + json!({ + "cancelled": cancelled, + "client_id": client_id.trim(), + "thread_id": thread_id.trim(), + "request_id": cancelled_request_id, + }), + "web channel cancellation processed", + )) +} diff --git a/src/openhuman/channels/providers/web/progress_bridge.rs b/src/openhuman/channels/providers/web/progress_bridge.rs new file mode 100644 index 
0000000000..66ff51b94a --- /dev/null +++ b/src/openhuman/channels/providers/web/progress_bridge.rs @@ -0,0 +1,951 @@ +use serde_json::json; + +use crate::core::socketio::{SubagentProgressDetail, WebChannelEvent}; +use crate::openhuman::threads::turn_state::{TurnStateMirror, TurnStateStore}; + +use super::event_bus::publish_web_channel_event; +use super::types::ChatRequestMetadata; + +pub(super) fn ledger_upsert_agent_run( + config: &crate::openhuman::config::Config, + upsert: crate::openhuman::session_db::run_ledger::AgentRunUpsert, +) { + if let Err(err) = crate::openhuman::session_db::run_ledger::upsert_agent_run(config, upsert) { + log::warn!("[run_ledger][web_channel] failed to upsert run: {err}"); + } +} + +pub(super) fn ledger_append_event( + config: &crate::openhuman::config::Config, + event: crate::openhuman::session_db::run_ledger::RunEventAppend, +) { + if let Err(err) = crate::openhuman::session_db::run_ledger::append_run_event(config, event) { + log::warn!("[run_ledger][web_channel] failed to append event: {err}"); + } +} + +pub(super) fn ledger_upsert_telemetry( + config: &crate::openhuman::config::Config, + telemetry: crate::openhuman::session_db::run_ledger::RunTelemetryUpsert, +) { + if let Err(err) = + crate::openhuman::session_db::run_ledger::upsert_run_telemetry(config, telemetry) + { + log::warn!("[run_ledger][web_channel] failed to upsert telemetry: {err}"); + } +} + +/// Spawn a background task that reads [`AgentProgress`] events from the +/// agent turn loop and translates them into [`WebChannelEvent`]s tagged +/// with the correct client/thread/request IDs. The task runs until the +/// sender is dropped (i.e. when the agent turn finishes). 
+pub(crate) fn spawn_progress_bridge( + mut rx: tokio::sync::mpsc::Receiver, + client_id: String, + thread_id: String, + request_id: String, + turn_state_store: TurnStateStore, + metadata: ChatRequestMetadata, + config: crate::openhuman::config::Config, +) { + use crate::openhuman::agent::progress::AgentProgress; + use crate::openhuman::session_db::run_ledger::{ + AgentRunKind, AgentRunStatus, AgentRunUpsert, RunEventAppend, RunTelemetryUpsert, + }; + use std::collections::HashMap; + + tokio::spawn(async move { + log::debug!( + "[web_channel][bridge] spawned client_id={} thread_id={} request_id={} speak_reply={:?} source={:?} session_id={:?}", + client_id, + thread_id, + request_id, + metadata.speak_reply, + metadata.source, + metadata.session_id, + ); + let mut round: u32 = 0; + let mut events_seen: u64 = 0; + let mut parent_completed = false; + let mut parent_tool_count: u64 = 0; + let mut child_tool_counts: HashMap = HashMap::new(); + let mut turn_state = + TurnStateMirror::new(turn_state_store, thread_id.clone(), request_id.clone()); + while let Some(event) = rx.recv().await { + events_seen += 1; + turn_state.observe(&event); + match &event { + AgentProgress::TextDelta { delta, iteration } => { + log::trace!( + "[web_channel][bridge] text_delta round={} chars={} request_id={}", + iteration, + delta.len(), + request_id, + ); + } + AgentProgress::ThinkingDelta { delta, iteration } => { + log::trace!( + "[web_channel][bridge] thinking_delta round={} chars={} request_id={}", + iteration, + delta.len(), + request_id, + ); + } + AgentProgress::ToolCallArgsDelta { + call_id, + tool_name, + delta, + iteration, + } => { + log::trace!( + "[web_channel][bridge] tool_args_delta round={} tool={} call_id={} chars={} request_id={}", + iteration, + tool_name, + call_id, + delta.len(), + request_id, + ); + } + AgentProgress::ToolCallStarted { + call_id, + tool_name, + iteration, + .. 
+ } => { + log::debug!( + "[web_channel][bridge] tool_call round={} tool={} call_id={} request_id={}", + iteration, + tool_name, + call_id, + request_id, + ); + } + AgentProgress::ToolCallCompleted { + call_id, + tool_name, + success, + iteration, + .. + } => { + log::debug!( + "[web_channel][bridge] tool_result round={} tool={} call_id={} success={} request_id={}", + iteration, + tool_name, + call_id, + success, + request_id, + ); + } + AgentProgress::SubagentFailed { + agent_id, error, .. + } => { + log::warn!( + "[web_channel][bridge] subagent_failed agent_id={} err={} client_id={} thread_id={} request_id={}", + agent_id, + error, + client_id, + thread_id, + request_id, + ); + } + other => { + log::debug!( + "[web_channel][bridge] lifecycle event={:?} request_id={}", + std::mem::discriminant(other), + request_id, + ); + } + } + match event { + AgentProgress::TurnStarted => { + ledger_upsert_agent_run( + &config, + AgentRunUpsert { + id: request_id.clone(), + kind: AgentRunKind::BackgroundAgent, + parent_run_id: None, + parent_thread_id: Some(thread_id.clone()), + agent_id: Some("orchestrator".to_string()), + status: AgentRunStatus::Running, + prompt_ref: Some(format!("thread:{thread_id}:request:{request_id}")), + worker_thread_id: None, + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: None, + checkpoint: None, + summary: None, + error: None, + metadata: json!({ + "clientId": client_id, + "source": "web_channel", + "schemaVersion": 1 + }), + started_at: None, + completed_at: None, + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: request_id.clone(), + event_type: "turn_started".to_string(), + payload: json!({ "threadId": thread_id, "clientId": client_id }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "inference_start".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + ..Default::default() + }); + } + 
AgentProgress::IterationStarted { + iteration, + max_iterations, + } => { + round = iteration; + publish_web_channel_event(WebChannelEvent { + event: "iteration_start".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + message: Some(format!("Iteration {iteration}/{max_iterations}")), + round: Some(iteration), + ..Default::default() + }); + } + AgentProgress::ToolCallStarted { + call_id, + tool_name, + arguments, + iteration, + } => { + parent_tool_count += 1; + ledger_append_event( + &config, + RunEventAppend { + run_id: request_id.clone(), + event_type: "tool_call_started".to_string(), + payload: json!({ + "callId": call_id, + "toolName": tool_name, + "iteration": iteration + }), + }, + ); + ledger_upsert_telemetry( + &config, + RunTelemetryUpsert { + run_id: request_id.clone(), + tool_count: Some(parent_tool_count), + ..Default::default() + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "tool_call".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + tool_name: Some(tool_name), + skill_id: Some("web_channel".to_string()), + args: Some(arguments), + round: Some(iteration), + tool_call_id: Some(call_id), + ..Default::default() + }); + } + AgentProgress::ToolCallCompleted { + call_id, + tool_name, + success, + output_chars, + elapsed_ms, + iteration, + } => { + ledger_append_event( + &config, + RunEventAppend { + run_id: request_id.clone(), + event_type: "tool_call_completed".to_string(), + payload: json!({ + "callId": call_id, + "toolName": tool_name, + "success": success, + "outputChars": output_chars, + "elapsedMs": elapsed_ms, + "iteration": iteration + }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "tool_result".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + tool_name: Some(tool_name), + skill_id: Some("web_channel".to_string()), + output: 
Some( + json!({"output_chars": output_chars, "elapsed_ms": elapsed_ms}) + .to_string(), + ), + success: Some(success), + round: Some(iteration), + tool_call_id: Some(call_id), + ..Default::default() + }); + } + AgentProgress::SubagentSpawned { + agent_id, + task_id, + mode, + dedicated_thread, + prompt_chars, + worker_thread_id, + display_name, + } => { + let label = display_name.as_deref().unwrap_or(&agent_id); + let kind = if worker_thread_id.is_some() { + AgentRunKind::WorkerThread + } else { + AgentRunKind::Subagent + }; + ledger_upsert_agent_run( + &config, + AgentRunUpsert { + id: task_id.clone(), + kind, + parent_run_id: Some(request_id.clone()), + parent_thread_id: Some(thread_id.clone()), + agent_id: Some(agent_id.clone()), + status: AgentRunStatus::Running, + prompt_ref: worker_thread_id + .as_ref() + .map(|id| format!("thread:{id}:message:seed")), + worker_thread_id: worker_thread_id.clone(), + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: None, + checkpoint: None, + summary: None, + error: None, + metadata: json!({ + "mode": mode, + "dedicatedThread": dedicated_thread, + "promptChars": prompt_chars, + "displayName": display_name, + "source": "agent_progress", + "schemaVersion": 1 + }), + started_at: None, + completed_at: None, + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: task_id.clone(), + event_type: "subagent_spawned".to_string(), + payload: json!({ + "agentId": agent_id, + "parentRunId": request_id, + "threadId": thread_id, + "workerThreadId": worker_thread_id, + "mode": mode, + "dedicatedThread": dedicated_thread, + "promptChars": prompt_chars, + "displayName": display_name + }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "subagent_spawned".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + message: Some(format!("Sub-agent '{label}' spawned")), + tool_name: Some(agent_id), + skill_id: Some(task_id), + 
round: Some(round), + subagent: Some(SubagentProgressDetail { + mode: Some(mode), + dedicated_thread: Some(dedicated_thread), + prompt_chars: Some(prompt_chars as u64), + worker_thread_id, + display_name, + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentCompleted { + agent_id, + task_id, + elapsed_ms, + iterations, + output_chars, + } => { + let completed_at = chrono::Utc::now(); + ledger_upsert_agent_run( + &config, + AgentRunUpsert { + id: task_id.clone(), + kind: AgentRunKind::Subagent, + parent_run_id: Some(request_id.clone()), + parent_thread_id: Some(thread_id.clone()), + agent_id: Some(agent_id.clone()), + status: AgentRunStatus::Completed, + prompt_ref: None, + worker_thread_id: None, + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: None, + checkpoint: None, + summary: Some(format!( + "Completed in {iterations} iteration(s), {output_chars} output chars" + )), + error: None, + metadata: json!({}), + started_at: None, + completed_at: Some(completed_at), + }, + ); + ledger_upsert_telemetry( + &config, + RunTelemetryUpsert { + run_id: task_id.clone(), + elapsed_ms: Some(elapsed_ms), + tool_count: child_tool_counts.get(&task_id).copied(), + ..Default::default() + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: task_id.clone(), + event_type: "subagent_completed".to_string(), + payload: json!({ + "agentId": agent_id, + "elapsedMs": elapsed_ms, + "iterations": iterations, + "outputChars": output_chars + }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "subagent_completed".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + message: Some(format!( + "Sub-agent '{agent_id}' completed in {elapsed_ms}ms" + )), + tool_name: Some(agent_id), + skill_id: Some(task_id), + success: Some(true), + round: Some(round), + subagent: Some(SubagentProgressDetail { + elapsed_ms: Some(elapsed_ms), + iterations: 
Some(iterations), + output_chars: Some(output_chars as u64), + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentFailed { + agent_id, + task_id, + error, + } => { + let completed_at = chrono::Utc::now(); + ledger_upsert_agent_run( + &config, + AgentRunUpsert { + id: task_id.clone(), + kind: AgentRunKind::Subagent, + parent_run_id: Some(request_id.clone()), + parent_thread_id: Some(thread_id.clone()), + agent_id: Some(agent_id.clone()), + status: AgentRunStatus::Failed, + prompt_ref: None, + worker_thread_id: None, + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: None, + checkpoint: None, + summary: None, + error: Some(error.clone()), + metadata: json!({}), + started_at: None, + completed_at: Some(completed_at), + }, + ); + ledger_upsert_telemetry( + &config, + RunTelemetryUpsert { + run_id: task_id.clone(), + tool_count: child_tool_counts.get(&task_id).copied(), + error: Some(error.clone()), + ..Default::default() + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: task_id.clone(), + event_type: "subagent_failed".to_string(), + payload: json!({ "agentId": agent_id, "error": error }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "subagent_failed".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + message: Some(error), + tool_name: Some(agent_id), + skill_id: Some(task_id), + success: Some(false), + round: Some(round), + ..Default::default() + }); + } + AgentProgress::SubagentAwaitingUser { + agent_id, + task_id, + question, + worker_thread_id, + } => { + log::debug!( + "[web_channel][bridge] subagent_awaiting_user agent_id={} task_id={} client_id={} thread_id={} request_id={}", + agent_id, + task_id, + client_id, + thread_id, + request_id, + ); + let checkpoint_path = config + .workspace_dir + .join(".openhuman/subagent_checkpoints") + .join(format!("{task_id}.json")); + ledger_upsert_agent_run( + 
&config, + AgentRunUpsert { + id: task_id.clone(), + kind: if worker_thread_id.is_some() { + AgentRunKind::WorkerThread + } else { + AgentRunKind::Subagent + }, + parent_run_id: Some(request_id.clone()), + parent_thread_id: Some(thread_id.clone()), + agent_id: Some(agent_id.clone()), + status: AgentRunStatus::AwaitingUser, + prompt_ref: None, + worker_thread_id: worker_thread_id.clone(), + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: Some(checkpoint_path.to_string_lossy().to_string()), + checkpoint: Some(json!({ + "resumeTool": "continue_subagent", + "taskId": task_id, + "agentId": agent_id, + "question": question, + "workerThreadId": worker_thread_id + })), + summary: Some(question.clone()), + error: None, + metadata: json!({}), + started_at: None, + completed_at: None, + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: task_id.clone(), + event_type: "subagent_awaiting_user".to_string(), + payload: json!({ + "agentId": agent_id, + "question": question, + "workerThreadId": worker_thread_id + }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "subagent_awaiting_user".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + message: Some(question), + tool_name: Some(agent_id), + skill_id: Some(task_id), + success: Some(true), + round: Some(round), + subagent: Some(SubagentProgressDetail { + worker_thread_id, + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentIterationStarted { + agent_id, + task_id, + iteration, + max_iterations, + extended_policy, + } => { + publish_web_channel_event(WebChannelEvent { + event: "subagent_iteration_start".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + message: Some(if extended_policy { + format!("Sub-agent '{agent_id}' step {iteration}") + } else { + format!("Sub-agent '{agent_id}' iteration 
{iteration}/{max_iterations}") + }), + tool_name: Some(agent_id), + skill_id: Some(task_id), + round: Some(round), + subagent: Some(SubagentProgressDetail { + child_iteration: Some(iteration), + child_max_iterations: if extended_policy { + None + } else { + Some(max_iterations) + }, + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentToolCallStarted { + agent_id, + task_id, + call_id, + tool_name, + iteration, + } => { + let count = child_tool_counts.entry(task_id.clone()).or_insert(0); + *count += 1; + ledger_upsert_telemetry( + &config, + RunTelemetryUpsert { + run_id: task_id.clone(), + tool_count: Some(*count), + ..Default::default() + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: task_id.clone(), + event_type: "subagent_tool_call_started".to_string(), + payload: json!({ + "agentId": agent_id, + "callId": call_id, + "toolName": tool_name, + "iteration": iteration + }), + }, + ); + publish_web_channel_event(WebChannelEvent { + event: "subagent_tool_call".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + tool_name: Some(tool_name), + skill_id: Some(task_id.clone()), + round: Some(round), + tool_call_id: Some(call_id), + subagent: Some(SubagentProgressDetail { + child_iteration: Some(iteration), + agent_id: Some(agent_id), + task_id: Some(task_id), + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentToolCallCompleted { + agent_id, + task_id, + call_id, + tool_name, + success, + output_chars, + elapsed_ms, + iteration, + } => { + ledger_append_event( + &config, + RunEventAppend { + run_id: task_id.clone(), + event_type: "subagent_tool_call_completed".to_string(), + payload: json!({ + "agentId": agent_id, + "callId": call_id, + "toolName": tool_name, + "success": success, + "outputChars": output_chars, + "elapsedMs": elapsed_ms, + "iteration": iteration + }), + }, + ); + 
publish_web_channel_event(WebChannelEvent { + event: "subagent_tool_result".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + tool_name: Some(tool_name), + skill_id: Some(task_id.clone()), + success: Some(success), + round: Some(round), + tool_call_id: Some(call_id), + output: Some( + json!({"output_chars": output_chars, "elapsed_ms": elapsed_ms}) + .to_string(), + ), + subagent: Some(SubagentProgressDetail { + child_iteration: Some(iteration), + agent_id: Some(agent_id), + task_id: Some(task_id), + elapsed_ms: Some(elapsed_ms), + output_chars: Some(output_chars as u64), + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentTextDelta { + agent_id, + task_id, + delta, + iteration, + } => { + publish_web_channel_event(WebChannelEvent { + event: "subagent_text_delta".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + round: Some(round), + delta: Some(delta), + delta_kind: Some("text".to_string()), + skill_id: Some(task_id.clone()), + subagent: Some(SubagentProgressDetail { + child_iteration: Some(iteration), + agent_id: Some(agent_id), + task_id: Some(task_id), + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::SubagentThinkingDelta { + agent_id, + task_id, + delta, + iteration, + } => { + publish_web_channel_event(WebChannelEvent { + event: "subagent_thinking_delta".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + round: Some(round), + delta: Some(delta), + delta_kind: Some("thinking".to_string()), + skill_id: Some(task_id.clone()), + subagent: Some(SubagentProgressDetail { + child_iteration: Some(iteration), + agent_id: Some(agent_id), + task_id: Some(task_id), + ..Default::default() + }), + ..Default::default() + }); + } + AgentProgress::TaskBoardUpdated { board } => { + log::debug!( + "[web_channel][bridge] 
task_board_updated client_id={} thread_id={} request_id={} cards={}", + client_id, + thread_id, + request_id, + board.cards.len() + ); + publish_web_channel_event(WebChannelEvent { + event: "task_board_updated".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + task_board: Some(serde_json::to_value(board).unwrap_or_else( + |_| serde_json::json!({ "threadId": thread_id, "cards": [] }), + )), + ..Default::default() + }); + } + AgentProgress::TextDelta { delta, iteration } => { + publish_web_channel_event(WebChannelEvent { + event: "text_delta".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + round: Some(iteration), + delta: Some(delta), + delta_kind: Some("text".to_string()), + ..Default::default() + }); + } + AgentProgress::ThinkingDelta { delta, iteration } => { + publish_web_channel_event(WebChannelEvent { + event: "thinking_delta".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + round: Some(iteration), + delta: Some(delta), + delta_kind: Some("thinking".to_string()), + ..Default::default() + }); + } + AgentProgress::ToolCallArgsDelta { + call_id, + tool_name, + delta, + iteration, + } => { + publish_web_channel_event(WebChannelEvent { + event: "tool_args_delta".to_string(), + client_id: client_id.clone(), + thread_id: thread_id.clone(), + request_id: request_id.clone(), + tool_name: if tool_name.is_empty() { + None + } else { + Some(tool_name) + }, + skill_id: Some("web_channel".to_string()), + round: Some(iteration), + delta: Some(delta), + delta_kind: Some("tool_args".to_string()), + tool_call_id: Some(call_id), + ..Default::default() + }); + } + AgentProgress::TurnCompleted { iterations } => { + parent_completed = true; + let completed_at = chrono::Utc::now(); + ledger_upsert_agent_run( + &config, + AgentRunUpsert { + id: request_id.clone(), + kind: 
AgentRunKind::BackgroundAgent, + parent_run_id: None, + parent_thread_id: Some(thread_id.clone()), + agent_id: Some("orchestrator".to_string()), + status: AgentRunStatus::Completed, + prompt_ref: Some(format!("thread:{thread_id}:request:{request_id}")), + worker_thread_id: None, + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: None, + checkpoint: None, + summary: Some(format!("Completed in {iterations} iteration(s)")), + error: None, + metadata: json!({}), + started_at: None, + completed_at: Some(completed_at), + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: request_id.clone(), + event_type: "turn_completed".to_string(), + payload: json!({ "iterations": iterations }), + }, + ); + log::debug!( + "[web_channel] turn completed after {iterations} iteration(s) \ + client_id={client_id} thread_id={thread_id} request_id={request_id} \ + speak_reply={:?} source={:?} session_id={:?}", + metadata.speak_reply, + metadata.source, + metadata.session_id, + ); + } + AgentProgress::TurnCostUpdated { + model, + iteration, + input_tokens, + output_tokens, + cached_input_tokens, + total_usd, + } => { + ledger_upsert_telemetry( + &config, + RunTelemetryUpsert { + run_id: request_id.clone(), + input_tokens: Some(input_tokens), + output_tokens: Some(output_tokens), + cached_input_tokens: Some(cached_input_tokens), + cost_usd: Some(total_usd), + model: Some(model.clone()), + ..Default::default() + }, + ); + log::debug!( + "[web_channel] turn cost update model={model} iter={iteration} \ + in={input_tokens} out={output_tokens} cached_in={cached_input_tokens} \ + total_usd={total_usd:.4} client_id={client_id} thread_id={thread_id}" + ); + } + } + } + turn_state.finish(); + if !parent_completed { + ledger_upsert_agent_run( + &config, + AgentRunUpsert { + id: request_id.clone(), + kind: AgentRunKind::BackgroundAgent, + parent_run_id: None, + parent_thread_id: Some(thread_id.clone()), + agent_id: Some("orchestrator".to_string()), + 
status: AgentRunStatus::Interrupted, + prompt_ref: Some(format!("thread:{thread_id}:request:{request_id}")), + worker_thread_id: None, + task_board_id: Some(thread_id.clone()), + task_card_id: None, + checkpoint_path: None, + checkpoint: None, + summary: None, + error: Some("progress bridge exited before turn completion".to_string()), + metadata: json!({}), + started_at: None, + completed_at: Some(chrono::Utc::now()), + }, + ); + ledger_append_event( + &config, + RunEventAppend { + run_id: request_id.clone(), + event_type: "turn_interrupted".to_string(), + payload: json!({ "eventsSeen": events_seen }), + }, + ); + } + log::debug!( + "[web_channel][bridge] exit client_id={} thread_id={} request_id={} round={} events_seen={}", + client_id, + thread_id, + request_id, + round, + events_seen, + ); + }); +} diff --git a/src/openhuman/channels/providers/web/run_task.rs b/src/openhuman/channels/providers/web/run_task.rs new file mode 100644 index 0000000000..82bdfcda6c --- /dev/null +++ b/src/openhuman/channels/providers/web/run_task.rs @@ -0,0 +1,257 @@ +use std::sync::Arc; + +use crate::openhuman::agent::profiles::AgentProfileStore; +use crate::openhuman::config::rpc as config_rpc; +use crate::openhuman::threads::turn_state::TurnStateStore; + +use super::ops::{key_for, THREAD_SESSIONS}; +use super::progress_bridge::spawn_progress_bridge; +use super::session::{ + build_session_agent, build_session_fingerprint, normalize_model_override, pick_target_agent_id, + provider_role_for_model_override, +}; +use super::types::SessionEntry; +use super::types::{ChatRequestMetadata, WebChatTaskResult}; +use super::web_errors::{ + inference_budget_exceeded_user_message, is_inference_budget_exceeded_error, +}; + +#[cfg(any(test, debug_assertions))] +use super::ops::TEST_FORCED_RUN_CHAT_TASK_ERROR; + +pub(crate) async fn run_chat_task( + client_id: &str, + thread_id: &str, + request_id: &str, + message: &str, + model_override: Option, + temperature: Option, + profile_id: Option, + locale: 
Option, + run_queue: Arc, + metadata: ChatRequestMetadata, +) -> Result { + #[cfg(any(test, debug_assertions))] + { + let mut slot = TEST_FORCED_RUN_CHAT_TASK_ERROR.lock().await; + if let Some(forced) = slot.take() { + log::debug!( + "[web-channel][test] forced run_chat_task failure client_id={} thread_id={} request_id={}", + client_id, + thread_id, + request_id + ); + return Err(forced); + } + } + + let config = config_rpc::load_config_with_timeout().await?; + let (_profiles_state, profile) = + AgentProfileStore::new(config.workspace_dir.clone()).resolve(profile_id.as_deref())?; + let map_key = key_for(thread_id); + let model_override = normalize_model_override(profile.model_override.clone()) + .or_else(|| normalize_model_override(model_override)); + let temperature = profile.temperature.or(temperature); + let target_agent_id = pick_target_agent_id(&config, &profile); + let provider_role = provider_role_for_model_override(model_override.as_deref()); + let current_fp = build_session_fingerprint( + &config, + model_override.clone(), + temperature, + target_agent_id.clone(), + provider_role, + ); + + let prior = { + let mut sessions = THREAD_SESSIONS.lock().await; + sessions.remove(&map_key) + }; + + let (mut agent, was_built_fresh) = match prior { + Some(entry) if entry.fingerprint == current_fp => { + log::info!( + "[web-channel] reusing cached session agent id={} for client={} thread={}", + target_agent_id, + client_id, + thread_id + ); + (entry.agent, false) + } + Some(prior_entry) => { + log::info!( + "[web-channel] cache miss — rebuilding session agent \ + (was id={}, now id={}; prior_provider_binding={}, now={}) \ + for client={} thread={}", + prior_entry.fingerprint.target_agent_id, + target_agent_id, + prior_entry.fingerprint.provider_binding, + current_fp.provider_binding, + client_id, + thread_id + ); + ( + build_session_agent( + &config, + client_id, + thread_id, + &target_agent_id, + &profile, + model_override.clone(), + temperature, + locale.as_deref(), 
+ )?, + true, + ) + } + None => ( + build_session_agent( + &config, + client_id, + thread_id, + &target_agent_id, + &profile, + model_override.clone(), + temperature, + locale.as_deref(), + )?, + true, + ), + }; + + // Cold-boot resume from the conversation JSONL. + if was_built_fresh { + match crate::openhuman::memory_conversations::get_messages( + config.workspace_dir.clone(), + thread_id, + ) { + Ok(prior_messages) if !prior_messages.is_empty() => { + let pairs: Vec<(String, String)> = prior_messages + .into_iter() + .map(|m| (m.sender, m.content)) + .collect(); + if let Err(err) = agent.seed_resume_from_messages(pairs, message) { + log::warn!( + "[web-channel] failed to seed agent resume from conversation log \ + thread={} err={}", + thread_id, + err + ); + } + } + Ok(_) => { + log::debug!( + "[web-channel] no prior messages to seed for thread={} — first turn", + thread_id + ); + } + Err(err) => { + log::warn!( + "[web-channel] failed to read conversation log for resume thread={} err={}", + thread_id, + err + ); + } + } + } + + let (progress_tx, progress_rx) = tokio::sync::mpsc::channel(64); + agent.set_on_progress(Some(progress_tx)); + agent.set_run_queue(Some(run_queue)); + let turn_state_store = TurnStateStore::new(config.workspace_dir.clone()); + spawn_progress_bridge( + progress_rx, + client_id.to_string(), + thread_id.to_string(), + request_id.to_string(), + turn_state_store, + metadata.clone(), + config.clone(), + ); + + let result = match crate::openhuman::inference::provider::thread_context::with_thread_id( + thread_id.to_string(), + agent.run_single(message), + ) + .await + { + Ok(response) => { + let citations = agent.take_last_turn_citations(); + Ok(WebChatTaskResult { + full_response: response, + citations, + }) + } + Err(err) => { + let err_message = err.to_string(); + if is_inference_budget_exceeded_error(&err_message) { + log::warn!( + "[web-channel] inference budget exhausted for client={} thread={} request_id={} 
error_category=budget_exhausted", + client_id, + thread_id, + request_id + ); + Ok(WebChatTaskResult { + full_response: inference_budget_exceeded_user_message().to_string(), + citations: Vec::new(), + }) + } else { + Err(err_message) + } + } + }; + + if let Ok(ref task_result) = result { + let speak_reply = matches!(metadata.speak_reply, Some(true)); + let trimmed_response = task_result.full_response.trim(); + if speak_reply && !trimmed_response.is_empty() { + let opts = crate::openhuman::voice::reply_speech::ReplySpeechOptions::default(); + match crate::openhuman::voice::reply_speech::synthesize_reply( + &config, + &task_result.full_response, + &opts, + ) + .await + { + Ok(_) => log::debug!( + "[web_channel] reply_speech dispatched chars={} client_id={} thread_id={} request_id={}", + task_result.full_response.len(), + client_id, + thread_id, + request_id, + ), + Err(err) => log::warn!( + "[web_channel] reply_speech failed: {err} client_id={} thread_id={} request_id={}", + client_id, + thread_id, + request_id, + ), + } + } + if metadata.source.as_deref() == Some("ptt") { + if let Some(session_id) = metadata.session_id { + crate::openhuman::voice::publish_ptt_transcript_committed( + thread_id.to_string(), + session_id, + task_result.full_response.chars().count(), + 0, + false, + ); + } + } + } + + agent.set_on_progress(None); + + { + let mut sessions = THREAD_SESSIONS.lock().await; + sessions.insert( + map_key, + SessionEntry { + agent, + fingerprint: current_fp, + }, + ); + } + + result +} diff --git a/src/openhuman/channels/providers/web/schemas.rs b/src/openhuman/channels/providers/web/schemas.rs new file mode 100644 index 0000000000..4754db72b0 --- /dev/null +++ b/src/openhuman/channels/providers/web/schemas.rs @@ -0,0 +1,215 @@ +use serde_json::{Map, Value}; + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; +use crate::rpc::RpcOutcome; + +use super::ops::{ + channel_web_cancel, 
channel_web_chat, channel_web_queue_clear, channel_web_queue_status, +}; +use super::types::{ChatRequestMetadata, WebCancelParams, WebChatParams, WebQueueParams}; + +pub fn all_web_channel_controller_schemas() -> Vec { + vec![ + schemas("chat"), + schemas("cancel"), + schemas("queue_status"), + schemas("queue_clear"), + ] +} + +pub fn all_web_channel_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: schemas("chat"), + handler: handle_chat, + }, + RegisteredController { + schema: schemas("cancel"), + handler: handle_cancel, + }, + RegisteredController { + schema: schemas("queue_status"), + handler: handle_queue_status, + }, + RegisteredController { + schema: schemas("queue_clear"), + handler: handle_queue_clear, + }, + ] +} + +pub fn schemas(function: &str) -> ControllerSchema { + match function { + "chat" => ControllerSchema { + namespace: "channel", + function: "web_chat", + description: "Send a web channel message through the agent loop.", + inputs: vec![ + required_string("client_id", "Client stream identifier."), + required_string("thread_id", "Thread identifier."), + required_string("message", "User message."), + optional_string("model_override", "Optional model override."), + optional_f64("temperature", "Optional temperature override."), + optional_string("profile_id", "Optional agent profile id."), + optional_string( + "locale", + "Optional BCP-47 UI locale (e.g. 'ar', 'zh-CN'). Drives the \"reply in this language\" system-prompt directive.", + ), + optional_bool("speak_reply", "When true, the agent's final reply is spoken via TTS (for PTT and similar background voice flows)."), + optional_string("source", "Origin of the message: \"ptt\" | \"dictation\" | \"type\" | other. 
Used for analytics + downstream metadata."), + optional_u64("session_id", "Optional caller-provided correlation id (PTT session id)."), + optional_string( + "queue_mode", + "Queue mode: 'interrupt' (default), 'steer', 'followup', or 'collect'.", + ), + ], + outputs: vec![json_output("ack", "Acceptance payload.")], + }, + "cancel" => ControllerSchema { + namespace: "channel", + function: "web_cancel", + description: "Cancel in-flight web channel request for a thread.", + inputs: vec![ + required_string("client_id", "Client stream identifier."), + required_string("thread_id", "Thread identifier."), + ], + outputs: vec![json_output("ack", "Cancellation payload.")], + }, + "queue_status" => ControllerSchema { + namespace: "channel", + function: "web_queue_status", + description: "Get the run queue status for a thread.", + inputs: vec![required_string("thread_id", "Thread identifier.")], + outputs: vec![json_output("status", "Queue status payload.")], + }, + "queue_clear" => ControllerSchema { + namespace: "channel", + function: "web_queue_clear", + description: "Clear the run queue for a thread.", + inputs: vec![required_string("thread_id", "Thread identifier.")], + outputs: vec![json_output("result", "Queue clear result.")], + }, + _ => ControllerSchema { + namespace: "channel", + function: "unknown", + description: "Unknown web channel controller function.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "error", + ty: TypeSchema::String, + comment: "Lookup error details.", + required: true, + }], + }, + } +} + +fn handle_chat(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + to_json( + channel_web_chat( + &p.client_id, + &p.thread_id, + &p.message, + p.model_override, + p.temperature, + p.profile_id, + p.locale, + p.queue_mode, + ChatRequestMetadata { + speak_reply: p.speak_reply, + source: p.source, + session_id: p.session_id, + }, + ) + .await?, + ) + }) +} + +fn handle_queue_status(params: Map) -> 
ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + to_json(channel_web_queue_status(&p.thread_id).await?) + }) +} + +fn handle_queue_clear(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + to_json(channel_web_queue_clear(&p.thread_id).await?) + }) +} + +fn handle_cancel(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + to_json(channel_web_cancel(&p.client_id, &p.thread_id).await?) + }) +} + +fn deserialize_params( + params: Map, +) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +pub(crate) fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::String, + comment, + required: true, + } +} + +pub(crate) fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment, + required: false, + } +} + +pub(crate) fn optional_f64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment, + required: false, + } +} + +pub(crate) fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +pub(crate) fn optional_u64(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment, + required: false, + } +} + +pub(crate) fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Json, + comment, + required: true, + } +} + +fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} diff --git 
a/src/openhuman/channels/providers/web/session.rs b/src/openhuman/channels/providers/web/session.rs new file mode 100644 index 0000000000..95fbbb7654 --- /dev/null +++ b/src/openhuman/channels/providers/web/session.rs @@ -0,0 +1,212 @@ +use serde_json::json; +use std::collections::HashSet; + +use crate::openhuman::agent::profiles::{AgentProfile, DEFAULT_PROFILE_ID}; +use crate::openhuman::agent::Agent; +use crate::openhuman::config::Config; + +use super::types::SessionCacheFingerprint; + +pub(super) fn autonomy_signature(config: &Config) -> String { + serde_json::to_string(&config.autonomy).unwrap_or_default() +} + +pub(super) fn pick_target_agent_id(_config: &Config, profile: &AgentProfile) -> String { + if profile.id == DEFAULT_PROFILE_ID { + "orchestrator".to_string() + } else { + profile.agent_id.clone() + } +} + +pub(crate) fn normalize_model_override(model_override: Option) -> Option { + model_override + .map(|model| model.trim().to_string()) + .filter(|model| !model.is_empty()) +} + +pub(crate) fn provider_role_for_model_override(model_override: Option<&str>) -> &'static str { + match model_override.map(str::trim) { + Some("hint:agentic") | Some("agentic-v1") => "agentic", + Some("hint:coding") | Some("coding-v1") => "coding", + Some("hint:summarization") | Some("summarization-v1") => "summarization", + Some("hint:reasoning") => "reasoning", + _ => "chat", + } +} + +pub(super) fn build_session_agent( + config: &Config, + client_id: &str, + thread_id: &str, + target_agent_id: &str, + profile: &AgentProfile, + model_override: Option, + temperature: Option, + locale: Option<&str>, +) -> Result { + let mut effective = config.clone(); + if let Some(model) = model_override { + effective.default_model = Some(model); + } + let provider_role = provider_role_for_model_override(effective.default_model.as_deref()); + if let Some(temp) = temperature { + effective.default_temperature = temp; + } + + log::info!( + "[web-channel] routing chat turn to '{}' via profile '{}' 
provider_role='{}' (client_id={}, thread_id={})", + target_agent_id, + profile.id, + provider_role, + client_id, + thread_id + ); + + let reflection_chunks = load_reflection_chunks_for_thread(&effective.workspace_dir, thread_id); + + if let Some(chunks) = reflection_chunks + .as_ref() + .filter(|chunks| !chunks.is_empty()) + { + log::info!( + "[web-channel] thread={} spawned from reflection — injecting {} memory chunks into system prompt", + thread_id, + chunks.len() + ); + } + + let locale_directive = locale.and_then(locale_reply_directive); + let composed_suffix = compose_system_prompt_suffix( + locale_directive.as_deref(), + profile.system_prompt_suffix.as_deref(), + ); + if let Some(s) = locale_directive.as_deref() { + log::info!( + "[web-channel] injecting locale directive client={} thread={} locale={} directive={:?}", + client_id, + thread_id, + locale.unwrap_or(""), + s + ); + } + + let agent_result = Agent::from_config_for_agent_with_profile( + &effective, + target_agent_id, + reflection_chunks, + composed_suffix, + ); + + agent_result + .map(|mut agent| { + if let Some(allowed_tools) = profile + .allowed_tools + .as_ref() + .filter(|tools| !tools.is_empty()) + { + agent.set_visible_tool_names( + allowed_tools + .iter() + .map(|tool| tool.trim().to_string()) + .filter(|tool| !tool.is_empty()) + .collect::>(), + ); + } + agent.set_event_context( + json!({"client_id": client_id, "thread_id": thread_id}).to_string(), + "web_channel", + ); + let short_thread = if thread_id.len() > 12 { + &thread_id[..12] + } else { + thread_id + }; + agent.set_agent_definition_name(format!("{target_agent_id}_{short_thread}")); + agent + }) + .map_err(|e| e.to_string()) +} + +fn load_reflection_chunks_for_thread( + workspace_dir: &std::path::Path, + thread_id: &str, +) -> Option> { + let messages = crate::openhuman::memory_conversations::get_messages( + workspace_dir.to_path_buf(), + thread_id, + ) + .ok()?; + let first = messages.first()?; + let origin = first + .extra_metadata 
+ .get("origin") + .and_then(|v| v.as_str())?; + if origin != "subconscious_reflection" { + return None; + } + let reflection_id = first + .extra_metadata + .get("reflection_id") + .and_then(|v| v.as_str())? + .to_string(); + let reflection = + crate::openhuman::subconscious::store::with_connection(workspace_dir, |conn| { + crate::openhuman::subconscious::reflection_store::get_reflection(conn, &reflection_id) + }) + .ok() + .flatten()?; + Some(reflection.source_chunks) +} + +pub(crate) fn locale_reply_directive(locale: &str) -> Option { + let language = match locale.trim() { + "ar" => "Arabic", + "bn" => "Bengali", + "es" => "Spanish", + "fr" => "French", + "hi" => "Hindi", + "id" => "Indonesian", + "it" => "Italian", + "pt" => "Portuguese", + "ru" => "Russian", + "zh-CN" | "zh" => "Simplified Chinese", + _ => return None, + }; + Some(format!( + "User language: the user's interface is set to {language}. \ + Respond in {language} unless the user explicitly asks for a different language. \ + Keep proper nouns, code, and command names untranslated." 
+ )) +} + +pub(crate) fn compose_system_prompt_suffix( + locale_directive: Option<&str>, + profile_suffix: Option<&str>, +) -> Option { + match (locale_directive, profile_suffix) { + (None, None) => None, + (Some(d), None) => Some(d.to_string()), + (None, Some(p)) => Some(p.to_string()), + (Some(d), Some(p)) => Some(format!("{d}\n\n{p}")), + } +} + +pub(super) fn build_session_fingerprint( + config: &Config, + model_override: Option, + temperature: Option, + target_agent_id: String, + provider_role: &str, +) -> SessionCacheFingerprint { + SessionCacheFingerprint { + model_override, + temperature, + provider_binding: crate::openhuman::inference::provider::provider_for_role( + provider_role, + config, + ), + target_agent_id, + autonomy_signature: autonomy_signature(config), + } +} diff --git a/src/openhuman/channels/providers/web/types.rs b/src/openhuman/channels/providers/web/types.rs new file mode 100644 index 0000000000..c708c17807 --- /dev/null +++ b/src/openhuman/channels/providers/web/types.rs @@ -0,0 +1,93 @@ +use serde::Deserialize; + +use crate::openhuman::agent::Agent; + +/// All inputs that the cached `SessionEntry`'s `Agent` was built from, +/// captured at build time. The cache-hit predicate is a single +/// `entry.fingerprint == current_fingerprint` comparison — pulling the +/// fields into a named struct (instead of inlining four `&&`s) makes +/// the predicate testable in isolation and makes "what invalidates the +/// cache?" answerable in one place. +/// +/// Adding a new dimension that should force a rebuild = add a field +/// here and populate it both at insert time and at the call-site +/// fingerprint construction. 
+#[derive(PartialEq, Debug, Clone)] +pub(crate) struct SessionCacheFingerprint { + pub(super) model_override: Option, + pub(super) temperature: Option, + pub(super) target_agent_id: String, + pub(super) provider_binding: String, + pub(super) autonomy_signature: String, +} + +pub(super) struct SessionEntry { + pub(super) agent: Agent, + pub(super) fingerprint: SessionCacheFingerprint, +} + +#[derive(Debug)] +pub(super) struct InFlightEntry { + pub(super) request_id: String, + pub(super) handle: tokio::task::JoinHandle<()>, + pub(super) run_queue: std::sync::Arc, +} + +#[derive(Debug, Clone)] +pub(super) struct WebChatTaskResult { + pub(super) full_response: String, + pub(super) citations: Vec, +} + +/// Per-request metadata carried alongside a chat send. Currently used by the +/// PTT flow (Task 4 wires it to `voice::reply_speech`); other voice surfaces +/// can populate it the same way. +#[derive(Debug, Default, Clone)] +pub struct ChatRequestMetadata { + pub speak_reply: Option, + pub source: Option, + pub session_id: Option, +} + +#[derive(Debug, Deserialize)] +pub(crate) struct WebChatParams { + pub(super) client_id: String, + pub(super) thread_id: String, + pub(super) message: String, + pub(super) model_override: Option, + pub(super) temperature: Option, + pub(super) profile_id: Option, + /// BCP-47 locale of the frontend UI (e.g. `ar`, `zh-CN`). When set + /// and not English, the system prompt is augmented to ask the + /// agent to reply in that language. `None` keeps the agent's + /// default language (English) so existing integrations don't + /// silently change behaviour. + pub(super) locale: Option, + /// When `true`, the agent's final reply should be spoken via TTS + /// (for PTT and similar background voice flows). Accepted and + /// stored here; wired to TTS in Task 4. + #[serde(default)] + pub(super) speak_reply: Option, + /// Origin of the message: `"ptt"` | `"dictation"` | `"type"` | other. + /// Used for analytics and downstream metadata. 
+ #[serde(default)] + pub(super) source: Option, + /// Optional caller-provided correlation id (PTT session id). + #[serde(default)] + pub(super) session_id: Option, + /// Queue mode for concurrent messages: `interrupt` (default), `steer`, + /// `followup`, or `collect`. + #[serde(default)] + pub(super) queue_mode: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct WebQueueParams { + pub(super) thread_id: String, +} + +#[derive(Debug, Deserialize)] +pub(super) struct WebCancelParams { + pub(super) client_id: String, + pub(super) thread_id: String, +} diff --git a/src/openhuman/channels/runtime/dispatch/helpers.rs b/src/openhuman/channels/runtime/dispatch/helpers.rs new file mode 100644 index 0000000000..4c3b3a1525 --- /dev/null +++ b/src/openhuman/channels/runtime/dispatch/helpers.rs @@ -0,0 +1,207 @@ +//! Small stateless helpers used by the channel runtime dispatch path. +//! +//! Contains: +//! * [`build_channel_context_block`] — per-turn context injected for non-web +//! channels. +//! * [`select_acknowledgment_reaction`] — deterministic emoji picker. +//! * [`log_worker_join_result`] / [`spawn_scoped_typing_task`] — worker +//! lifecycle utilities. +//! * Private [`contains_any`] / [`starts_with_any`] predicates. + +use crate::openhuman::channels::context::CHANNEL_TYPING_REFRESH_INTERVAL_SECS; +use crate::openhuman::channels::traits; +use crate::openhuman::channels::Channel; +use std::sync::Arc; +use std::time::Duration; +use tokio_util::sync::CancellationToken; + +/// Maximum characters shown in the debug reply println. Large enough to not truncate +/// real responses while keeping terminal output readable. +pub(super) const REPLY_LOG_TRUNCATE_CHARS: usize = 200; + +/// Returns `true` if `s` contains any of the given substrings. +#[inline] +pub(super) fn contains_any(s: &str, words: &[&str]) -> bool { + words.iter().any(|w| s.contains(w)) +} + +/// Returns `true` if `s` starts with any of the given prefixes. 
+#[inline] +pub(super) fn starts_with_any(s: &str, prefixes: &[&str]) -> bool { + prefixes.iter().any(|p| s.starts_with(p)) +} + +/// Build the per-turn `[Channel context]` block prepended to the user +/// message for non-web inbound channels (e.g. Telegram, Discord, Slack). +/// +/// Surfaces the active channel and reply target so the model knows +/// where it is talking and can route any tool side-effects (notably +/// `cron_add`) back to the same chat instead of defaulting to the +/// in-app web stream. See issue #928. +/// +/// Returns an empty string for web/cli turns (the desktop UI is the +/// default delivery surface, no hint needed). +pub(super) fn build_channel_context_block(msg: &traits::ChannelMessage) -> String { + let channel = msg.channel.trim(); + if channel.is_empty() + || channel.eq_ignore_ascii_case("web") + || channel.eq_ignore_ascii_case("cli") + { + return String::new(); + } + + let reply_target = msg.reply_target.trim(); + if reply_target.is_empty() { + return String::new(); + } + + format!( + "[Channel context]\n\ + You are responding via the \"{channel}\" channel. Reply target: \"{reply_target}\".\n\ + For any cron/scheduled reminder you create with `cron_add`, set `delivery` to \ + `{{ \"mode\": \"announce\", \"channel\": \"{channel}\", \"to\": \"{reply_target}\" }}` \ + so the reminder is delivered back here instead of the in-app web stream. \ + Only fall back to the default proactive delivery if the user explicitly asks for \ + in-app/desktop notification.\n\n" + ) +} + +/// Pick a contextual acknowledgment emoji for an inbound message. +/// +/// Intent categories are checked in priority order. Within each category two +/// emoji options are defined; a cheap deterministic index (based on message +/// length + first char value) selects between them so that similar messages +/// don't always produce the identical reaction. +/// +/// All emojis used here are in Telegram's standard (non-premium) reaction set. 
+pub(super) fn select_acknowledgment_reaction(content: &str) -> &'static str { + let l = content.to_lowercase(); + + // Deterministic variant (0 or 1) — avoids true randomness while giving variety. + let v = content + .len() + .wrapping_add(content.chars().next().map_or(0, |c| c as usize)) + & 1; + + let opts: &[&str] = if contains_any(&l, &["thank", "thx", "appreciate", "grateful", "cheers"]) { + // Gratitude + &["❤️", "🙏"] + } else if contains_any( + &l, + &[ + "amazing", + "awesome", + "incredible", + "love it", + "congrat", + "!!", + ], + ) { + // Excitement / celebration + &["🔥", "🎉"] + } else if contains_any( + &l, + &[ + "price", "btc", "eth", "crypto", "trade", "pump", "dump", "market", "token", "wallet", + "defi", "nft", "sol", "bnb", + ], + ) { + // Crypto / finance + &["💯", "⚡"] + } else if contains_any( + &l, + &[ + "code", + "function", + "api", + "deploy", + "build", + "debug", + "script", + "git", + "rust", + "python", + "js", + "typescript", + ], + ) { + // Technical / dev + &["👨‍💻", "🤓"] + } else if starts_with_any( + &l, + &[ + "hi", + "hello", + "hey", + "sup", + "good morning", + "good evening", + "good afternoon", + ], + ) || l == "yo" + || l.starts_with("yo ") + { + // Greeting + &["🤗", "😁"] + } else if l.contains('?') + || starts_with_any( + &l, + &[ + "how", + "what", + "why", + "when", + "where", + "who", + "can you", + "could you", + "would you", + "is ", + "are ", + "do you", + "does", + ], + ) + { + // Question / help request + &["🤔", "✍️"] + } else { + // Default — "seen, on it" + &["👀", "✍️"] + }; + + opts[v % opts.len()] +} + +pub(super) fn log_worker_join_result(result: Result<(), tokio::task::JoinError>) { + if let Err(error) = result { + tracing::error!("Channel message worker crashed: {error}"); + } +} + +pub(super) fn spawn_scoped_typing_task( + channel: Arc, + recipient: String, + cancellation_token: CancellationToken, +) -> tokio::task::JoinHandle<()> { + let stop_signal = cancellation_token; + let refresh_interval = 
Duration::from_secs(CHANNEL_TYPING_REFRESH_INTERVAL_SECS); + let handle = tokio::spawn(async move { + loop { + tokio::select! { + () = stop_signal.cancelled() => break, + _ = tokio::time::sleep(refresh_interval) => { + if let Err(e) = channel.start_typing(&recipient).await { + tracing::debug!("Failed to start typing on {}: {e}", channel.name()); + } + } + } + } + + if let Err(e) = channel.stop_typing(&recipient).await { + tracing::debug!("Failed to stop typing on {}: {e}", channel.name()); + } + }); + + handle +} diff --git a/src/openhuman/channels/runtime/dispatch/mod.rs b/src/openhuman/channels/runtime/dispatch/mod.rs new file mode 100644 index 0000000000..a572000851 --- /dev/null +++ b/src/openhuman/channels/runtime/dispatch/mod.rs @@ -0,0 +1,287 @@ +//! Channel runtime loop and message processing. +//! +//! Sub-modules: +//! * [`helpers`] — small stateless helpers (context block, ACK reaction, typing, workers). +//! * [`routing`] — agent selection and tool-scoping ([`AgentScoping`], +//! [`resolve_target_agent`], [`build_visible_tool_set`]). +//! * [`processor`] — core message pipeline ([`process_channel_message`], +//! [`run_message_dispatch_loop`]) and approval-surface gate. + +mod helpers; +mod processor; +mod routing; + +pub(crate) use processor::{process_channel_message, run_message_dispatch_loop}; + +// `channel_has_approval_surface` stays pub(crate) on processor; re-export so +// the inline test module can reach it via `super::channel_has_approval_surface`. +#[cfg(test)] +use processor::channel_has_approval_surface; + +// Re-export internal helpers accessed by test_support (cfg(any(test, +// debug_assertions))) and the inline #[cfg(test)] modules via `super::*`. 
+#[cfg(any(test, debug_assertions))] +use helpers::{build_channel_context_block, select_acknowledgment_reaction}; + +#[cfg(test)] +use helpers::{contains_any, starts_with_any}; + +#[cfg(test)] +use routing::{build_visible_tool_set, AgentScoping}; + +#[cfg(any(test, debug_assertions))] +use crate::openhuman::channels::traits; + +#[cfg(any(test, debug_assertions))] +pub mod test_support { + //! Debug-build seams for raw integration coverage of dispatch helpers. + + use super::*; + + pub fn build_channel_context_block_for_test(msg: &traits::ChannelMessage) -> String { + build_channel_context_block(msg) + } + + pub fn select_acknowledgment_reaction_for_test(content: &str) -> &'static str { + select_acknowledgment_reaction(content) + } +} + +#[cfg(test)] +mod scoping_tests { + //! Pure-function unit tests for the agent-scoping helpers added by + //! the #525/#526 fix. These exercise the synchronous logic without + //! touching the real `Config::load_or_init` disk read or the global + //! `AgentDefinitionRegistry`, so they can run in any environment. + //! + //! End-to-end exercise of the dispatch path is covered by the + //! existing `runtime_dispatch::dispatch_routes_through_agent_run_turn_ + //! bus_handler` integration test, which still passes after the new + //! fields landed (the resolver gracefully falls back to + //! `AgentScoping::unscoped()` when no orchestrator is registered in + //! the test environment). + + use super::*; + use crate::openhuman::agent::harness::definition::{ + AgentDefinition, DefinitionSource, ModelSpec, PromptSource, SandboxMode, ToolScope, + }; + use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolCategory, ToolResult}; + use async_trait::async_trait; + + /// Minimal owned tool stub — just enough for `build_visible_tool_set` + /// to read its `name()`. 
+ struct StubTool { + name: &'static str, + } + + #[async_trait] + impl Tool for StubTool { + fn name(&self) -> &str { + self.name + } + fn description(&self) -> &str { + "stub" + } + fn parameters_schema(&self) -> serde_json::Value { + serde_json::json!({"type": "object"}) + } + fn category(&self) -> ToolCategory { + ToolCategory::System + } + fn permission_level(&self) -> PermissionLevel { + PermissionLevel::None + } + async fn execute(&self, _args: serde_json::Value) -> anyhow::Result { + Ok(ToolResult::success("ok")) + } + } + + fn def_with_scope(scope: ToolScope) -> AgentDefinition { + AgentDefinition { + id: "test_agent".into(), + when_to_use: "test".into(), + display_name: None, + system_prompt: PromptSource::Inline(String::new()), + omit_identity: true, + omit_memory_context: true, + omit_safety_preamble: true, + omit_skills_catalog: true, + omit_profile: true, + omit_memory_md: true, + model: ModelSpec::Inherit, + temperature: 0.4, + tools: scope, + disallowed_tools: vec![], + skill_filter: None, + extra_tools: vec![], + max_iterations: 8, + iteration_policy: Default::default(), + max_result_chars: None, + timeout_secs: None, + sandbox_mode: SandboxMode::None, + background: false, + subagents: vec![], + delegate_name: None, + agent_tier: crate::openhuman::agent::harness::definition::AgentTier::Worker, + source: DefinitionSource::Builtin, + } + } + + /// `ToolScope::Wildcard` must yield `None` — the prompt builder + /// treats `None` as "no filter, every tool visible", which is the + /// correct behaviour for agents like `integrations_agent` that want the + /// full skill-category catalogue. Even when extras are present, a + /// wildcard agent should not start filtering. 
+ #[test] + fn wildcard_scope_yields_none_filter() { + let def = def_with_scope(ToolScope::Wildcard); + let extras: Vec> = vec![Box::new(StubTool { name: "research" })]; + assert!(build_visible_tool_set(&def, &extras).is_none()); + assert!(build_visible_tool_set(&def, &[]).is_none()); + } + + /// `ToolScope::Named` with no extras returns exactly the named set. + /// For agents with a narrow tool scope (e.g. 2 tools in TOML, + /// no delegation, no extras) → 2 entries in the visibility whitelist. + #[test] + fn named_scope_without_extras_returns_named_only() { + let def = def_with_scope(ToolScope::Named(vec![ + "memory_recall".into(), + "ask_user_clarification".into(), + ])); + let set = build_visible_tool_set(&def, &[]).expect("named scope yields Some"); + assert_eq!(set.len(), 2); + assert!(set.contains("memory_recall")); + assert!(set.contains("ask_user_clarification")); + } + + /// `ToolScope::Named` with extras returns the union of the TOML + /// named list and the extras' names. This is the orchestrator's + /// path: direct tools from the TOML + the synthesised delegation + /// tools (`research`, `plan`, `delegate_to_integrations_agent`) + /// → all of them visible to the orchestrator's LLM. The stub + /// names in this test are arbitrary; they exercise the union + /// logic, not the real synthesiser. 
+ #[test] + fn named_scope_with_extras_returns_union() { + let def = def_with_scope(ToolScope::Named(vec![ + "query_memory".into(), + "ask_user_clarification".into(), + "spawn_subagent".into(), + ])); + let extras: Vec> = vec![ + Box::new(StubTool { name: "research" }), + Box::new(StubTool { + name: "delegate_gmail", + }), + Box::new(StubTool { + name: "delegate_github", + }), + ]; + let set = build_visible_tool_set(&def, &extras).expect("named scope yields Some"); + assert_eq!(set.len(), 6); + assert!(set.contains("query_memory")); + assert!(set.contains("ask_user_clarification")); + assert!(set.contains("spawn_subagent")); + assert!(set.contains("research")); + assert!(set.contains("delegate_gmail")); + assert!(set.contains("delegate_github")); + } + + /// Empty `Named` list with extras still yields `Some` containing + /// just the extras — useful for hypothetical agents that only + /// reach the world via delegation, with no direct tools. + #[test] + fn empty_named_with_extras_returns_extras_only() { + let def = def_with_scope(ToolScope::Named(vec![])); + let extras: Vec> = vec![Box::new(StubTool { + name: "delegate_only", + })]; + let set = build_visible_tool_set(&def, &extras).expect("named scope yields Some"); + assert_eq!(set.len(), 1); + assert!(set.contains("delegate_only")); + } + + /// Empty `Named` list with no extras yields an empty `Some(set)` — + /// effectively "no tools visible". The prompt loop's `is_visible` + /// helper treats `Some(empty)` differently from `None`: the former + /// means "filter active, nothing matches" so the LLM gets an empty + /// tool list, while the latter means "no filter at all". 
+ #[test] + fn empty_named_with_no_extras_returns_empty_set() { + let def = def_with_scope(ToolScope::Named(vec![])); + let set = build_visible_tool_set(&def, &[]).expect("named scope yields Some"); + assert!(set.is_empty()); + } + + /// Duplicate names across named + extras are de-duplicated by the + /// HashSet — no double-counting if a workspace override happens to + /// list a delegation tool name in the direct `named` list too. + #[test] + fn duplicate_names_across_named_and_extras_are_deduplicated() { + let def = def_with_scope(ToolScope::Named(vec![ + "research".into(), + "query_memory".into(), + ])); + let extras: Vec> = vec![ + Box::new(StubTool { name: "research" }), // collides with named + Box::new(StubTool { name: "plan" }), + ]; + let set = build_visible_tool_set(&def, &extras).expect("named scope yields Some"); + assert_eq!(set.len(), 3); + assert!(set.contains("research")); + assert!(set.contains("query_memory")); + assert!(set.contains("plan")); + } + + /// `AgentScoping::unscoped` is the safe-fallback constructor used + /// when the registry is uninitialised or the target agent isn't + /// found. All three fields must default to "no scoping applied" + /// so the channel turn runs with the legacy unfiltered behaviour. + #[test] + fn agent_scoping_unscoped_has_no_filter_or_extras() { + let scoping = AgentScoping::unscoped(); + assert!(scoping.target_agent_id.is_none()); + assert!(scoping.visible_tool_names.is_none()); + assert!(scoping.extra_tools.is_empty()); + } +} + +#[cfg(test)] +mod approval_surface_gating_tests { + use super::channel_has_approval_surface; + + // Sub-issue 2 of #3098: this gate is what decides whether the dispatch + // loop sets an `ApprovalChatContext` (→ gate fires for `Prompt`-class + // tools) versus the legacy bypass (→ tool calls silently allowed). + // Pin the matrix so silently broadening to a new channel can't + // accidentally TTL-deny every parked tool call there. 
+ + #[test] + fn telegram_has_approval_surface() { + assert!(channel_has_approval_surface("telegram")); + } + + #[test] + fn other_channels_do_not_yet_have_an_approval_surface() { + for channel in ["discord", "slack", "imessage", "mattermost", "web", "irc"] { + assert!( + !channel_has_approval_surface(channel), + "channel {channel:?} is not (yet) wired to a per-channel approval surface; \ + the dispatch loop must not scope an ApprovalChatContext for it or every \ + Prompt-class tool call will park with nobody to answer and TTL-deny" + ); + } + } + + #[test] + fn unknown_channel_does_not_have_approval_surface() { + assert!(!channel_has_approval_surface("")); + assert!(!channel_has_approval_surface("Telegram")); // case-sensitive on purpose + assert!(!channel_has_approval_surface("telegram-bot")); + } +} + +#[cfg(test)] +#[path = "../dispatch_tests.rs"] +mod tests; diff --git a/src/openhuman/channels/runtime/dispatch.rs b/src/openhuman/channels/runtime/dispatch/processor.rs similarity index 58% rename from src/openhuman/channels/runtime/dispatch.rs rename to src/openhuman/channels/runtime/dispatch/processor.rs index 23a5fe9a0b..a54302cff4 100644 --- a/src/openhuman/channels/runtime/dispatch.rs +++ b/src/openhuman/channels/runtime/dispatch/processor.rs @@ -1,650 +1,41 @@ -//! Channel runtime loop and message processing. +//! Core message processing loop for the channel runtime. +//! +//! Contains: +//! * [`channel_has_approval_surface`] — gate controlling per-channel approval +//! context scoping. +//! * [`try_route_approval_reply`] — intercepts yes/no approval replies before +//! dispatching a fresh agent turn. +//! * [`process_channel_message`] — full per-message pipeline: typing, ACK +//! reaction, history, agent turn, draft updates, reply. +//! * [`run_message_dispatch_loop`] — bounded-concurrency worker loop that feeds +//! messages into [`process_channel_message`]. 
use crate::core::event_bus::{ publish_global, request_native_global, DomainEvent, NativeRequestError, }; use crate::openhuman::agent::bus::{AgentTurnRequest, AgentTurnResponse, AGENT_RUN_TURN_METHOD}; -use crate::openhuman::agent::harness::definition::{ - AgentDefinition, AgentDefinitionRegistry, ToolScope, -}; use crate::openhuman::agent::progress::AgentProgress; use crate::openhuman::channels::context::{ build_memory_context, compact_sender_history, conversation_history_key, conversation_memory_key, is_context_window_overflow_error, ChannelRuntimeContext, - CHANNEL_TYPING_REFRESH_INTERVAL_SECS, MAX_CHANNEL_HISTORY, }; use crate::openhuman::channels::providers::telegram::TELEGRAM_APPROVAL_CLIENT_ID; use crate::openhuman::channels::routes::{ get_or_create_provider, get_route_selection, handle_runtime_command_if_needed, }; use crate::openhuman::channels::traits; -use crate::openhuman::channels::{Channel, SendMessage}; -use crate::openhuman::composio::fetch_connected_integrations; -use crate::openhuman::config::Config; +use crate::openhuman::channels::SendMessage; use crate::openhuman::inference::provider::{self, ChatMessage}; -use crate::openhuman::tools::{orchestrator_tools, Tool}; use crate::openhuman::util::truncate_with_ellipsis; -use std::collections::HashSet; use std::sync::Arc; use std::time::{Duration, Instant}; use tokio_util::sync::CancellationToken; -/// Maximum characters shown in the debug reply println. Large enough to not truncate -/// real responses while keeping terminal output readable. -const REPLY_LOG_TRUNCATE_CHARS: usize = 200; - -/// Returns `true` if `s` contains any of the given substrings. -#[inline] -fn contains_any(s: &str, words: &[&str]) -> bool { - words.iter().any(|w| s.contains(w)) -} - -/// Returns `true` if `s` starts with any of the given prefixes. 
-#[inline] -fn starts_with_any(s: &str, prefixes: &[&str]) -> bool { - prefixes.iter().any(|p| s.starts_with(p)) -} - -/// Build the per-turn `[Channel context]` block prepended to the user -/// message for non-web inbound channels (e.g. Telegram, Discord, Slack). -/// -/// Surfaces the active channel and reply target so the model knows -/// where it is talking and can route any tool side-effects (notably -/// `cron_add`) back to the same chat instead of defaulting to the -/// in-app web stream. See issue #928. -/// -/// Returns an empty string for web/cli turns (the desktop UI is the -/// default delivery surface, no hint needed). -fn build_channel_context_block(msg: &traits::ChannelMessage) -> String { - let channel = msg.channel.trim(); - if channel.is_empty() - || channel.eq_ignore_ascii_case("web") - || channel.eq_ignore_ascii_case("cli") - { - return String::new(); - } - - let reply_target = msg.reply_target.trim(); - if reply_target.is_empty() { - return String::new(); - } - - format!( - "[Channel context]\n\ - You are responding via the \"{channel}\" channel. Reply target: \"{reply_target}\".\n\ - For any cron/scheduled reminder you create with `cron_add`, set `delivery` to \ - `{{ \"mode\": \"announce\", \"channel\": \"{channel}\", \"to\": \"{reply_target}\" }}` \ - so the reminder is delivered back here instead of the in-app web stream. \ - Only fall back to the default proactive delivery if the user explicitly asks for \ - in-app/desktop notification.\n\n" - ) -} - -/// Pick a contextual acknowledgment emoji for an inbound message. -/// -/// Intent categories are checked in priority order. Within each category two -/// emoji options are defined; a cheap deterministic index (based on message -/// length + first char value) selects between them so that similar messages -/// don't always produce the identical reaction. -/// -/// All emojis used here are in Telegram's standard (non-premium) reaction set. 
-fn select_acknowledgment_reaction(content: &str) -> &'static str { - let l = content.to_lowercase(); - - // Deterministic variant (0 or 1) — avoids true randomness while giving variety. - let v = content - .len() - .wrapping_add(content.chars().next().map_or(0, |c| c as usize)) - & 1; - - let opts: &[&str] = if contains_any(&l, &["thank", "thx", "appreciate", "grateful", "cheers"]) { - // Gratitude - &["❤️", "🙏"] - } else if contains_any( - &l, - &[ - "amazing", - "awesome", - "incredible", - "love it", - "congrat", - "!!", - ], - ) { - // Excitement / celebration - &["🔥", "🎉"] - } else if contains_any( - &l, - &[ - "price", "btc", "eth", "crypto", "trade", "pump", "dump", "market", "token", "wallet", - "defi", "nft", "sol", "bnb", - ], - ) { - // Crypto / finance - &["💯", "⚡"] - } else if contains_any( - &l, - &[ - "code", - "function", - "api", - "deploy", - "build", - "debug", - "script", - "git", - "rust", - "python", - "js", - "typescript", - ], - ) { - // Technical / dev - &["👨‍💻", "🤓"] - } else if starts_with_any( - &l, - &[ - "hi", - "hello", - "hey", - "sup", - "good morning", - "good evening", - "good afternoon", - ], - ) || l == "yo" - || l.starts_with("yo ") - { - // Greeting - &["🤗", "😁"] - } else if l.contains('?') - || starts_with_any( - &l, - &[ - "how", - "what", - "why", - "when", - "where", - "who", - "can you", - "could you", - "would you", - "is ", - "are ", - "do you", - "does", - ], - ) - { - // Question / help request - &["🤔", "✍️"] - } else { - // Default — "seen, on it" - &["👀", "✍️"] - }; - - opts[v % opts.len()] -} - -fn log_worker_join_result(result: Result<(), tokio::task::JoinError>) { - if let Err(error) = result { - tracing::error!("Channel message worker crashed: {error}"); - } -} - -fn spawn_scoped_typing_task( - channel: Arc, - recipient: String, - cancellation_token: CancellationToken, -) -> tokio::task::JoinHandle<()> { - let stop_signal = cancellation_token; - let refresh_interval = 
Duration::from_secs(CHANNEL_TYPING_REFRESH_INTERVAL_SECS); - let handle = tokio::spawn(async move { - loop { - tokio::select! { - () = stop_signal.cancelled() => break, - _ = tokio::time::sleep(refresh_interval) => { - if let Err(e) = channel.start_typing(&recipient).await { - tracing::debug!("Failed to start typing on {}: {e}", channel.name()); - } - } - } - } - - if let Err(e) = channel.stop_typing(&recipient).await { - tracing::debug!("Failed to stop typing on {}: {e}", channel.name()); - } - }); - - handle -} - -/// Per-turn scoping fields derived from the active agent definition. -/// -/// Carries the three new fields that get spliced into [`AgentTurnRequest`] -/// in [`process_channel_message`]. Constructed by [`resolve_target_agent`] -/// after reading `config.onboarding_completed`, looking up the matching -/// definition in [`AgentDefinitionRegistry`], and synthesising any -/// per-turn delegation tools the agent needs. -struct AgentScoping { - target_agent_id: Option, - visible_tool_names: Option>, - extra_tools: Vec>, -} - -impl AgentScoping { - /// Empty scoping — preserves the legacy "every tool in the global - /// registry is visible" behaviour. Returned when the registry isn't - /// initialised yet (early startup) or when the target agent - /// definition isn't found, so the channel layer never crashes the - /// runtime over a routing miss. - fn unscoped() -> Self { - Self { - target_agent_id: None, - visible_tool_names: None, - extra_tools: Vec::new(), - } - } -} - -/// Decide which agent should run for this channel turn and build the -/// matching tool-scoping payload. -/// -/// All channel turns route directly to the `orchestrator` agent. The -/// welcome agent has been removed; the Joyride walkthrough in the -/// frontend handles onboarding UI instead. 
-/// -/// On any failure path (missing registry, missing definition, missing -/// orchestrator delegation targets) the function logs and returns -/// [`AgentScoping::unscoped`], which lets the turn run with the legacy -/// unfiltered behaviour rather than failing the whole message. -async fn resolve_target_agent(channel: &str) -> AgentScoping { - let config = match Config::load_or_init().await { - Ok(c) => c, - Err(err) => { - tracing::warn!( - channel = %channel, - error = %err, - "[dispatch::routing] failed to load config — falling back to unscoped turn" - ); - return AgentScoping::unscoped(); - } - }; - - let target_id = "orchestrator"; - - tracing::info!( - channel = %channel, - target_agent = target_id, - ui_onboarding_completed = config.onboarding_completed, - "[dispatch::routing] selected target agent" - ); - - let registry = match AgentDefinitionRegistry::global() { - Some(reg) => reg, - None => { - tracing::warn!( - channel = %channel, - target_agent = target_id, - "[dispatch::routing] AgentDefinitionRegistry not initialised — falling back to unscoped turn" - ); - return AgentScoping::unscoped(); - } - }; - - let definition = match registry.get(target_id) { - Some(def) => def, - None => { - tracing::warn!( - channel = %channel, - target_agent = target_id, - "[dispatch::routing] target agent not in registry — falling back to unscoped turn" - ); - return AgentScoping::unscoped(); - } - }; - - // Synthesise per-turn delegation tools when the target agent has a - // `subagents = [...]` field. Today only the orchestrator does, but - // the helper is agent-agnostic so future agents that delegate - // (e.g. a custom workspace-override planner that subdivides work) - // pick this up for free. - // - // Wrap the Composio fetch in a 3-second timeout so a slow/unresponsive - // Composio API can never block turn dispatch indefinitely. 
- const COMPOSIO_FETCH_TIMEOUT_SECS: u64 = 3; - let extra_tools = if !definition.subagents.is_empty() { - let connected = match tokio::time::timeout( - Duration::from_secs(COMPOSIO_FETCH_TIMEOUT_SECS), - fetch_connected_integrations(&config), - ) - .await - { - Ok(list) => list, - Err(_) => { - tracing::warn!( - channel = %channel, - target_agent = target_id, - "[dispatch::routing] Composio fetch timed out after {}s — proceeding without connected integrations", - COMPOSIO_FETCH_TIMEOUT_SECS - ); - Vec::new() - } - }; - tracing::debug!( - channel = %channel, - target_agent = target_id, - connected_integration_count = connected.len(), - "[dispatch::routing] fetched connected integrations for delegation expansion" - ); - orchestrator_tools::collect_orchestrator_tools(definition, registry, &connected) - } else { - Vec::new() - }; - - let visible_tool_names = build_visible_tool_set(definition, &extra_tools); - - tracing::debug!( - channel = %channel, - target_agent = target_id, - named_tool_count = match &definition.tools { - ToolScope::Named(names) => names.len(), - ToolScope::Wildcard => 0, - }, - extra_tool_count = extra_tools.len(), - visible_tool_count = visible_tool_names.as_ref().map(|s| s.len()).unwrap_or(0), - "[dispatch::routing] assembled tool scoping for turn" - ); - - AgentScoping { - target_agent_id: Some(target_id.to_string()), - visible_tool_names, - extra_tools, - } -} - -/// Build the visible-tool whitelist for an agent. -/// -/// The set is the union of: -/// * every tool name in the agent's `[tools] named = [...]` list -/// (when the scope is [`ToolScope::Named`]); and -/// * every name produced by the per-turn synthesised delegation tools -/// in `extra_tools` (e.g. `research`, `plan`, -/// `delegate_to_integrations_agent`). 
-/// -/// When the agent's tool scope is [`ToolScope::Wildcard`] **and** there -/// are no `extra_tools`, returns `None` to preserve the legacy -/// "everything visible" semantics — a `Wildcard` agent that delegates -/// nothing should still see the full registry. When `Wildcard` is -/// combined with non-empty extras (an unusual but legal combination), -/// the legacy unfiltered behaviour also wins because the wildcard -/// implicitly covers anything in the registry plus the extras. -fn build_visible_tool_set( - definition: &AgentDefinition, - extra_tools: &[Box], -) -> Option> { - match &definition.tools { - ToolScope::Wildcard => None, - ToolScope::Named(names) => { - let mut set: HashSet = names.iter().cloned().collect(); - for tool in extra_tools { - set.insert(tool.name().to_string()); - } - Some(set) - } - } -} - -#[cfg(test)] -mod scoping_tests { - //! Pure-function unit tests for the agent-scoping helpers added by - //! the #525/#526 fix. These exercise the synchronous logic without - //! touching the real `Config::load_or_init` disk read or the global - //! `AgentDefinitionRegistry`, so they can run in any environment. - //! - //! End-to-end exercise of the dispatch path is covered by the - //! existing `runtime_dispatch::dispatch_routes_through_agent_run_turn_ - //! bus_handler` integration test, which still passes after the new - //! fields landed (the resolver gracefully falls back to - //! `AgentScoping::unscoped()` when no orchestrator is registered in - //! the test environment). - - use super::*; - use crate::openhuman::agent::harness::definition::{ - DefinitionSource, ModelSpec, PromptSource, SandboxMode, - }; - use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolCategory, ToolResult}; - use async_trait::async_trait; - - /// Minimal owned tool stub — just enough for `build_visible_tool_set` - /// to read its `name()`. 
- struct StubTool { - name: &'static str, - } - - #[async_trait] - impl Tool for StubTool { - fn name(&self) -> &str { - self.name - } - fn description(&self) -> &str { - "stub" - } - fn parameters_schema(&self) -> serde_json::Value { - serde_json::json!({"type": "object"}) - } - fn category(&self) -> ToolCategory { - ToolCategory::System - } - fn permission_level(&self) -> PermissionLevel { - PermissionLevel::None - } - async fn execute(&self, _args: serde_json::Value) -> anyhow::Result { - Ok(ToolResult::success("ok")) - } - } - - fn def_with_scope(scope: ToolScope) -> AgentDefinition { - AgentDefinition { - id: "test_agent".into(), - when_to_use: "test".into(), - display_name: None, - system_prompt: PromptSource::Inline(String::new()), - omit_identity: true, - omit_memory_context: true, - omit_safety_preamble: true, - omit_skills_catalog: true, - omit_profile: true, - omit_memory_md: true, - model: ModelSpec::Inherit, - temperature: 0.4, - tools: scope, - disallowed_tools: vec![], - skill_filter: None, - extra_tools: vec![], - max_iterations: 8, - iteration_policy: Default::default(), - max_result_chars: None, - timeout_secs: None, - sandbox_mode: SandboxMode::None, - background: false, - subagents: vec![], - delegate_name: None, - agent_tier: crate::openhuman::agent::harness::definition::AgentTier::Worker, - source: DefinitionSource::Builtin, - } - } - - /// `ToolScope::Wildcard` must yield `None` — the prompt builder - /// treats `None` as "no filter, every tool visible", which is the - /// correct behaviour for agents like `integrations_agent` that want the - /// full skill-category catalogue. Even when extras are present, a - /// wildcard agent should not start filtering. 
- #[test] - fn wildcard_scope_yields_none_filter() { - let def = def_with_scope(ToolScope::Wildcard); - let extras: Vec> = vec![Box::new(StubTool { name: "research" })]; - assert!(build_visible_tool_set(&def, &extras).is_none()); - assert!(build_visible_tool_set(&def, &[]).is_none()); - } - - /// `ToolScope::Named` with no extras returns exactly the named set. - /// For agents with a narrow tool scope (e.g. 2 tools in TOML, - /// no delegation, no extras) → 2 entries in the visibility whitelist. - #[test] - fn named_scope_without_extras_returns_named_only() { - let def = def_with_scope(ToolScope::Named(vec![ - "memory_recall".into(), - "ask_user_clarification".into(), - ])); - let set = build_visible_tool_set(&def, &[]).expect("named scope yields Some"); - assert_eq!(set.len(), 2); - assert!(set.contains("memory_recall")); - assert!(set.contains("ask_user_clarification")); - } - - /// `ToolScope::Named` with extras returns the union of the TOML - /// named list and the extras' names. This is the orchestrator's - /// path: direct tools from the TOML + the synthesised delegation - /// tools (`research`, `plan`, `delegate_to_integrations_agent`) - /// → all of them visible to the orchestrator's LLM. The stub - /// names in this test are arbitrary; they exercise the union - /// logic, not the real synthesiser. 
- #[test] - fn named_scope_with_extras_returns_union() { - let def = def_with_scope(ToolScope::Named(vec![ - "query_memory".into(), - "ask_user_clarification".into(), - "spawn_subagent".into(), - ])); - let extras: Vec> = vec![ - Box::new(StubTool { name: "research" }), - Box::new(StubTool { - name: "delegate_gmail", - }), - Box::new(StubTool { - name: "delegate_github", - }), - ]; - let set = build_visible_tool_set(&def, &extras).expect("named scope yields Some"); - assert_eq!(set.len(), 6); - assert!(set.contains("query_memory")); - assert!(set.contains("ask_user_clarification")); - assert!(set.contains("spawn_subagent")); - assert!(set.contains("research")); - assert!(set.contains("delegate_gmail")); - assert!(set.contains("delegate_github")); - } - - /// Empty `Named` list with extras still yields `Some` containing - /// just the extras — useful for hypothetical agents that only - /// reach the world via delegation, with no direct tools. - #[test] - fn empty_named_with_extras_returns_extras_only() { - let def = def_with_scope(ToolScope::Named(vec![])); - let extras: Vec> = vec![Box::new(StubTool { - name: "delegate_only", - })]; - let set = build_visible_tool_set(&def, &extras).expect("named scope yields Some"); - assert_eq!(set.len(), 1); - assert!(set.contains("delegate_only")); - } - - /// Empty `Named` list with no extras yields an empty `Some(set)` — - /// effectively "no tools visible". The prompt loop's `is_visible` - /// helper treats `Some(empty)` differently from `None`: the former - /// means "filter active, nothing matches" so the LLM gets an empty - /// tool list, while the latter means "no filter at all". 
- #[test] - fn empty_named_with_no_extras_returns_empty_set() { - let def = def_with_scope(ToolScope::Named(vec![])); - let set = build_visible_tool_set(&def, &[]).expect("named scope yields Some"); - assert!(set.is_empty()); - } - - /// Duplicate names across named + extras are de-duplicated by the - /// HashSet — no double-counting if a workspace override happens to - /// list a delegation tool name in the direct `named` list too. - #[test] - fn duplicate_names_across_named_and_extras_are_deduplicated() { - let def = def_with_scope(ToolScope::Named(vec![ - "research".into(), - "query_memory".into(), - ])); - let extras: Vec> = vec![ - Box::new(StubTool { name: "research" }), // collides with named - Box::new(StubTool { name: "plan" }), - ]; - let set = build_visible_tool_set(&def, &extras).expect("named scope yields Some"); - assert_eq!(set.len(), 3); - assert!(set.contains("research")); - assert!(set.contains("query_memory")); - assert!(set.contains("plan")); - } - - /// `AgentScoping::unscoped` is the safe-fallback constructor used - /// when the registry is uninitialised or the target agent isn't - /// found. All three fields must default to "no scoping applied" - /// so the channel turn runs with the legacy unfiltered behaviour. - #[test] - fn agent_scoping_unscoped_has_no_filter_or_extras() { - let scoping = AgentScoping::unscoped(); - assert!(scoping.target_agent_id.is_none()); - assert!(scoping.visible_tool_names.is_none()); - assert!(scoping.extra_tools.is_empty()); - } -} - -#[cfg(test)] -mod approval_surface_gating_tests { - use super::channel_has_approval_surface; - - // Sub-issue 2 of #3098: this gate is what decides whether the dispatch - // loop sets an `ApprovalChatContext` (→ gate fires for `Prompt`-class - // tools) versus the legacy bypass (→ tool calls silently allowed). - // Pin the matrix so silently broadening to a new channel can't - // accidentally TTL-deny every parked tool call there. 
- - #[test] - fn telegram_has_approval_surface() { - assert!(channel_has_approval_surface("telegram")); - } - - #[test] - fn other_channels_do_not_yet_have_an_approval_surface() { - for channel in ["discord", "slack", "imessage", "mattermost", "web", "irc"] { - assert!( - !channel_has_approval_surface(channel), - "channel {channel:?} is not (yet) wired to a per-channel approval surface; \ - the dispatch loop must not scope an ApprovalChatContext for it or every \ - Prompt-class tool call will park with nobody to answer and TTL-deny" - ); - } - } - - #[test] - fn unknown_channel_does_not_have_approval_surface() { - assert!(!channel_has_approval_surface("")); - assert!(!channel_has_approval_surface("Telegram")); // case-sensitive on purpose - assert!(!channel_has_approval_surface("telegram-bot")); - } -} - -#[cfg(any(test, debug_assertions))] -pub mod test_support { - //! Debug-build seams for raw integration coverage of dispatch helpers. - - use super::*; - - pub fn build_channel_context_block_for_test(msg: &traits::ChannelMessage) -> String { - build_channel_context_block(msg) - } - - pub fn select_acknowledgment_reaction_for_test(content: &str) -> &'static str { - select_acknowledgment_reaction(content) - } -} +use super::helpers::{ + build_channel_context_block, log_worker_join_result, select_acknowledgment_reaction, + spawn_scoped_typing_task, REPLY_LOG_TRUNCATE_CHARS, +}; +use super::routing::resolve_target_agent; /// Whether a channel currently has a registered approval surface — i.e. 
/// a subscriber that turns `ApprovalRequested` events into chat messages @@ -1135,7 +526,7 @@ pub(crate) async fn process_channel_message( turns.push(ChatMessage::user(&enriched_message)); turns.push(ChatMessage::assistant(&response)); // Trim to MAX_CHANNEL_HISTORY (keep recent turns) - while turns.len() > MAX_CHANNEL_HISTORY { + while turns.len() > crate::openhuman::channels::context::MAX_CHANNEL_HISTORY { turns.remove(0); } } @@ -1380,7 +771,3 @@ pub(crate) async fn run_message_dispatch_loop( log_worker_join_result(result); } } - -#[cfg(test)] -#[path = "dispatch_tests.rs"] -mod tests; diff --git a/src/openhuman/channels/runtime/dispatch/routing.rs b/src/openhuman/channels/runtime/dispatch/routing.rs new file mode 100644 index 0000000000..8c8d1ae102 --- /dev/null +++ b/src/openhuman/channels/runtime/dispatch/routing.rs @@ -0,0 +1,192 @@ +//! Agent routing and tool-scoping for channel dispatch turns. +//! +//! Contains: +//! * [`AgentScoping`] — per-turn scoping fields derived from the active agent. +//! * [`resolve_target_agent`] — reads config and registry to pick the active +//! agent and synthesise its delegation tool surface. +//! * [`build_visible_tool_set`] — union of named tools + extra (delegation) tools. + +use crate::openhuman::agent::harness::definition::{ + AgentDefinition, AgentDefinitionRegistry, ToolScope, +}; +use crate::openhuman::composio::fetch_connected_integrations; +use crate::openhuman::config::Config; +use crate::openhuman::tools::{orchestrator_tools, Tool}; +use std::collections::HashSet; +use std::time::Duration; + +/// Per-turn scoping fields derived from the active agent definition. +/// +/// Carries the three new fields that get spliced into [`AgentTurnRequest`] +/// in [`super::processor::process_channel_message`]. 
Constructed by +/// [`resolve_target_agent`] after reading `config.onboarding_completed`, +/// looking up the matching definition in [`AgentDefinitionRegistry`], and +/// synthesising any per-turn delegation tools the agent needs. +pub(super) struct AgentScoping { + pub(super) target_agent_id: Option, + pub(super) visible_tool_names: Option>, + pub(super) extra_tools: Vec>, +} + +impl AgentScoping { + /// Empty scoping — preserves the legacy "every tool in the global + /// registry is visible" behaviour. Returned when the registry isn't + /// initialised yet (early startup) or when the target agent + /// definition isn't found, so the channel layer never crashes the + /// runtime over a routing miss. + pub(super) fn unscoped() -> Self { + Self { + target_agent_id: None, + visible_tool_names: None, + extra_tools: Vec::new(), + } + } +} + +/// Decide which agent should run for this channel turn and build the +/// matching tool-scoping payload. +/// +/// All channel turns route directly to the `orchestrator` agent. The +/// welcome agent has been removed; the Joyride walkthrough in the +/// frontend handles onboarding UI instead. +/// +/// On any failure path (missing registry, missing definition, missing +/// orchestrator delegation targets) the function logs and returns +/// [`AgentScoping::unscoped`], which lets the turn run with the legacy +/// unfiltered behaviour rather than failing the whole message. 
+pub(super) async fn resolve_target_agent(channel: &str) -> AgentScoping { + let config = match Config::load_or_init().await { + Ok(c) => c, + Err(err) => { + tracing::warn!( + channel = %channel, + error = %err, + "[dispatch::routing] failed to load config — falling back to unscoped turn" + ); + return AgentScoping::unscoped(); + } + }; + + let target_id = "orchestrator"; + + tracing::info!( + channel = %channel, + target_agent = target_id, + ui_onboarding_completed = config.onboarding_completed, + "[dispatch::routing] selected target agent" + ); + + let registry = match AgentDefinitionRegistry::global() { + Some(reg) => reg, + None => { + tracing::warn!( + channel = %channel, + target_agent = target_id, + "[dispatch::routing] AgentDefinitionRegistry not initialised — falling back to unscoped turn" + ); + return AgentScoping::unscoped(); + } + }; + + let definition = match registry.get(target_id) { + Some(def) => def, + None => { + tracing::warn!( + channel = %channel, + target_agent = target_id, + "[dispatch::routing] target agent not in registry — falling back to unscoped turn" + ); + return AgentScoping::unscoped(); + } + }; + + // Synthesise per-turn delegation tools when the target agent has a + // `subagents = [...]` field. Today only the orchestrator does, but + // the helper is agent-agnostic so future agents that delegate + // (e.g. a custom workspace-override planner that subdivides work) + // pick this up for free. + // + // Wrap the Composio fetch in a 3-second timeout so a slow/unresponsive + // Composio API can never block turn dispatch indefinitely. 
+ const COMPOSIO_FETCH_TIMEOUT_SECS: u64 = 3; + let extra_tools = if !definition.subagents.is_empty() { + let connected = match tokio::time::timeout( + Duration::from_secs(COMPOSIO_FETCH_TIMEOUT_SECS), + fetch_connected_integrations(&config), + ) + .await + { + Ok(list) => list, + Err(_) => { + tracing::warn!( + channel = %channel, + target_agent = target_id, + "[dispatch::routing] Composio fetch timed out after {}s — proceeding without connected integrations", + COMPOSIO_FETCH_TIMEOUT_SECS + ); + Vec::new() + } + }; + tracing::debug!( + channel = %channel, + target_agent = target_id, + connected_integration_count = connected.len(), + "[dispatch::routing] fetched connected integrations for delegation expansion" + ); + orchestrator_tools::collect_orchestrator_tools(definition, registry, &connected) + } else { + Vec::new() + }; + + let visible_tool_names = build_visible_tool_set(definition, &extra_tools); + + tracing::debug!( + channel = %channel, + target_agent = target_id, + named_tool_count = match &definition.tools { + ToolScope::Named(names) => names.len(), + ToolScope::Wildcard => 0, + }, + extra_tool_count = extra_tools.len(), + visible_tool_count = visible_tool_names.as_ref().map(|s| s.len()).unwrap_or(0), + "[dispatch::routing] assembled tool scoping for turn" + ); + + AgentScoping { + target_agent_id: Some(target_id.to_string()), + visible_tool_names, + extra_tools, + } +} + +/// Build the visible-tool whitelist for an agent. +/// +/// The set is the union of: +/// * every tool name in the agent's `[tools] named = [...]` list +/// (when the scope is [`ToolScope::Named`]); and +/// * every name produced by the per-turn synthesised delegation tools +/// in `extra_tools` (e.g. `research`, `plan`, +/// `delegate_to_integrations_agent`). 
+/// +/// When the agent's tool scope is [`ToolScope::Wildcard`] **and** there +/// are no `extra_tools`, returns `None` to preserve the legacy +/// "everything visible" semantics — a `Wildcard` agent that delegates +/// nothing should still see the full registry. When `Wildcard` is +/// combined with non-empty extras (an unusual but legal combination), +/// the legacy unfiltered behaviour also wins because the wildcard +/// implicitly covers anything in the registry plus the extras. +pub(super) fn build_visible_tool_set( + definition: &AgentDefinition, + extra_tools: &[Box], +) -> Option> { + match &definition.tools { + ToolScope::Wildcard => None, + ToolScope::Named(names) => { + let mut set: HashSet = names.iter().cloned().collect(); + for tool in extra_tools { + set.insert(tool.name().to_string()); + } + Some(set) + } + } +} diff --git a/src/openhuman/composio/mod.rs b/src/openhuman/composio/mod.rs index 9ca3800dd3..ef7c406cc3 100644 --- a/src/openhuman/composio/mod.rs +++ b/src/openhuman/composio/mod.rs @@ -54,6 +54,7 @@ pub mod tools; pub mod trigger_history; pub mod types; +pub use crate::openhuman::agent::prompts::types::ConnectedIntegration; pub use crate::openhuman::memory_sync::composio::bus::{ register_composio_trigger_subscriber, ComposioConfigChangedSubscriber, ComposioTriggerSubscriber, diff --git a/src/openhuman/composio/ops.rs b/src/openhuman/composio/ops.rs deleted file mode 100644 index cb3ae2d91f..0000000000 --- a/src/openhuman/composio/ops.rs +++ /dev/null @@ -1,1746 +0,0 @@ -//! RPC-facing operations for the Composio domain. -//! -//! Each `composio_*` function wraps a [`ComposioClient`] call, translates -//! errors to strings, and returns an [`RpcOutcome`] so the controller -//! schemas can log a user-visible line. The handlers in [`super::schemas`] -//! call into these. -//! -//! These ops are also callable directly from other domains (e.g. the -//! agent harness) when they need composio data at runtime. 
- -/// Toolkits that honour the `tags` query param on the backend tool-list endpoint. -/// Expand this list when a new toolkit gains tag support. -const TAG_QUERYABLE_TOOLKITS: &[&str] = &["github"]; - -/// Returns `true` when `tags` should be forwarded to the backend. -/// -/// Tags are forwarded when no toolkit filter is active (`None` / empty slice) -/// or when at least one requested toolkit is in [`TAG_QUERYABLE_TOOLKITS`]. -/// This is `pub(crate)` so `tools.rs` can reuse it without duplicating the list. -pub(crate) fn should_forward_tags(toolkits: Option<&[String]>) -> bool { - match toolkits { - None => true, - Some(kits) => { - kits.is_empty() - || kits.iter().any(|k| { - TAG_QUERYABLE_TOOLKITS - .iter() - .any(|t| k.trim().eq_ignore_ascii_case(t)) - }) - } - } -} - -use crate::openhuman::config::Config; -use crate::openhuman::memory::MemoryClient; -use crate::openhuman::memory_store::chunks::store as memory_tree_store; -use crate::openhuman::memory_store::chunks::types::SourceKind; -use crate::rpc::RpcOutcome; - -/// Result alias used by every `composio_*` op in this module. -/// -/// We deliberately return a plain `String` error instead of -/// `anyhow::Error` — the controller layer in `schemas.rs` forwards -/// these straight into the RPC envelope, and `String` keeps the shape -/// obvious at a glance. 
-type OpResult = std::result::Result; - -use std::sync::Arc; - -use super::client::{ - build_composio_client, create_composio_client, direct_list_connections, direct_list_tools, - ComposioClient, ComposioClientKind, -}; -use super::providers::{ - agent_ready_toolkits, capability_matrix, get_provider, sync_state::SyncState, ProviderContext, - ProviderUserProfile, SyncOutcome, SyncReason, -}; -use super::types::{ - ComposioActiveTriggersResponse, ComposioAuthorizeResponse, ComposioAvailableTriggersResponse, - ComposioCapabilitiesResponse, ComposioConnectionsResponse, ComposioCreateTriggerResponse, - ComposioDeleteResponse, ComposioDisableTriggerResponse, ComposioEnableTriggerResponse, - ComposioExecuteResponse, ComposioGithubReposResponse, ComposioToolkitsResponse, - ComposioToolsResponse, ComposioTriggerHistoryResult, -}; - -/// Resolve a backend-mode [`ComposioClient`] from the root config, or -/// return an error string that the caller can surface over RPC. -/// -/// Used by the **backend-only** Composio ops — `delete_connection`, -/// `list_github_repos`, the `triggers/*` family, and the provider -/// dispatch paths (`get_user_profile`, `refresh_all_identities`, -/// `sync`). These rely on the backend's bookkeeping -/// (HMAC-verified trigger fan-out, per-user provider registry, GitHub -/// repo enumeration) that the direct-mode v3 surface does not provide, -/// so they intentionally remain backend-only for now. The "mode-aware" -/// `composio_authorize` / `composio_execute` / `composio_list_*` -/// handlers go through [`create_composio_client`] instead so the -/// `config.composio.mode` toggle is honoured per call (#1710). -fn resolve_client(config: &Config) -> OpResult { - build_composio_client(config).ok_or_else(|| { - "composio unavailable: no backend session token. Sign in first \ - (auth_store_session)." 
- .to_string() - }) -} - -/// True when the user has selected Composio **direct** mode but has not yet -/// configured an API key (neither in the keychain nor `config.toml`). -/// -/// This is a valid, user-controlled *setup* state — the user just flipped to -/// direct mode and is about to paste their key — NOT an operation failure. -/// Callers short-circuit to an empty result instead of letting the -/// mode-aware factory bail with "composio direct mode selected but no api key -/// is configured", which the desktop UI's 5 s poll would otherwise funnel to -/// Sentry on every tick (TAURI-RUST-R4). -/// -/// Key presence MUST mirror the factory's own resolution in -/// [`create_composio_client`] (`client.rs`): a key counts if it is in the -/// keychain (`credentials::get_composio_api_key`) **or** in `config.toml` -/// (`config.composio.api_key`). Checking only the keychain would wrongly -/// short-circuit to an empty list for a user who configured their key via -/// `config.toml`, hiding their real connections. -fn direct_mode_without_key(config: &Config) -> OpResult { - if config.composio.mode.trim() != crate::openhuman::config::schema::COMPOSIO_MODE_DIRECT { - return Ok(false); - } - let has_key = crate::openhuman::credentials::get_composio_api_key(config) - .map_err(|e| format!("[composio] get_composio_api_key failed: {e}"))? - .or_else(|| { - config - .composio - .api_key - .as_ref() - .map(|k| k.trim().to_string()) - .filter(|k| !k.is_empty()) - }) - .is_some(); - Ok(!has_key) -} - -/// Defense-in-depth Sentry funnel for composio op-layer errors. -/// -/// The shared [`crate::openhuman::integrations::IntegrationClient`] -/// (which fronts every `client.list_*` / `client.execute_tool` / -/// `client.authorize` call) already reports its own failures under -/// `domain="integrations"` with `failure="non_2xx" | "transport"` tags, -/// and the Sentry `before_send` filter (`is_transient_integrations_failure`) -/// drops the transient subset. 
This helper re-classifies the same -/// anyhow chain at the **op layer** under `domain="composio"` so: -/// -/// 1. Future call sites that bypass `IntegrationClient` (the existing -/// `raw_delete` path, or any new bespoke HTTP client added under -/// `composio/`) still funnel through the same classifier. -/// 2. Op-layer-specific failures — provider sync errors, history archive -/// errors, profile-resolution errors — get tagged consistently rather -/// than reaching Sentry as bare `Err(String)` returned via RPC. -/// -/// The classifier (`expected_error_kind`) is purely message-substring -/// based — `Backend returned 502 …`, `error sending request for url …`, -/// `operation timed out` etc. all resolve to a warn/info breadcrumb -/// without a Sentry event. Genuine bugs (404s, 500s with bug-shape -/// payloads, envelope errors) still surface. -/// -/// `failure="non_2xx"` is the default tag because that is the dominant -/// shape in the leak set (OPENHUMAN-TAURI-35 / -2H: backend 502 from -/// `Backend returned …`). When the message contains a recognized -/// transport phrase (`operation timed out`, `connection refused`, `tls -/// handshake eof`, …), we tag `failure="transport"` instead so the -/// `before_send` filter's transport-phrase branch fires — and keep the -/// status tag absent (transport failures don't carry a status). -pub(crate) fn report_composio_op_error(operation: &str, err: &E) { - // `{err:#}` renders the full anyhow chain when applicable; for plain - // `String` / `&str` errors it falls back to the Display impl. 
- let rendered = format!("{err:#}"); - let failure_tag = classify_composio_failure_tag(rendered.as_str()); - if failure_tag == "non_2xx" { - if let Some(status) = extract_backend_returned_status(&rendered) { - crate::core::observability::report_error_or_expected( - rendered.as_str(), - "composio", - operation, - &[("failure", failure_tag), ("status", status.as_str())], - ); - return; - } - } - crate::core::observability::report_error_or_expected( - rendered.as_str(), - "composio", - operation, - &[("failure", failure_tag)], - ); -} - -/// Pick the `failure` tag for a composio op-layer error message based on -/// shape inspection. Transport-level reqwest chains (timeout, connection -/// reset, TLS handshake EOF, "error sending request for url") tag as -/// `"transport"` so the `before_send` filter's transport-phrase branch -/// fires; everything else (the dominant `Backend returned …` -/// shape from the integrations layer) tags as `"non_2xx"`. -/// -/// Extracted so tests can pin the routing without a Sentry test client. -fn classify_composio_failure_tag(rendered: &str) -> &'static str { - let lower = rendered.to_ascii_lowercase(); - // `rendered`: pass to callee-normalised checks - // (`contains_transient_transport_phrase` handles casing internally). - // `lower`: pre-lowered copy reused for literal substring matches that - // intentionally do their own case-folding here. - // A future contributor adding a new condition should extend the side - // that matches the new check's normaliser contract. - let is_transport = crate::core::observability::contains_transient_transport_phrase(rendered) - || lower.contains("error sending request"); - if is_transport { - "transport" - } else { - "non_2xx" - } -} - -/// Extract the HTTP status code from a `Backend returned ...` -/// rendering produced by the integrations layer. Returns `None` when no -/// numeric status follows the anchor phrase (e.g. envelope-only errors). 
-/// -/// Surfacing the status as a Sentry tag gives the `before_send` filter's -/// transient-status branch (`is_transient_integrations_failure`) a precise -/// signal to drop the dominant 5xx leak shape (OPENHUMAN-TAURI-35 / -2H) -/// without also dropping genuine 4xx bug-shape failures that share the -/// `failure="non_2xx"` tag. -fn extract_backend_returned_status(rendered: &str) -> Option { - let lower = rendered.to_ascii_lowercase(); - let rest = lower.split_once("backend returned ")?.1; - let digits: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect(); - (!digits.is_empty()).then_some(digits) -} - -// ── Toolkits ──────────────────────────────────────────────────────── - -pub async fn composio_list_toolkits( - config: &Config, -) -> OpResult> { - tracing::debug!("[composio] rpc list_toolkits"); - // Route through the mode-aware factory so direct-mode users do NOT - // silently fall through to the backend tinyhumans tenant's allowlist. - // [composio-direct] In direct mode we don't expose a toolkit - // allowlist at all — the user's personal Composio account governs - // what's available. Returning an empty list signals "no curated - // allowlist" to the UI and prompt-builder, which matches the - // sovereign expectation: Direct mode users manage their toolkits - // through app.composio.dev directly. 
- let kind = - create_composio_client(config).map_err(|e| format!("[composio] list_toolkits: {e}"))?; - match kind { - ComposioClientKind::Backend(client) => { - tracing::debug!("[composio] list_toolkits: backend variant"); - let resp = client.list_toolkits().await.map_err(|e| { - report_composio_op_error("list_toolkits", &e); - format!("[composio] list_toolkits failed: {e:#}") - })?; - let count = resp.toolkits.len(); - Ok(RpcOutcome::new( - resp, - vec![format!("composio: {count} toolkit(s) enabled")], - )) - } - ComposioClientKind::Direct(_) => { - tracing::info!( - "[composio-direct] list_toolkits: direct mode active — no \ - server-side allowlist is enforced; returning empty toolkits \ - list. Users manage available toolkits via app.composio.dev." - ); - Ok(RpcOutcome::new( - ComposioToolkitsResponse::default(), - vec!["composio: direct mode — no curated allowlist (toolkits \ - managed via app.composio.dev)" - .to_string()], - )) - } - } -} - -pub async fn composio_list_capabilities( - _config: &Config, -) -> OpResult> { - tracing::debug!("[composio] rpc list_capabilities"); - let resp = ComposioCapabilitiesResponse { - capabilities: capability_matrix(), - }; - let count = resp.capabilities.len(); - Ok(RpcOutcome::new( - resp, - vec![format!("composio: {count} capability row(s) listed")], - )) -} - -/// List every toolkit slug that ships an agent-ready curated catalog. -/// -/// Connected toolkits that are NOT in this list can still be -/// authorized via OAuth, but the agent has no curated action surface -/// for them — the UI should label such connections as -/// "preview / agent integration coming soon" so users aren't led into -/// a broken `composio_list_tools` → max-iterations loop. See #2283. 
-pub async fn composio_list_agent_ready_toolkits( -) -> OpResult> { - tracing::debug!("[composio] rpc list_agent_ready_toolkits"); - let toolkits: Vec = agent_ready_toolkits() - .into_iter() - .map(|s| s.to_string()) - .collect(); - let count = toolkits.len(); - let resp = super::types::ComposioAgentReadyToolkitsResponse { toolkits }; - Ok(RpcOutcome::new( - resp, - vec![format!("composio: {count} agent-ready toolkit(s) listed")], - )) -} - -// ── Connections ───────────────────────────────────────────────────── - -pub async fn composio_list_connections( - config: &Config, -) -> OpResult> { - tracing::debug!("[composio] rpc list_connections"); - // [Sentry TAURI-RUST-R4] Direct mode with no API key yet is a valid, - // user-controlled *setup* state — not an operation failure. The desktop - // UI polls this RPC every 5 s; without this guard the mode-aware factory - // bails ("composio direct mode selected but no api key is configured") on - // every tick and the error funnels to Sentry until the user pastes a key - // (~3.2 k events, single user, release 0.57.5). Mirror `periodic.rs`'s - // graceful skip and return the truthful empty list (no key → no tenant → - // no connections). The Settings → Composio panel drives the "enter your - // key" prompt off the separate `api_key_set` status, so the user is still - // told what to do. We return BEFORE `create_composio_client` and - // `sync_cache_with_connections`, so no error is constructed and the - // integrations cache is left untouched. - if direct_mode_without_key(config)? 
{ - tracing::debug!( - "[composio] list_connections: direct mode selected, no api key configured yet \ - — returning empty connection list (valid setup state, not an error)" - ); - return Ok(RpcOutcome::new( - ComposioConnectionsResponse { - connections: Vec::new(), - }, - vec!["composio: direct mode — no api key configured yet, 0 connection(s)".to_string()], - )); - } - // Route through the mode-aware factory so direct-mode users do NOT - // accidentally see the tinyhumans-tenant connections from the - // backend-proxied path. Mixing the two tenants is the bug behind the - // user-reported "I switched to Direct and my old integrations are - // still showing" symptom (#1710). - let kind = - create_composio_client(config).map_err(|e| format!("[composio] list_connections: {e}"))?; - let client = match kind { - ComposioClientKind::Backend(client) => { - tracing::debug!("[composio] list_connections: backend variant"); - client - } - ComposioClientKind::Direct(direct) => { - // [composio-direct] Translate the user's Composio v3 - // `/connected_accounts` view into the same - // `ComposioConnectionsResponse` shape the backend-proxied - // path emits. This is what unlocks end-to-end OAuth in - // direct mode: once the user completes the Composio-hosted - // flow, the UI's 5 s `composio_list_connections` poll picks - // up the new ACTIVE row from THEIR tenant (not the - // tinyhumans tenant) and flips the Settings badge to - // Connected (#1710). - tracing::info!( - "[composio-direct] list_connections: fetching v3 \ - /connected_accounts for the user's personal Composio tenant" - ); - let resp = direct_list_connections(&direct).await.map_err(|e| { - // [#1166 / Sentry TAURI-RUST-X9] Restore symmetric error - // routing for the direct-mode branch. 
Without this hook the - // direct-mode 401 ("Invalid API key …") wire shape bypassed - // `report_error_or_expected` and leaked ~15.7k events in ~22h - // — same UI 5 s poll + `periodic.rs` tick that the - // backend branch (line ~266) was already classifying. - // - // Render WITH the `[composio-direct]` anchor BEFORE - // reporting so the classifier arm in - // `is_provider_user_state_message` (which gates on that - // prefix) actually fires. - let rendered = format!("[composio-direct] list_connections failed: {e:#}"); - report_composio_op_error("list_connections", &rendered); - rendered - })?; - let active = resp.connections.iter().filter(|c| c.is_active()).count(); - let total = resp.connections.len(); - // Reconcile the integrations cache against this fresh live - // snapshot from the user's own tenant — same defensive - // behaviour as the backend path, so the chat runtime's - // connected-toolkits view stays in sync within one poll - // interval. - sync_cache_with_connections(&resp.connections); - let resp = enrich_connections_with_identity(resp); - return Ok(RpcOutcome::new( - resp, - vec![format!( - "composio: direct mode — {total} connection(s) listed ({active} active)" - )], - )); - } - }; - let resp = client.list_connections().await.map_err(|e| { - report_composio_op_error("list_connections", &e); - format!("[composio] list_connections failed: {e:#}") - })?; - let active = resp.connections.iter().filter(|c| c.is_active()).count(); - let total = resp.connections.len(); - // Reconcile the chat-runtime integrations cache against this fresh - // snapshot. The desktop UI polls this RPC every 5 s, so any OAuth - // completion that lands out-of-band from the event-bus invalidation - // path (common on Windows when `wait_for_connection_active`'s 60 s - // timeout fires before the user finishes the hosted flow) is still - // reflected in chat within one poll interval. 
- sync_cache_with_connections(&resp.connections); - let resp = enrich_connections_with_identity(resp); - Ok(RpcOutcome::new( - resp, - vec![format!( - "composio: {total} connection(s) listed ({active} active)" - )], - )) -} - -pub async fn composio_authorize( - config: &Config, - toolkit: &str, - extra_params: Option, -) -> OpResult> { - tracing::debug!(toolkit = %toolkit, has_extra_params = extra_params.is_some(), "[composio] rpc authorize"); - // Route through the mode-aware factory so direct-mode users get a - // hosted Composio OAuth URL for THEIR personal tenant — not the - // backend tinyhumans tenant's OAuth proxy (#1710). The pre-factory - // path hard-routed through `staging-api.tinyhumans.ai`, so a user - // toggled into Direct mode would silently complete OAuth against - // the wrong tenant and never see the new connection in their - // own Composio account. - let kind = create_composio_client(config).map_err(|e| format!("[composio] authorize: {e}"))?; - let resp = match kind { - ComposioClientKind::Backend(client) => { - tracing::debug!(toolkit = %toolkit, "[composio] authorize: backend variant"); - super::oauth_handoff::authorize_with_meta_guard(&client, toolkit, extra_params) - .await - .map_err(|e| { - report_composio_op_error("authorize", &e); - let wrapped = super::oauth_handoff::wrap_authorize_rate_limit_error(toolkit, e); - format!("[composio] authorize failed: {wrapped:#}") - })? - } - ComposioClientKind::Direct(direct) => { - tracing::info!( - toolkit = %toolkit, - "[composio-direct] authorize: routing to user's personal Composio tenant" - ); - // [composio-direct] `extra_params` is the backend's escape - // hatch for toolkit-specific request fields (e.g. WhatsApp - // `waba_id`). The v3 direct endpoint takes no such surface - // — toolkit-specific data is configured upstream on - // app.composio.dev when the user creates the auth config. 
- // We log a warning instead of failing so the WhatsApp UX - // (which always passes a WABA id) still works for users - // who configured the auth config correctly on Composio's - // side. - if extra_params.is_some() { - tracing::warn!( - toolkit = %toolkit, - "[composio-direct] authorize: extra_params is set but direct mode does \ - not propagate it — configure toolkit-specific fields via \ - app.composio.dev for your auth config" - ); - } - super::oauth_handoff::direct_authorize_with_meta_guard( - &direct, - toolkit, - &config.composio.entity_id, - ) - .await - .map_err(|e| { - let wrapped = super::oauth_handoff::wrap_authorize_rate_limit_error(toolkit, e); - // [#1166 / Sentry TAURI-RUST-X9] Symmetric with the - // backend branch's `report_composio_op_error` on the - // same handler — direct-mode 401s from - // `connected_accounts/link` were leaking otherwise. - // Render WITH the `[composio-direct]` anchor so the - // classifier arm fires; wrapped error preserves any - // rate-limit classifications fed up the ladder. - let rendered = format!("[composio-direct] authorize failed: {wrapped:#}"); - report_composio_op_error("authorize", &rendered); - rendered - })? - } - }; - - // Publish an event so any interested subscribers (e.g. UI refreshers, - // analytics) can react to the new connection handoff. 
- crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::ComposioConnectionCreated { - toolkit: toolkit.to_string(), - connection_id: resp.connection_id.clone(), - connect_url: resp.connect_url.clone(), - }, - ); - - Ok(RpcOutcome::new( - resp, - vec![format!("composio: authorize flow started for {toolkit}")], - )) -} - -pub async fn composio_delete_connection( - config: &Config, - connection_id: &str, - clear_memory: bool, -) -> OpResult> { - tracing::debug!(connection_id = %connection_id, "[composio] rpc delete_connection"); - let client = resolve_client(config)?; - let toolkit = match resolve_toolkit_for_connection(&client, connection_id).await { - Ok(toolkit) => Some(toolkit), - Err(error) if clear_memory => { - return Err(format!( - "[composio] delete_connection cannot clear memory without resolving toolkit: {error}" - )); - } - Err(_) => None, - }; - let memory_targets = if clear_memory { - composio_memory_targets_for_connection(config, toolkit.as_deref(), connection_id) - .await - .map_err(|error| { - format!("[composio] delete_connection cannot enumerate memory targets: {error:#}") - })? 
- } else { - Vec::new() - }; - let mut resp = client.delete_connection(connection_id).await.map_err(|e| { - report_composio_op_error("delete_connection", &e); - format!("[composio] delete_connection failed: {e:#}") - })?; - let mut memory_chunks_deleted = 0; - let mut memory_clear_errors = Vec::new(); - for target in &memory_targets { - match target.delete(config) { - Ok(deleted) => { - memory_chunks_deleted += deleted; - } - Err(error) => { - memory_clear_errors.push(format!( - "[composio] connection deleted, but failed to clear memory chunks for {}: {error:#}", - target.label() - )); - } - } - } - resp.memory_chunks_deleted = memory_chunks_deleted; - if let Some(toolkit) = toolkit.as_deref() { - let deleted = - super::providers::profile::delete_connected_identity_facets(toolkit, connection_id); - tracing::debug!( - toolkit = %toolkit, - connection_id = %connection_id, - facets_deleted = deleted, - "[composio] deleted connected identity facets after connection removal" - ); - if let Err(e) = super::providers::profile_md::remove_provider_from_profile_md( - &config.workspace_dir, - toolkit, - connection_id, - ) { - tracing::warn!( - toolkit = %toolkit, - connection_id = %connection_id, - error = %e, - "[composio] PROFILE.md bullet removal failed (non-fatal)" - ); - } - } - // Prune the local memory_sources registry entry for this connection. - // The registry keys composio sources by `connection_id` and the - // reconciler only ever upserts, so a deleted connection's - // `[[memory_sources]]` entry is otherwise orphaned forever (and on - // reconnect the backend mints a fresh `connection_id`, leaving the stale - // one stranded). Best-effort: the backend connection is already gone, so - // a config-save failure must not fail the whole delete — log and move on. 
- match crate::openhuman::memory_sources::registry::remove_composio_source_by_connection_id( - connection_id, - ) - .await - { - Ok(0) => {} - Ok(removed) => tracing::debug!( - connection_id = %connection_id, - removed, - "[composio] pruned memory_sources entry after connection deletion" - ), - Err(e) => tracing::warn!( - connection_id = %connection_id, - error = %e, - "[composio] failed to prune memory_sources entry after connection deletion (non-fatal)" - ), - } - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::ComposioConnectionDeleted { - toolkit: toolkit.unwrap_or_else(|| "unknown".to_string()), - connection_id: connection_id.to_string(), - }, - ); - // Bust the integrations cache so the next prompt reflects the removal. - invalidate_connected_integrations_cache(); - // Eagerly warm the cache from the backend so the very next - // `cached_active_integrations` read (typically the orchestrator's - // next-turn refresh, or the desktop UI's 5 s - // `composio_list_connections` poll) sees the removal immediately - // instead of waiting for a cache-miss round trip on the hot path. - // Symmetric with the connect-side eager warm in - // [`super::bus::ComposioConnectionCreatedSubscriber`]. Best-effort — - // on backend failure the UI poll repopulates within ~5 s as a - // safety net. - // - // Use the status-distinguishing fetcher so we log - // `Authoritative(empty)` and backend unavailability differently — - // `fetch_connected_integrations` collapses both to `Vec::new()` - // and would otherwise hide auth/backend failures from incident - // triage. 
- match fetch_connected_integrations_status(config).await { - FetchConnectedIntegrationsStatus::Authoritative(entries) => { - tracing::debug!( - connection_id = %connection_id, - cached_entries = entries.len(), - "[composio] eagerly warmed integrations cache after connection deletion" - ); - } - FetchConnectedIntegrationsStatus::Unavailable => { - tracing::warn!( - connection_id = %connection_id, - "[composio] eager cache warm after connection deletion skipped: backend unavailable" - ); - } - } - if !memory_clear_errors.is_empty() { - return Err(memory_clear_errors.join("; ")); - } - Ok(RpcOutcome::new( - resp, - vec![format!("composio: connection {connection_id} deleted")], - )) -} - -#[derive(Debug, Clone, PartialEq, Eq)] -enum MemoryCleanupTarget { - Exact(SourceKind, String), - Prefix(SourceKind, String), - Owner(SourceKind, String), -} - -impl MemoryCleanupTarget { - fn delete(&self, config: &Config) -> anyhow::Result { - match self { - Self::Exact(source_kind, source_id) => { - memory_tree_store::delete_chunks_by_source(config, *source_kind, source_id) - } - Self::Prefix(source_kind, source_id_prefix) => { - memory_tree_store::delete_chunks_by_source_prefix( - config, - *source_kind, - source_id_prefix, - ) - } - Self::Owner(source_kind, owner) => { - memory_tree_store::delete_chunks_by_owner(config, *source_kind, owner) - } - } - } - - fn label(&self) -> String { - match self { - Self::Exact(source_kind, source_id) => { - format!("{}:{source_id}", source_kind.as_str()) - } - Self::Prefix(source_kind, source_id_prefix) => { - format!("{}:{source_id_prefix}*", source_kind.as_str()) - } - Self::Owner(source_kind, owner) => { - format!("{}:owner:{owner}", source_kind.as_str()) - } - } - } -} - -async fn composio_memory_targets_for_connection( - config: &Config, - toolkit: Option<&str>, - connection_id: &str, -) -> anyhow::Result> { - let Some(toolkit) = toolkit.map(str::trim).filter(|s| !s.is_empty()) else { - return Ok(Vec::new()); - }; - - let targets = match 
toolkit.to_ascii_lowercase().as_str() { - "slack" => vec![MemoryCleanupTarget::Exact( - SourceKind::Chat, - format!("slack:{connection_id}"), - )], - "gmail" => gmail_memory_sources_for_connection(connection_id), - "notion" => notion_memory_targets_for_connection(config, connection_id).await?, - "drive" | "googledrive" | "google_drive" => { - drive_memory_targets_for_connection(connection_id) - } - _ => Vec::new(), - }; - Ok(targets) -} - -fn gmail_memory_sources_for_connection(connection_id: &str) -> Vec { - vec![ - MemoryCleanupTarget::Owner(SourceKind::Email, format!("gmail-sync:{connection_id}")), - MemoryCleanupTarget::Exact(SourceKind::Email, format!("gmail:{connection_id}")), - MemoryCleanupTarget::Prefix(SourceKind::Email, format!("gmail:{connection_id}:")), - MemoryCleanupTarget::Prefix(SourceKind::Email, format!("gmail:{connection_id}/")), - ] -} - -async fn notion_memory_targets_for_connection( - config: &Config, - connection_id: &str, -) -> anyhow::Result> { - let mut targets = connection_scoped_document_targets("notion", connection_id); - - let memory = Arc::new( - MemoryClient::from_workspace_dir(config.workspace_dir.clone()).map_err(|error| { - anyhow::anyhow!( - "failed to open memory client for notion cleanup target discovery: {error}" - ) - })?, - ); - let state = SyncState::load(&memory, "notion", connection_id) - .await - .map_err(|error| { - anyhow::anyhow!("failed to load notion sync state for memory cleanup: {error}") - })?; - for raw_id in state.synced_ids { - let Some(page_id) = notion_synced_page_id(&raw_id) else { - continue; - }; - targets.push(MemoryCleanupTarget::Exact( - SourceKind::Document, - format!("notion:{page_id}"), - )); - targets.push(MemoryCleanupTarget::Exact( - SourceKind::Document, - format!("composio-notion-page-{page_id}"), - )); - } - - Ok(dedupe_memory_targets(targets)) -} - -fn drive_memory_targets_for_connection(connection_id: &str) -> Vec { - ["drive", "googledrive", "google_drive"] - .into_iter() - 
.flat_map(|prefix| connection_scoped_document_targets(prefix, connection_id)) - .collect() -} - -fn connection_scoped_document_targets( - prefix: &str, - connection_id: &str, -) -> Vec { - vec![ - MemoryCleanupTarget::Exact(SourceKind::Document, format!("{prefix}:{connection_id}")), - MemoryCleanupTarget::Prefix(SourceKind::Document, format!("{prefix}:{connection_id}:")), - MemoryCleanupTarget::Prefix(SourceKind::Document, format!("{prefix}:{connection_id}/")), - ] -} - -fn notion_synced_page_id(raw_id: &str) -> Option { - let page_id = raw_id.split_once('@').map_or(raw_id, |(id, _)| id).trim(); - (!page_id.is_empty()).then(|| page_id.to_string()) -} - -fn dedupe_memory_targets(targets: Vec) -> Vec { - let mut unique = Vec::new(); - for target in targets { - if !unique.contains(&target) { - unique.push(target); - } - } - unique -} - -// ── Tools ─────────────────────────────────────────────────────────── - -pub async fn composio_list_tools( - config: &Config, - toolkits: Option>, - tags: Option>, -) -> OpResult> { - let effective_tags = if should_forward_tags(toolkits.as_deref()) { - tags - } else { - None - }; - tracing::debug!(?toolkits, ?effective_tags, "[composio] rpc list_tools"); - // Route through the mode-aware factory. In direct mode the backend - // tool catalogue (which is shaped by the tinyhumans-tenant - // allowlist + curated whitelist) does NOT apply — the user's - // personal Composio account governs discovery via app.composio.dev. - // Mirrors the empty-response short-circuit in `composio_list_toolkits` - // / `composio_list_connections` so the three "list_*" surfaces - // behave consistently and we don't accidentally leak backend-tenant - // data into direct mode (#1710). 
- let kind = create_composio_client(config).map_err(|e| format!("[composio] list_tools: {e}"))?; - match kind { - ComposioClientKind::Backend(client) => { - tracing::debug!("[composio] list_tools: backend variant"); - let resp = client - .list_tools(toolkits.as_deref(), effective_tags.as_deref()) - .await - .map_err(|e| { - report_composio_op_error("list_tools", &e); - format!("[composio] list_tools failed: {e:#}") - })?; - let count = resp.tools.len(); - Ok(RpcOutcome::new( - resp, - vec![format!("composio: {count} tool(s) listed")], - )) - } - ComposioClientKind::Direct(direct) => { - // [composio-direct] Discovery now hits Composio v3 `/tools` - // directly with the user's own API key. Tenant isolation is - // preserved (we never surface backend-tenant catalogue here), - // and the schemas Composio returns are tenant-agnostic so - // the LLM agent gets the same model-callable shape backend - // mode surfaces. Scope the request to the user's connected - // toolkits when no explicit filter was supplied — keeps the - // response bounded and skips schemas the agent can't call. - let scope: Vec = match toolkits { - Some(list) if !list.is_empty() => list, - _ => { - let conns = direct_list_connections(&direct).await.map_err(|e| { - // [#1166 / Sentry TAURI-RUST-X9] Symmetric error - // routing — the prefetch call goes to the same v3 - // `/connected_accounts` endpoint as `list_connections` - // and would emit the same 401 wire shape. Render - // WITH the `[composio-direct]` anchor so the - // classifier arm fires on the prefetch path too. 
- let rendered = format!( - "[composio-direct] list_tools: prefetch connections failed: {e:#}" - ); - report_composio_op_error("list_connections", &rendered); - rendered - })?; - let mut v: Vec = conns - .connections - .iter() - .filter(|c| c.is_active()) - .map(|c| c.normalized_toolkit()) - .filter(|t| !t.is_empty()) - .collect(); - v.sort(); - v.dedup(); - v - } - }; - if scope.is_empty() { - tracing::info!( - "[composio-direct] list_tools: no connected toolkits on this tenant — \ - returning empty tool list" - ); - return Ok(RpcOutcome::new( - ComposioToolsResponse::default(), - vec!["composio: direct mode — 0 tool(s) listed (no connected \ - toolkits on this tenant)" - .to_string()], - )); - } - tracing::debug!( - toolkits = scope.len(), - ?effective_tags, - "[composio-direct] list_tools: fetching v3 tool schemas" - ); - // Forward the same `effective_tags` the backend branch uses so the - // tag filter is honoured in direct (BYO-key) mode too — previously - // it was computed above and then dropped on this branch. - let mut resp = direct_list_tools(&direct, &scope, effective_tags.as_deref()) - .await - .map_err(|e| { - // [#1166 / Sentry TAURI-RUST-X9] Symmetric with the backend - // branch's hook (line ~451). Direct-mode `list_tools` - // failures are user-state when the API key is bad. Render - // WITH the `[composio-direct]` anchor so the classifier - // arm fires. - let rendered = format!("[composio-direct] list_tools failed: {e:#}"); - report_composio_op_error("list_tools", &rendered); - rendered - })?; - // Apply the same curated-whitelist + user-scope filter the - // backend path runs — schemas may be tenant-agnostic but - // OpenHuman's curation policy isn't, and direct-mode users - // should benefit from the same safety net (e.g. dangerous - // destructive actions hidden by default). 
- let before = resp.tools.len(); - filter_list_tools_response_for_direct(&mut resp).await; - let after = resp.tools.len(); - tracing::debug!( - before, - after, - dropped = before - after, - "[composio-direct] list_tools: curated filter applied" - ); - let count = resp.tools.len(); - Ok(RpcOutcome::new( - resp, - vec![format!( - "composio: direct mode — {count} tool(s) listed across \ - {} toolkit(s)", - scope.len() - )], - )) - } - } -} - -/// Apply OpenHuman's curated-whitelist + user-scope visibility filter to -/// a fresh `ComposioToolsResponse` in direct mode. Mirrors the per-call -/// filter loop in `tools.rs::filter_list_tools_response` so backend and -/// direct surfaces share the same safety net. -async fn filter_list_tools_response_for_direct(resp: &mut ComposioToolsResponse) { - use super::providers::{ - catalog_for_toolkit, classify_unknown, find_curated, get_provider, - load_user_scope_or_default, toolkit_from_slug, - }; - - let mut keep: Vec = Vec::with_capacity(resp.tools.len()); - for t in &resp.tools { - let slug = &t.function.name; - let Some(toolkit) = toolkit_from_slug(slug) else { - keep.push(true); - continue; - }; - let pref = load_user_scope_or_default(&toolkit).await; - let catalog = get_provider(&toolkit) - .and_then(|p| p.curated_tools()) - .or_else(|| catalog_for_toolkit(&toolkit)); - let allowed = match catalog { - Some(cat) => match find_curated(cat, slug) { - Some(curated) => pref.allows(curated.scope), - None => false, - }, - None => pref.allows(classify_unknown(slug)), - }; - keep.push(allowed); - } - let drained: Vec<_> = resp.tools.drain(..).collect(); - resp.tools = drained - .into_iter() - .zip(keep) - .filter_map(|(tool, keep_it)| if keep_it { Some(tool) } else { None }) - .collect(); -} - -// ── Execute ───────────────────────────────────────────────────────── - -pub async fn composio_execute( - config: &Config, - tool: &str, - arguments: Option, -) -> OpResult> { - tracing::debug!(tool = %tool, "[composio] rpc execute"); - // 
Route through the mode-aware factory so direct-mode users hit - // their personal Composio tenant for tool execution. Mirrors the - // agent-tool path's `ComposioExecuteTool::execute` (commit - // 814fdd97); the shared `direct_execute` helper in `client.rs` - // keeps the envelope identical between backend and direct so the - // `ComposioActionExecuted` event-bus payload, markdown-vs-JSON - // body preference, and cost-USD log line all stay uniform (#1710). - let kind = create_composio_client(config).map_err(|e| format!("[composio] execute: {e}"))?; - let started = std::time::Instant::now(); - // Centralized prepare → retry → error-mapping pipeline (#1797), - // mode-aware over the backend/direct split (#1710). The dispatcher - // returns pre-formatted `[composio:error:] …` strings so the - // frontend formatter at `app/src/lib/composio/formatters.ts` can - // parse the class regardless of which mode produced the failure. - let result = super::execute_dispatch::execute_composio_action_kind( - kind, - tool, - arguments, - &config.composio.entity_id, - ) - .await; - let elapsed_ms = started.elapsed().as_millis() as u64; - - match result { - Ok(resp) => { - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::ComposioActionExecuted { - tool: tool.to_string(), - success: resp.successful, - error: resp.error.clone(), - cost_usd: resp.cost_usd, - elapsed_ms, - }, - ); - // Backend (tinyhumansai/backend#683) now parses all composio - // payloads server-side and returns a `markdownFormatted` - // string for known tools, so callers should consume that - // directly. Core no longer reshapes `resp.data` here. Memory - // ingestion paths still call `post_process_action_result` - // explicitly when they need the structured slim envelope. 
- Ok(RpcOutcome::new( - resp, - vec![format!("composio: executed {tool} ({elapsed_ms}ms)")], - )) - } - Err(e) => { - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::ComposioActionExecuted { - tool: tool.to_string(), - success: false, - error: Some(e.to_string()), - cost_usd: 0.0, - elapsed_ms, - }, - ); - report_composio_op_error("execute", &e); - // Preserve already-classified errors from the dispatcher - // (`[composio:error:] …`) so the frontend formatter at - // `app/src/lib/composio/formatters.ts` can still parse the class. - let is_classified = e.starts_with("[composio:error:"); - tracing::debug!( - tool = %tool, - elapsed_ms, - classified = is_classified, - "[composio] rpc execute error mapped" - ); - if is_classified { - Err(e) - } else { - Err(format!("[composio] execute failed: {e}")) - } - } - } -} - -// ── GitHub repos + trigger provisioning ───────────────────────────── - -pub async fn composio_list_github_repos( - config: &Config, - connection_id: Option, -) -> OpResult> { - tracing::debug!(?connection_id, "[composio] rpc list_github_repos"); - let client = resolve_client(config)?; - let resp = client - .list_github_repos(connection_id.as_deref()) - .await - .map_err(|e| { - report_composio_op_error("list_github_repos", &e); - format!("[composio] list_github_repos failed: {e:#}") - })?; - let count = resp.repositories.len(); - let connection_id = resp.connection_id.clone(); - Ok(RpcOutcome::new( - resp, - vec![format!( - "composio: {count} github repo(s) listed for connection {connection_id}" - )], - )) -} - -pub async fn composio_create_trigger( - config: &Config, - slug: &str, - connection_id: Option, - trigger_config: Option, -) -> OpResult> { - tracing::debug!(slug = %slug, ?connection_id, "[composio] rpc create_trigger"); - let client = resolve_client(config)?; - let resp = client - .create_trigger(slug, connection_id.as_deref(), trigger_config) - .await - .map_err(|e| { - report_composio_op_error("create_trigger", 
&e); - format!("[composio] create_trigger failed: {e:#}") - })?; - let trigger_id = resp.trigger_id.clone(); - Ok(RpcOutcome::new( - resp, - vec![format!( - "composio: trigger {trigger_id} created for slug {slug}" - )], - )) -} - -// ── Trigger management (catalog + enable/disable) ────────────────── - -pub async fn composio_list_available_triggers( - config: &Config, - toolkit: &str, - connection_id: Option, -) -> OpResult> { - tracing::debug!(toolkit = %toolkit, ?connection_id, "[composio] rpc list_available_triggers"); - let client = resolve_client(config)?; - let resp = client - .list_available_triggers(toolkit, connection_id.as_deref()) - .await - .map_err(|e| { - report_composio_op_error("list_available_triggers", &e); - format!("[composio] list_available_triggers failed: {e:#}") - })?; - let count = resp.triggers.len(); - Ok(RpcOutcome::new( - resp, - vec![format!( - "composio: {count} available trigger(s) for toolkit {toolkit}" - )], - )) -} - -pub async fn composio_list_triggers( - config: &Config, - toolkit: Option, -) -> OpResult> { - tracing::debug!(?toolkit, "[composio] rpc list_triggers"); - let client = resolve_client(config)?; - let resp = client - .list_active_triggers(toolkit.as_deref()) - .await - .map_err(|e| { - report_composio_op_error("list_triggers", &e); - format!("[composio] list_triggers failed: {e:#}") - })?; - let count = resp.triggers.len(); - Ok(RpcOutcome::new( - resp, - vec![format!("composio: {count} active trigger(s) listed")], - )) -} - -pub async fn composio_enable_trigger( - config: &Config, - connection_id: &str, - slug: &str, - trigger_config: Option, -) -> OpResult> { - tracing::debug!(slug = %slug, connection_id = %connection_id, "[composio] rpc enable_trigger"); - let client = resolve_client(config)?; - let resp = client - .enable_trigger(connection_id, slug, trigger_config) - .await - .map_err(|e| { - // Keep the raw error on the Sentry funnel for diagnosis (unchanged). 
- report_composio_op_error("enable_trigger", &e); - // Map the backend error (e.g. a 403 "you do not have permission to - // enable triggers on this connection") into actionable, user-facing - // guidance instead of leaking the raw blob to the UI (issue #2913). - let raw = format!("{e:#}"); - let class = super::error_mapping::classify_composio_error(slug, &raw); - let mapped = super::error_mapping::format_provider_error(slug, &raw); - tracing::warn!( - slug = %slug, - connection_id = %connection_id, - class = class.as_str(), - "[composio] enable_trigger failed; surfacing mapped error" - ); - mapped - })?; - let trigger_id = resp.trigger_id.clone(); - Ok(RpcOutcome::new( - resp, - vec![format!("composio: enabled trigger {slug} → {trigger_id}")], - )) -} - -pub async fn composio_disable_trigger( - config: &Config, - trigger_id: &str, -) -> OpResult> { - tracing::debug!(trigger_id = %trigger_id, "[composio] rpc disable_trigger"); - let client = resolve_client(config)?; - let resp = client.disable_trigger(trigger_id).await.map_err(|e| { - report_composio_op_error("disable_trigger", &e); - format!("[composio] disable_trigger failed: {e:#}") - })?; - let message = if resp.deleted { - format!("composio: disabled trigger {trigger_id}") - } else { - format!("composio: trigger {trigger_id} was not active") - }; - Ok(RpcOutcome::new(resp, vec![message])) -} - -// ── Trigger history ──────────────────────────────────────────────── - -pub async fn composio_list_trigger_history( - config: &Config, - limit: Option, -) -> OpResult> { - let requested_limit = limit.unwrap_or(100).clamp(1, 500); - let workspace_label = config - .workspace_dir - .file_name() - .and_then(|value| value.to_str()) - .unwrap_or(""); - tracing::debug!( - limit = requested_limit, - workspace = workspace_label, - "[composio] rpc list_trigger_history" - ); - - let store = super::trigger_history::global().ok_or_else(|| { - "[composio] trigger history unavailable: archive store is not initialized".to_string() - 
})?; - - let history = store - .list_recent(requested_limit) - .map_err(|error| format!("[composio] list_trigger_history failed: {error}"))?; - let count = history.entries.len(); - - Ok(RpcOutcome::new( - history, - vec![format!( - "composio: {count} trigger history entrie(s) loaded (archive present)" - )], - )) -} - -// ── Provider-backed ops ───────────────────────────────────────────── -// -// `composio_get_user_profile` and `composio_sync` route through the -// per-toolkit `ComposioProvider` registry instead of executing a -// single Composio action directly. The caller passes a `connection_id`, -// the op resolves the connection's toolkit slug from the backend, looks -// up the provider, and dispatches to it. -// -// These exist because individual toolkits need to do *several* -// `composio.execute` calls + bespoke result reshaping to produce a -// usable user profile or sync snapshot — wrapping that in a single -// RPC method keeps the UI/agent surface tiny and consistent across -// toolkits. - -/// Look up the toolkit slug for an existing connection. Returns an -/// error string if the connection is unknown to the backend. -async fn resolve_toolkit_for_connection( - client: &ComposioClient, - connection_id: &str, -) -> OpResult { - tracing::debug!(connection_id = %connection_id, "[composio] resolve_toolkit_for_connection"); - let resp = client.list_connections().await.map_err(|e| { - report_composio_op_error("resolve_toolkit_for_connection", &e); - format!("[composio] list_connections failed: {e:#}") - })?; - let conn = resp - .connections - .into_iter() - .find(|c| c.id == connection_id) - .ok_or_else(|| format!("[composio] no connection with id '{connection_id}'"))?; - Ok(conn.toolkit) -} - -/// `openhuman.composio_get_user_profile` — fetch a normalized user -/// profile for a connected account by dispatching to the toolkit's -/// registered [`super::providers::ComposioProvider`]. 
-pub async fn composio_get_user_profile( - config: &Config, - connection_id: &str, -) -> OpResult> { - tracing::debug!(connection_id = %connection_id, "[composio] rpc get_user_profile"); - let client = resolve_client(config)?; - let toolkit = resolve_toolkit_for_connection(&client, connection_id).await?; - - let provider = get_provider(&toolkit).ok_or_else(|| { - format!("[composio] no native provider registered for toolkit '{toolkit}'") - })?; - - // #1710: drop the pre-baked `client` field from `ProviderContext`. - // The factory resolves a fresh client per `ctx.execute(...)` call so - // a mode toggle is honoured immediately. We keep the local `client` - // binding alive for the toolkit lookup above (which still uses the - // explicit handle); the context itself just carries `Arc`. - let _ = client; - let ctx = ProviderContext { - config: Arc::new(config.clone()), - toolkit: toolkit.clone(), - connection_id: Some(connection_id.to_string()), - usage: Default::default(), - max_items: None, - sync_depth_days: None, - }; - - let profile = provider.fetch_user_profile(&ctx).await.map_err(|e| { - report_composio_op_error("get_user_profile", &e); - format!("[composio] get_user_profile({toolkit}) failed: {e}") - })?; - - // Side-effect: persist profile fields into the local user_profile - // facet table so any RPC call also refreshes the local store. - let facets = provider.identity_set(&profile); - tracing::debug!( - toolkit = %toolkit, - facets_written = facets, - "[composio] identity_set persisted profile facets from get_user_profile" - ); - - Ok(RpcOutcome::new( - profile, - vec![format!( - "composio: fetched {toolkit} profile for connection {connection_id}" - )], - )) -} - -/// `openhuman.composio_refresh_all_identities` — re-fetch the user -/// profile for every active connection and persist via `identity_set`. 
-/// Used to populate kind-tagged `user_profile` rows on existing -/// connections after the #1365 schema rewrite without waiting for the -/// next periodic sync tick. -/// -/// Best-effort per connection: a failure on one toolkit does not abort -/// the others. Returns aggregate counts plus a per-connection trail in -/// the envelope messages. -pub async fn composio_refresh_all_identities( - config: &Config, -) -> OpResult> { - tracing::info!("[composio] rpc refresh_all_identities"); - let client = resolve_client(config)?; - let conns = client.list_connections().await.map_err(|e| { - report_composio_op_error("refresh_all_identities", &e); - format!("[composio] list_connections failed: {e:#}") - })?; - - let mut report = RefreshIdentitiesReport::default(); - let mut messages: Vec = Vec::with_capacity(conns.connections.len() + 1); - - for conn in conns.connections { - if !conn.is_active() { - report.skipped_inactive += 1; - continue; - } - let toolkit = conn.toolkit.clone(); - let connection_id = conn.id.clone(); - - let Some(provider) = get_provider(&toolkit) else { - tracing::debug!( - toolkit = %toolkit, - connection_id = %connection_id, - "[composio] refresh_all_identities: no native provider — skipping" - ); - report.skipped_no_provider += 1; - messages.push(format!( - "{toolkit}/{connection_id}: skipped (no native provider)" - )); - continue; - }; - - let ctx = ProviderContext { - config: Arc::new(config.clone()), - toolkit: toolkit.clone(), - connection_id: Some(connection_id.clone()), - usage: Default::default(), - max_items: None, - sync_depth_days: None, - }; - - match provider.fetch_user_profile(&ctx).await { - Ok(profile) => { - let rows = provider.identity_set(&profile); - report.refreshed += 1; - report.rows_written += rows; - tracing::debug!( - toolkit = %toolkit, - connection_id = %connection_id, - rows_written = rows, - "[composio] refresh_all_identities: identity_set ok" - ); - messages.push(format!("{toolkit}/{connection_id}: {rows} row(s)")); - } 
- Err(e) => { - report.failed += 1; - tracing::warn!( - toolkit = %toolkit, - connection_id = %connection_id, - error = %e, - "[composio] refresh_all_identities: fetch_user_profile failed" - ); - messages.push(format!("{toolkit}/{connection_id}: ERROR — {e}")); - } - } - } - - let summary = format!( - "composio: refreshed {ok}/{tried} active conn(s) — {rows} rows; \ - {fail} failed, {nopv} skipped (no provider), {inact} inactive", - ok = report.refreshed, - // `tried` is the count of active connections we actually scanned — - // include `skipped_no_provider` so the denominator covers the full - // active set, not just provider-backed ones (#1381 review). - tried = report.refreshed + report.failed + report.skipped_no_provider, - rows = report.rows_written, - fail = report.failed, - nopv = report.skipped_no_provider, - inact = report.skipped_inactive, - ); - let mut envelope = vec![summary]; - envelope.extend(messages); - Ok(RpcOutcome::new(report, envelope)) -} - -/// Aggregate result of [`composio_refresh_all_identities`]. -#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)] -pub struct RefreshIdentitiesReport { - pub refreshed: usize, - pub failed: usize, - pub skipped_no_provider: usize, - pub skipped_inactive: usize, - pub rows_written: usize, -} - -/// `openhuman.composio_sync` — run a sync pass for a connected account -/// by dispatching to the toolkit's registered provider. `reason` is -/// `"manual"` by default; the periodic scheduler passes `"periodic"` -/// and the OAuth event subscriber passes `"connection_created"`. 
-pub async fn composio_sync( - config: &Config, - connection_id: &str, - reason: Option, -) -> OpResult> { - let reason = parse_sync_reason(reason.as_deref())?; - tracing::debug!( - connection_id = %connection_id, - reason = reason.as_str(), - "[composio] rpc sync (spawned)" - ); - // Validate synchronously — a bad request (unknown connection / no native - // provider for toolkit) must surface to the caller via the RPC error - // envelope, not silently inside a spawned task. - let client = resolve_client(config)?; - let toolkit = resolve_toolkit_for_connection(&client, connection_id).await?; - let provider = get_provider(&toolkit).ok_or_else(|| { - format!("[composio] no native provider registered for toolkit '{toolkit}'") - })?; - let _ = client; // see analogous comment above — drop the pre-baked client (#1710). - - // `provider.sync` walks every page of the upstream API and ingests every - // message in-band — on a real prod inbox a healthy run can legitimately - // exceed the frontend's 30s `composio_sync` RPC `.await` cap (one - // healthy periodic tick is already ~100s for 20 pages / 500 messages). - // There is no reason for the UI to block on it: per-source progress is - // already exposed via the polled `openhuman.memory_sync_status_list` RPC, - // which reads `mem_tree_chunks` directly and therefore reflects the - // spawned task's per-message ingest in real time. So we spawn the sync - // as a background task and return immediately with a "started" envelope. - // The periodic scheduler (`composio::periodic`) already runs - // `provider.sync` from inside its own `tokio::spawn` loop — same pattern. 
- let ctx = ProviderContext { - config: Arc::new(config.clone()), - toolkit: toolkit.clone(), - connection_id: Some(connection_id.to_string()), - usage: Default::default(), - max_items: None, - sync_depth_days: None, - }; - let started_at_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - let toolkit_for_outcome = toolkit.clone(); - let connection_id_for_log = connection_id.to_string(); - - tokio::spawn(async move { - let toolkit_in_task = ctx.toolkit.clone(); - match provider.sync(&ctx, reason).await { - Ok(out) => { - tracing::info!( - toolkit = %toolkit_in_task, - connection_id = %connection_id_for_log, - items_ingested = out.items_ingested, - elapsed_ms = out.elapsed_ms(), - "[composio] background sync ok" - ); - } - Err(e) => { - report_composio_op_error("sync", &e); - tracing::warn!( - toolkit = %toolkit_in_task, - connection_id = %connection_id_for_log, - error = %e, - "[composio] background sync failed" - ); - } - } - }); - - let summary = format!("composio: {toolkit_for_outcome} sync started (background)"); - let outcome = SyncOutcome { - toolkit: toolkit_for_outcome, - connection_id: Some(connection_id.to_string()), - reason: reason.as_str().to_string(), - items_ingested: 0, - started_at_ms, - // Sentinel: still running. Frontend should rely on - // `memory_sync_status_list` for progress; `finished_at_ms == 0` - // means "spawned, not yet complete". - finished_at_ms: 0, - summary: summary.clone(), - details: serde_json::json!({ "status": "started" }), - }; - Ok(RpcOutcome::new(outcome, vec![summary])) -} - -/// Parse the optional `reason` parameter into a [`SyncReason`]. -/// -/// `None` and the explicit `"manual"` value both map to -/// [`SyncReason::Manual`]. Any other unrecognized string is rejected -/// with a clear error so a typo in a caller (UI, CLI, agent) surfaces -/// at the RPC boundary instead of being silently coerced. 
-fn parse_sync_reason(raw: Option<&str>) -> OpResult { - match raw { - None | Some("manual") => Ok(SyncReason::Manual), - Some("periodic") => Ok(SyncReason::Periodic), - Some("connection_created") => Ok(SyncReason::ConnectionCreated), - Some(other) => Err(format!( - "[composio] unrecognized sync reason '{other}': expected one of \ - 'manual', 'periodic', 'connection_created'" - )), - } -} - -/// Enrich each [`super::types::ComposioConnection`] with human-readable -/// identity fields (`account_email`, `workspace`, `username`) from the -/// persisted provider profile cache so the UI picker can show -/// "Gmail · user@example.com" instead of a generic "Account N" label. -/// -/// This is best-effort — no live API calls are made (one SQLite read per poll). -/// If the memory client is not ready yet (first launch before any sync) -/// or no profile rows exist for a connection, that connection is returned -/// unchanged and the UI falls back to its numbered label logic. -/// -/// Connections that already carry identity fields (e.g. from the -/// backend-proxied path) are left untouched. 
-fn enrich_connections_with_identity( - mut resp: super::types::ComposioConnectionsResponse, -) -> super::types::ComposioConnectionsResponse { - use std::collections::HashMap; - - use super::providers::profile::{load_connected_identities, normalize_connection_identifier}; - - let identities = load_connected_identities(); - if identities.is_empty() { - tracing::debug!( - "[composio] enrich_connections_with_identity: no cached identities yet \ - — picker will fall back to numbered labels until first sync completes" - ); - return resp; - } - - // (normalized_toolkit, normalized_conn_id) → identity - let lookup: HashMap<(String, String), _> = identities - .iter() - .map(|id| { - ( - ( - normalize_connection_identifier(&id.source), - normalize_connection_identifier(&id.identifier), - ), - id, - ) - }) - .collect(); - - tracing::debug!( - total = resp.connections.len(), - cached_identities = identities.len(), - "[composio] enrich_connections_with_identity: enriching connection labels" - ); - - for conn in &mut resp.connections { - // Skip connections already carrying identity info (backend-proxied - // path may supply them directly). - if conn.account_email.is_some() || conn.workspace.is_some() || conn.username.is_some() { - continue; - } - let toolkit_key = normalize_connection_identifier(&conn.toolkit); - let conn_id_key = normalize_connection_identifier(&conn.id); - if let Some(identity) = lookup.get(&(toolkit_key, conn_id_key)) { - conn.account_email = identity.email.clone(); - // display_name carries the user's name; for Slack it falls back - // to the team/workspace name when no per-user display name exists, - // making it the best available workspace signal. 
- conn.workspace = identity.display_name.clone(); - conn.username = identity.handle.clone(); - tracing::debug!( - toolkit = %conn.toolkit, - connection_id = %conn.id, - has_email = conn.account_email.is_some(), - has_workspace = conn.workspace.is_some(), - has_username = conn.username.is_some(), - "[composio] enrich_connections_with_identity: enriched connection" - ); - } - } - resp -} - -#[cfg(test)] -pub(crate) use super::connected_integrations::cache_key; -use super::connected_integrations::sync_cache_with_connections; -pub use super::connected_integrations::{ - cached_active_integrations, connected_set_hash, fetch_connected_integrations, - fetch_connected_integrations_status, fetch_toolkit_actions, - invalidate_connected_integrations_cache, FetchConnectedIntegrationsStatus, -}; -#[cfg(test)] -pub(crate) use super::connected_integrations::{CachedIntegrations, CACHE_TTL, INTEGRATIONS_CACHE}; -#[cfg(test)] -pub(crate) use crate::openhuman::context::prompt::ConnectedIntegration; -#[cfg(test)] -pub(crate) use std::time::{Duration, Instant}; - -// ── Direct mode (BYO API key) ─────────────────────────────────────── - -/// Read the current Composio routing mode and whether a direct-mode API -/// key is stored. **The key itself is never returned** — only a boolean -/// flag so the UI can show a "Connected" / "Not set" status. -pub async fn composio_get_mode(config: &Config) -> OpResult> { - let mode = config.composio.mode.trim().to_string(); - let key_present = crate::openhuman::credentials::get_composio_api_key(config) - .map_err(|e| format!("[composio-direct] get_composio_api_key failed: {e}"))? 
- .is_some(); - tracing::debug!( - mode = %mode, - key_present = key_present, - "[composio-direct] get_mode" - ); - let payload = serde_json::json!({ - "mode": mode, - "api_key_set": key_present, - }); - Ok(RpcOutcome::new( - payload, - vec![format!( - "composio: mode={mode}, api_key={}", - if key_present { "set" } else { "unset" } - )], - )) -} - -/// Persist a user-provided Composio API key for direct mode and -/// (optionally) flip `config.composio.mode` over to `"direct"`. -/// -/// **Logging redacts the key** — only its length and presence are -/// recorded. See the `[composio-direct]` debug-logging contract in -/// CLAUDE.md. -pub async fn composio_set_api_key( - config: &Config, - api_key: &str, - activate_direct: bool, -) -> OpResult> { - let trimmed = api_key.trim(); - if trimmed.is_empty() { - return Err("composio.set_api_key: api_key must not be empty".to_string()); - } - tracing::debug!( - key_len = trimmed.len(), - activate_direct, - "[composio-direct] set_api_key (redacted)" - ); - - crate::openhuman::credentials::store_composio_api_key(config, trimmed) - .await - .map_err(|e| format!("[composio-direct] store_composio_api_key failed: {e}"))?; - - let mode_log = if activate_direct { - // Persist the mode flip too — we route through the standard - // config save path so the snapshot, watchers, and reload paths - // all observe it. 
- let mut cfg_mut = crate::openhuman::config::rpc::load_config_with_timeout() - .await - .map_err(|e| format!("[composio-direct] reload config failed: {e}"))?; - cfg_mut.composio.mode = crate::openhuman::config::schema::COMPOSIO_MODE_DIRECT.into(); - cfg_mut - .save() - .await - .map_err(|e| format!("[composio-direct] save config failed: {e}"))?; - "mode=direct" - } else { - "mode unchanged" - }; - - let effective_mode: String = if activate_direct { - "direct".to_string() - } else { - config.composio.mode.clone() - }; - - // [composio-cache] Broadcast a ComposioConfigChanged event so any - // tenant-scoped caches (chat-runtime integrations snapshot, agent - // tool catalogue, frontend useComposioIntegrations poll) can drop - // stale entries and re-fetch against the new client. Without this - // the chat panel keeps showing backend-tenant integrations even - // though the user just switched to direct mode (#1710). - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::ComposioConfigChanged { - mode: effective_mode.clone(), - api_key_set: true, - }, - ); - tracing::debug!( - mode = %effective_mode, - "[composio-cache] published ComposioConfigChanged after set_api_key" - ); - - Ok(RpcOutcome::new( - serde_json::json!({ - "stored": true, - "mode": effective_mode, - }), - vec![format!("composio: api key stored ({mode_log})")], - )) -} - -/// Clear the stored direct-mode API key and reset -/// `config.composio.mode` back to `"backend"`. 
-pub async fn composio_clear_api_key(config: &Config) -> OpResult> { - tracing::debug!("[composio-direct] clear_api_key"); - crate::openhuman::credentials::clear_composio_api_key(config) - .await - .map_err(|e| format!("[composio-direct] clear_composio_api_key failed: {e}"))?; - - let mut cfg_mut = crate::openhuman::config::rpc::load_config_with_timeout() - .await - .map_err(|e| format!("[composio-direct] reload config failed: {e}"))?; - cfg_mut.composio.mode = crate::openhuman::config::schema::COMPOSIO_MODE_BACKEND.into(); - cfg_mut - .save() - .await - .map_err(|e| format!("[composio-direct] save config failed: {e}"))?; - - // [composio-cache] Symmetric with composio_set_api_key — any - // tenant-scoped caches that were populated while the user was in - // direct mode must be invalidated when we drop back to backend - // mode, otherwise the chat panel would keep showing the (now - // empty) direct-tenant state instead of the live backend tenant. - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::ComposioConfigChanged { - mode: "backend".to_string(), - api_key_set: false, - }, - ); - tracing::debug!("[composio-cache] published ComposioConfigChanged after clear_api_key"); - - Ok(RpcOutcome::new( - serde_json::json!({ "cleared": true, "mode": "backend" }), - vec!["composio: api key cleared, mode reset to backend".into()], - )) -} - -#[cfg(test)] -#[path = "ops_tests.rs"] -mod tests; - -// ── Helpers re-exported so callers can pull connection/tool types without -// reaching into the nested types module. -pub use super::types::{ComposioConnection as Connection, ComposioToolSchema as ToolSchemaType}; diff --git a/src/openhuman/composio/ops/connections.rs b/src/openhuman/composio/ops/connections.rs new file mode 100644 index 0000000000..868c83e2db --- /dev/null +++ b/src/openhuman/composio/ops/connections.rs @@ -0,0 +1,342 @@ +//! Connection listing, authorization, deletion, and identity enrichment ops. 
+ +use std::collections::HashMap; + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::super::client::{ + create_composio_client, direct_list_connections, ComposioClient, ComposioClientKind, +}; +use super::super::connected_integrations::{ + fetch_connected_integrations_status, invalidate_connected_integrations_cache, + sync_cache_with_connections, FetchConnectedIntegrationsStatus, +}; +use super::super::providers::profile::{ + load_connected_identities, normalize_connection_identifier, +}; +use super::super::types::{ + ComposioAuthorizeResponse, ComposioConnectionsResponse, ComposioDeleteResponse, +}; +use super::error_utils::{ + direct_mode_without_key, report_composio_op_error, resolve_client, OpResult, +}; +use super::memory_cleanup::composio_memory_targets_for_connection; + +pub async fn composio_list_connections( + config: &Config, +) -> OpResult> { + tracing::debug!("[composio] rpc list_connections"); + if direct_mode_without_key(config)? { + tracing::debug!( + "[composio] list_connections: direct mode selected, no api key configured yet \ + — returning empty connection list (valid setup state, not an error)" + ); + return Ok(RpcOutcome::new( + ComposioConnectionsResponse { + connections: Vec::new(), + }, + vec!["composio: direct mode — no api key configured yet, 0 connection(s)".to_string()], + )); + } + let kind = + create_composio_client(config).map_err(|e| format!("[composio] list_connections: {e}"))?; + let client = match kind { + ComposioClientKind::Backend(client) => { + tracing::debug!("[composio] list_connections: backend variant"); + client + } + ComposioClientKind::Direct(direct) => { + tracing::info!( + "[composio-direct] list_connections: fetching v3 \ + /connected_accounts for the user's personal Composio tenant" + ); + let resp = direct_list_connections(&direct).await.map_err(|e| { + let rendered = format!("[composio-direct] list_connections failed: {e:#}"); + report_composio_op_error("list_connections", 
&rendered); + rendered + })?; + let active = resp.connections.iter().filter(|c| c.is_active()).count(); + let total = resp.connections.len(); + sync_cache_with_connections(&resp.connections); + let resp = enrich_connections_with_identity(resp); + return Ok(RpcOutcome::new( + resp, + vec![format!( + "composio: direct mode — {total} connection(s) listed ({active} active)" + )], + )); + } + }; + let resp = client.list_connections().await.map_err(|e| { + report_composio_op_error("list_connections", &e); + format!("[composio] list_connections failed: {e:#}") + })?; + let active = resp.connections.iter().filter(|c| c.is_active()).count(); + let total = resp.connections.len(); + sync_cache_with_connections(&resp.connections); + let resp = enrich_connections_with_identity(resp); + Ok(RpcOutcome::new( + resp, + vec![format!( + "composio: {total} connection(s) listed ({active} active)" + )], + )) +} + +pub async fn composio_authorize( + config: &Config, + toolkit: &str, + extra_params: Option, +) -> OpResult> { + tracing::debug!(toolkit = %toolkit, has_extra_params = extra_params.is_some(), "[composio] rpc authorize"); + let kind = create_composio_client(config).map_err(|e| format!("[composio] authorize: {e}"))?; + let resp = match kind { + ComposioClientKind::Backend(client) => { + tracing::debug!(toolkit = %toolkit, "[composio] authorize: backend variant"); + super::super::oauth_handoff::authorize_with_meta_guard(&client, toolkit, extra_params) + .await + .map_err(|e| { + report_composio_op_error("authorize", &e); + let wrapped = + super::super::oauth_handoff::wrap_authorize_rate_limit_error(toolkit, e); + format!("[composio] authorize failed: {wrapped:#}") + })? 
+ } + ComposioClientKind::Direct(direct) => { + tracing::info!( + toolkit = %toolkit, + "[composio-direct] authorize: routing to user's personal Composio tenant" + ); + if extra_params.is_some() { + tracing::warn!( + toolkit = %toolkit, + "[composio-direct] authorize: extra_params is set but direct mode does \ + not propagate it — configure toolkit-specific fields via \ + app.composio.dev for your auth config" + ); + } + super::super::oauth_handoff::direct_authorize_with_meta_guard( + &direct, + toolkit, + &config.composio.entity_id, + ) + .await + .map_err(|e| { + let wrapped = + super::super::oauth_handoff::wrap_authorize_rate_limit_error(toolkit, e); + let rendered = format!("[composio-direct] authorize failed: {wrapped:#}"); + report_composio_op_error("authorize", &rendered); + rendered + })? + } + }; + + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::ComposioConnectionCreated { + toolkit: toolkit.to_string(), + connection_id: resp.connection_id.clone(), + connect_url: resp.connect_url.clone(), + }, + ); + + Ok(RpcOutcome::new( + resp, + vec![format!("composio: authorize flow started for {toolkit}")], + )) +} + +pub async fn composio_delete_connection( + config: &Config, + connection_id: &str, + clear_memory: bool, +) -> OpResult> { + tracing::debug!(connection_id = %connection_id, "[composio] rpc delete_connection"); + let client = resolve_client(config)?; + let toolkit = match resolve_toolkit_for_connection(&client, connection_id).await { + Ok(toolkit) => Some(toolkit), + Err(error) if clear_memory => { + return Err(format!( + "[composio] delete_connection cannot clear memory without resolving toolkit: {error}" + )); + } + Err(_) => None, + }; + let memory_targets = if clear_memory { + composio_memory_targets_for_connection(config, toolkit.as_deref(), connection_id) + .await + .map_err(|error| { + format!("[composio] delete_connection cannot enumerate memory targets: {error:#}") + })? 
+ } else { + Vec::new() + }; + let mut resp = client.delete_connection(connection_id).await.map_err(|e| { + report_composio_op_error("delete_connection", &e); + format!("[composio] delete_connection failed: {e:#}") + })?; + let mut memory_chunks_deleted = 0; + let mut memory_clear_errors = Vec::new(); + for target in &memory_targets { + match target.delete(config) { + Ok(deleted) => { + memory_chunks_deleted += deleted; + } + Err(error) => { + memory_clear_errors.push(format!( + "[composio] connection deleted, but failed to clear memory chunks for {}: {error:#}", + target.label() + )); + } + } + } + resp.memory_chunks_deleted = memory_chunks_deleted; + if let Some(toolkit) = toolkit.as_deref() { + let deleted = super::super::providers::profile::delete_connected_identity_facets( + toolkit, + connection_id, + ); + tracing::debug!( + toolkit = %toolkit, + connection_id = %connection_id, + facets_deleted = deleted, + "[composio] deleted connected identity facets after connection removal" + ); + if let Err(e) = super::super::providers::profile_md::remove_provider_from_profile_md( + &config.workspace_dir, + toolkit, + connection_id, + ) { + tracing::warn!( + toolkit = %toolkit, + connection_id = %connection_id, + error = %e, + "[composio] PROFILE.md bullet removal failed (non-fatal)" + ); + } + } + match crate::openhuman::memory_sources::registry::remove_composio_source_by_connection_id( + connection_id, + ) + .await + { + Ok(0) => {} + Ok(removed) => tracing::debug!( + connection_id = %connection_id, + removed, + "[composio] pruned memory_sources entry after connection deletion" + ), + Err(e) => tracing::warn!( + connection_id = %connection_id, + error = %e, + "[composio] failed to prune memory_sources entry after connection deletion (non-fatal)" + ), + } + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::ComposioConnectionDeleted { + toolkit: toolkit.unwrap_or_else(|| "unknown".to_string()), + connection_id: connection_id.to_string(), + }, 
+ ); + invalidate_connected_integrations_cache(); + match fetch_connected_integrations_status(config).await { + FetchConnectedIntegrationsStatus::Authoritative(entries) => { + tracing::debug!( + connection_id = %connection_id, + cached_entries = entries.len(), + "[composio] eagerly warmed integrations cache after connection deletion" + ); + } + FetchConnectedIntegrationsStatus::Unavailable => { + tracing::warn!( + connection_id = %connection_id, + "[composio] eager cache warm after connection deletion skipped: backend unavailable" + ); + } + } + if !memory_clear_errors.is_empty() { + return Err(memory_clear_errors.join("; ")); + } + Ok(RpcOutcome::new( + resp, + vec![format!("composio: connection {connection_id} deleted")], + )) +} + +/// Look up the toolkit slug for an existing connection. +pub(super) async fn resolve_toolkit_for_connection( + client: &ComposioClient, + connection_id: &str, +) -> OpResult { + tracing::debug!(connection_id = %connection_id, "[composio] resolve_toolkit_for_connection"); + let resp = client.list_connections().await.map_err(|e| { + report_composio_op_error("resolve_toolkit_for_connection", &e); + format!("[composio] list_connections failed: {e:#}") + })?; + let conn = resp + .connections + .into_iter() + .find(|c| c.id == connection_id) + .ok_or_else(|| format!("[composio] no connection with id '{connection_id}'"))?; + Ok(conn.toolkit) +} + +/// Enrich each [`ComposioConnectionsResponse`] connection with human-readable +/// identity fields (`account_email`, `workspace`, `username`) from the +/// persisted provider profile cache so the UI picker can show +/// "Gmail · user@example.com" instead of a generic "Account N" label. +/// +/// This is best-effort — no live API calls are made (one SQLite read per poll). 
+pub(crate) fn enrich_connections_with_identity( + mut resp: ComposioConnectionsResponse, +) -> ComposioConnectionsResponse { + let identities = load_connected_identities(); + if identities.is_empty() { + tracing::debug!( + "[composio] enrich_connections_with_identity: no cached identities yet \ + — picker will fall back to numbered labels until first sync completes" + ); + return resp; + } + + let lookup: HashMap<(String, String), _> = identities + .iter() + .map(|id| { + ( + ( + normalize_connection_identifier(&id.source), + normalize_connection_identifier(&id.identifier), + ), + id, + ) + }) + .collect(); + + tracing::debug!( + total = resp.connections.len(), + cached_identities = identities.len(), + "[composio] enrich_connections_with_identity: enriching connection labels" + ); + + for conn in &mut resp.connections { + if conn.account_email.is_some() || conn.workspace.is_some() || conn.username.is_some() { + continue; + } + let toolkit_key = normalize_connection_identifier(&conn.toolkit); + let conn_id_key = normalize_connection_identifier(&conn.id); + if let Some(identity) = lookup.get(&(toolkit_key, conn_id_key)) { + conn.account_email = identity.email.clone(); + conn.workspace = identity.display_name.clone(); + conn.username = identity.handle.clone(); + tracing::debug!( + toolkit = %conn.toolkit, + connection_id = %conn.id, + has_email = conn.account_email.is_some(), + has_workspace = conn.workspace.is_some(), + has_username = conn.username.is_some(), + "[composio] enrich_connections_with_identity: enriched connection" + ); + } + } + resp +} diff --git a/src/openhuman/composio/ops/direct_mode.rs b/src/openhuman/composio/ops/direct_mode.rs new file mode 100644 index 0000000000..d3f0eb8df7 --- /dev/null +++ b/src/openhuman/composio/ops/direct_mode.rs @@ -0,0 +1,126 @@ +//! Direct mode (BYO API key) ops. 
+ +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::error_utils::OpResult; + +/// Read the current Composio routing mode and whether a direct-mode API +/// key is stored. **The key itself is never returned** — only a boolean +/// flag so the UI can show a "Connected" / "Not set" status. +pub async fn composio_get_mode(config: &Config) -> OpResult> { + let mode = config.composio.mode.trim().to_string(); + let key_present = crate::openhuman::credentials::get_composio_api_key(config) + .map_err(|e| format!("[composio-direct] get_composio_api_key failed: {e}"))? + .is_some(); + tracing::debug!( + mode = %mode, + key_present = key_present, + "[composio-direct] get_mode" + ); + let payload = serde_json::json!({ + "mode": mode, + "api_key_set": key_present, + }); + Ok(RpcOutcome::new( + payload, + vec![format!( + "composio: mode={mode}, api_key={}", + if key_present { "set" } else { "unset" } + )], + )) +} + +/// Persist a user-provided Composio API key for direct mode and +/// (optionally) flip `config.composio.mode` over to `"direct"`. +/// +/// **Logging redacts the key** — only its length and presence are recorded. 
+pub async fn composio_set_api_key( + config: &Config, + api_key: &str, + activate_direct: bool, +) -> OpResult> { + let trimmed = api_key.trim(); + if trimmed.is_empty() { + return Err("composio.set_api_key: api_key must not be empty".to_string()); + } + tracing::debug!( + key_len = trimmed.len(), + activate_direct, + "[composio-direct] set_api_key (redacted)" + ); + + crate::openhuman::credentials::store_composio_api_key(config, trimmed) + .await + .map_err(|e| format!("[composio-direct] store_composio_api_key failed: {e}"))?; + + let mode_log = if activate_direct { + let mut cfg_mut = crate::openhuman::config::rpc::load_config_with_timeout() + .await + .map_err(|e| format!("[composio-direct] reload config failed: {e}"))?; + cfg_mut.composio.mode = crate::openhuman::config::schema::COMPOSIO_MODE_DIRECT.into(); + cfg_mut + .save() + .await + .map_err(|e| format!("[composio-direct] save config failed: {e}"))?; + "mode=direct" + } else { + "mode unchanged" + }; + + let effective_mode: String = if activate_direct { + "direct".to_string() + } else { + config.composio.mode.clone() + }; + + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::ComposioConfigChanged { + mode: effective_mode.clone(), + api_key_set: true, + }, + ); + tracing::debug!( + mode = %effective_mode, + "[composio-cache] published ComposioConfigChanged after set_api_key" + ); + + Ok(RpcOutcome::new( + serde_json::json!({ + "stored": true, + "mode": effective_mode, + }), + vec![format!("composio: api key stored ({mode_log})")], + )) +} + +/// Clear the stored direct-mode API key and reset +/// `config.composio.mode` back to `"backend"`. 
+pub async fn composio_clear_api_key(config: &Config) -> OpResult> { + tracing::debug!("[composio-direct] clear_api_key"); + crate::openhuman::credentials::clear_composio_api_key(config) + .await + .map_err(|e| format!("[composio-direct] clear_composio_api_key failed: {e}"))?; + + let mut cfg_mut = crate::openhuman::config::rpc::load_config_with_timeout() + .await + .map_err(|e| format!("[composio-direct] reload config failed: {e}"))?; + cfg_mut.composio.mode = crate::openhuman::config::schema::COMPOSIO_MODE_BACKEND.into(); + cfg_mut + .save() + .await + .map_err(|e| format!("[composio-direct] save config failed: {e}"))?; + + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::ComposioConfigChanged { + mode: "backend".to_string(), + api_key_set: false, + }, + ); + tracing::debug!("[composio-cache] published ComposioConfigChanged after clear_api_key"); + + Ok(RpcOutcome::new( + serde_json::json!({ "cleared": true, "mode": "backend" }), + vec!["composio: api key cleared, mode reset to backend".into()], + )) +} diff --git a/src/openhuman/composio/ops/error_utils.rs b/src/openhuman/composio/ops/error_utils.rs new file mode 100644 index 0000000000..75ebceac11 --- /dev/null +++ b/src/openhuman/composio/ops/error_utils.rs @@ -0,0 +1,138 @@ +//! Shared error helpers for the Composio op layer. + +use crate::openhuman::config::Config; + +use super::super::client::{build_composio_client, ComposioClient}; + +/// Toolkits that honour the `tags` query param on the backend tool-list endpoint. +/// Expand this list when a new toolkit gains tag support. +const TAG_QUERYABLE_TOOLKITS: &[&str] = &["github"]; + +/// Returns `true` when `tags` should be forwarded to the backend. +/// +/// Tags are forwarded when no toolkit filter is active (`None` / empty slice) +/// or when at least one requested toolkit is in [`TAG_QUERYABLE_TOOLKITS`]. +/// This is `pub(crate)` so `tools.rs` can reuse it without duplicating the list. 
+pub(crate) fn should_forward_tags(toolkits: Option<&[String]>) -> bool { + match toolkits { + None => true, + Some(kits) => { + kits.is_empty() + || kits.iter().any(|k| { + TAG_QUERYABLE_TOOLKITS + .iter() + .any(|t| k.trim().eq_ignore_ascii_case(t)) + }) + } + } +} + +/// Result alias used by every `composio_*` op in this module. +pub(super) type OpResult = std::result::Result; + +/// Resolve a backend-mode [`ComposioClient`] from the root config, or +/// return an error string that the caller can surface over RPC. +pub(crate) fn resolve_client(config: &Config) -> OpResult { + build_composio_client(config).ok_or_else(|| { + "composio unavailable: no backend session token. Sign in first \ + (auth_store_session)." + .to_string() + }) +} + +/// True when the user has selected Composio **direct** mode but has not yet +/// configured an API key (neither in the keychain nor `config.toml`). +/// +/// This is a valid, user-controlled *setup* state — the user just flipped to +/// direct mode and is about to paste their key — NOT an operation failure. +/// Callers short-circuit to an empty result instead of letting the +/// mode-aware factory bail with "composio direct mode selected but no api key +/// is configured", which the desktop UI's 5 s poll would otherwise funnel to +/// Sentry on every tick (TAURI-RUST-R4). +/// +/// Key presence MUST mirror the factory's own resolution in +/// [`create_composio_client`] (`client.rs`): a key counts if it is in the +/// keychain (`credentials::get_composio_api_key`) **or** in `config.toml` +/// (`config.composio.api_key`). Checking only the keychain would wrongly +/// short-circuit to an empty list for a user who configured their key via +/// `config.toml`, hiding their real connections. 
+pub(crate) fn direct_mode_without_key(config: &Config) -> OpResult { + if config.composio.mode.trim() != crate::openhuman::config::schema::COMPOSIO_MODE_DIRECT { + return Ok(false); + } + let has_key = crate::openhuman::credentials::get_composio_api_key(config) + .map_err(|e| format!("[composio] get_composio_api_key failed: {e}"))? + .or_else(|| { + config + .composio + .api_key + .as_ref() + .map(|k| k.trim().to_string()) + .filter(|k| !k.is_empty()) + }) + .is_some(); + Ok(!has_key) +} + +/// Defense-in-depth Sentry funnel for composio op-layer errors. +/// +/// The shared [`crate::openhuman::integrations::IntegrationClient`] +/// (which fronts every `client.list_*` / `client.execute_tool` / +/// `client.authorize` call) already reports its own failures under +/// `domain="integrations"` with `failure="non_2xx" | "transport"` tags, +/// and the Sentry `before_send` filter (`is_transient_integrations_failure`) +/// drops the transient subset. This helper re-classifies the same +/// anyhow chain at the **op layer** under `domain="composio"` so: +/// +/// 1. Future call sites that bypass `IntegrationClient` still funnel through +/// the same classifier. +/// 2. Op-layer-specific failures get tagged consistently rather than +/// reaching Sentry as bare `Err(String)` returned via RPC. 
+pub(crate) fn report_composio_op_error(operation: &str, err: &E) { + let rendered = format!("{err:#}"); + let failure_tag = classify_composio_failure_tag(rendered.as_str()); + if failure_tag == "non_2xx" { + if let Some(status) = extract_backend_returned_status(&rendered) { + crate::core::observability::report_error_or_expected( + rendered.as_str(), + "composio", + operation, + &[("failure", failure_tag), ("status", status.as_str())], + ); + return; + } + } + crate::core::observability::report_error_or_expected( + rendered.as_str(), + "composio", + operation, + &[("failure", failure_tag)], + ); +} + +/// Pick the `failure` tag for a composio op-layer error message based on +/// shape inspection. Transport-level reqwest chains tag as `"transport"`; +/// everything else (the dominant `Backend returned …` shape) tags +/// as `"non_2xx"`. +/// +/// Extracted so tests can pin the routing without a Sentry test client. +pub(crate) fn classify_composio_failure_tag(rendered: &str) -> &'static str { + let lower = rendered.to_ascii_lowercase(); + let is_transport = crate::core::observability::contains_transient_transport_phrase(rendered) + || lower.contains("error sending request"); + if is_transport { + "transport" + } else { + "non_2xx" + } +} + +/// Extract the HTTP status code from a `Backend returned ...` +/// rendering produced by the integrations layer. Returns `None` when no +/// numeric status follows the anchor phrase. +pub(crate) fn extract_backend_returned_status(rendered: &str) -> Option { + let lower = rendered.to_ascii_lowercase(); + let rest = lower.split_once("backend returned ")?.1; + let digits: String = rest.chars().take_while(|c| c.is_ascii_digit()).collect(); + (!digits.is_empty()).then_some(digits) +} diff --git a/src/openhuman/composio/ops/execute.rs b/src/openhuman/composio/ops/execute.rs new file mode 100644 index 0000000000..ad3747576e --- /dev/null +++ b/src/openhuman/composio/ops/execute.rs @@ -0,0 +1,68 @@ +//! Tool execution op. 
+ +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::super::client::create_composio_client; +use super::super::types::ComposioExecuteResponse; +use super::error_utils::{report_composio_op_error, OpResult}; + +pub async fn composio_execute( + config: &Config, + tool: &str, + arguments: Option, +) -> OpResult> { + tracing::debug!(tool = %tool, "[composio] rpc execute"); + let kind = create_composio_client(config).map_err(|e| format!("[composio] execute: {e}"))?; + let started = std::time::Instant::now(); + let result = super::super::execute_dispatch::execute_composio_action_kind( + kind, + tool, + arguments, + &config.composio.entity_id, + ) + .await; + let elapsed_ms = started.elapsed().as_millis() as u64; + + match result { + Ok(resp) => { + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::ComposioActionExecuted { + tool: tool.to_string(), + success: resp.successful, + error: resp.error.clone(), + cost_usd: resp.cost_usd, + elapsed_ms, + }, + ); + Ok(RpcOutcome::new( + resp, + vec![format!("composio: executed {tool} ({elapsed_ms}ms)")], + )) + } + Err(e) => { + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::ComposioActionExecuted { + tool: tool.to_string(), + success: false, + error: Some(e.to_string()), + cost_usd: 0.0, + elapsed_ms, + }, + ); + report_composio_op_error("execute", &e); + let is_classified = e.starts_with("[composio:error:"); + tracing::debug!( + tool = %tool, + elapsed_ms, + classified = is_classified, + "[composio] rpc execute error mapped" + ); + if is_classified { + Err(e) + } else { + Err(format!("[composio] execute failed: {e}")) + } + } + } +} diff --git a/src/openhuman/composio/ops/memory_cleanup.rs b/src/openhuman/composio/ops/memory_cleanup.rs new file mode 100644 index 0000000000..18ffd84b0b --- /dev/null +++ b/src/openhuman/composio/ops/memory_cleanup.rs @@ -0,0 +1,152 @@ +//! Memory cleanup helpers used when deleting a Composio connection. 
+ +use std::sync::Arc; + +use crate::openhuman::config::Config; +use crate::openhuman::memory::MemoryClient; +use crate::openhuman::memory_store::chunks::store as memory_tree_store; +use crate::openhuman::memory_store::chunks::types::SourceKind; + +use super::super::providers::sync_state::SyncState; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) enum MemoryCleanupTarget { + Exact(SourceKind, String), + Prefix(SourceKind, String), + Owner(SourceKind, String), +} + +impl MemoryCleanupTarget { + pub(super) fn delete(&self, config: &Config) -> anyhow::Result { + match self { + Self::Exact(source_kind, source_id) => { + memory_tree_store::delete_chunks_by_source(config, *source_kind, source_id) + } + Self::Prefix(source_kind, source_id_prefix) => { + memory_tree_store::delete_chunks_by_source_prefix( + config, + *source_kind, + source_id_prefix, + ) + } + Self::Owner(source_kind, owner) => { + memory_tree_store::delete_chunks_by_owner(config, *source_kind, owner) + } + } + } + + pub(super) fn label(&self) -> String { + match self { + Self::Exact(source_kind, source_id) => { + format!("{}:{source_id}", source_kind.as_str()) + } + Self::Prefix(source_kind, source_id_prefix) => { + format!("{}:{source_id_prefix}*", source_kind.as_str()) + } + Self::Owner(source_kind, owner) => { + format!("{}:owner:{owner}", source_kind.as_str()) + } + } + } +} + +pub(crate) async fn composio_memory_targets_for_connection( + config: &Config, + toolkit: Option<&str>, + connection_id: &str, +) -> anyhow::Result> { + let Some(toolkit) = toolkit.map(str::trim).filter(|s| !s.is_empty()) else { + return Ok(Vec::new()); + }; + + let targets = match toolkit.to_ascii_lowercase().as_str() { + "slack" => vec![MemoryCleanupTarget::Exact( + SourceKind::Chat, + format!("slack:{connection_id}"), + )], + "gmail" => gmail_memory_sources_for_connection(connection_id), + "notion" => notion_memory_targets_for_connection(config, connection_id).await?, + "drive" | "googledrive" | "google_drive" => { + 
drive_memory_targets_for_connection(connection_id) + } + _ => Vec::new(), + }; + Ok(targets) +} + +fn gmail_memory_sources_for_connection(connection_id: &str) -> Vec { + vec![ + MemoryCleanupTarget::Owner(SourceKind::Email, format!("gmail-sync:{connection_id}")), + MemoryCleanupTarget::Exact(SourceKind::Email, format!("gmail:{connection_id}")), + MemoryCleanupTarget::Prefix(SourceKind::Email, format!("gmail:{connection_id}:")), + MemoryCleanupTarget::Prefix(SourceKind::Email, format!("gmail:{connection_id}/")), + ] +} + +async fn notion_memory_targets_for_connection( + config: &Config, + connection_id: &str, +) -> anyhow::Result> { + let mut targets = connection_scoped_document_targets("notion", connection_id); + + let memory = Arc::new( + MemoryClient::from_workspace_dir(config.workspace_dir.clone()).map_err(|error| { + anyhow::anyhow!( + "failed to open memory client for notion cleanup target discovery: {error}" + ) + })?, + ); + let state = SyncState::load(&memory, "notion", connection_id) + .await + .map_err(|error| { + anyhow::anyhow!("failed to load notion sync state for memory cleanup: {error}") + })?; + for raw_id in state.synced_ids { + let Some(page_id) = notion_synced_page_id(&raw_id) else { + continue; + }; + targets.push(MemoryCleanupTarget::Exact( + SourceKind::Document, + format!("notion:{page_id}"), + )); + targets.push(MemoryCleanupTarget::Exact( + SourceKind::Document, + format!("composio-notion-page-{page_id}"), + )); + } + + Ok(dedupe_memory_targets(targets)) +} + +fn drive_memory_targets_for_connection(connection_id: &str) -> Vec { + ["drive", "googledrive", "google_drive"] + .into_iter() + .flat_map(|prefix| connection_scoped_document_targets(prefix, connection_id)) + .collect() +} + +fn connection_scoped_document_targets( + prefix: &str, + connection_id: &str, +) -> Vec { + vec![ + MemoryCleanupTarget::Exact(SourceKind::Document, format!("{prefix}:{connection_id}")), + MemoryCleanupTarget::Prefix(SourceKind::Document, 
format!("{prefix}:{connection_id}:")), + MemoryCleanupTarget::Prefix(SourceKind::Document, format!("{prefix}:{connection_id}/")), + ] +} + +fn notion_synced_page_id(raw_id: &str) -> Option { + let page_id = raw_id.split_once('@').map_or(raw_id, |(id, _)| id).trim(); + (!page_id.is_empty()).then(|| page_id.to_string()) +} + +fn dedupe_memory_targets(targets: Vec) -> Vec { + let mut unique = Vec::new(); + for target in targets { + if !unique.contains(&target) { + unique.push(target); + } + } + unique +} diff --git a/src/openhuman/composio/ops/mod.rs b/src/openhuman/composio/ops/mod.rs new file mode 100644 index 0000000000..7d8ee86c73 --- /dev/null +++ b/src/openhuman/composio/ops/mod.rs @@ -0,0 +1,104 @@ +//! RPC-facing operations for the Composio domain. +//! +//! Each `composio_*` function wraps a [`ComposioClient`] call, translates +//! errors to strings, and returns an [`RpcOutcome`] so the controller +//! schemas can log a user-visible line. The handlers in [`super::schemas`] +//! call into these. +//! +//! These ops are also callable directly from other domains (e.g. the +//! agent harness) when they need composio data at runtime. +//! +//! ## Module layout +//! +//! | Sub-module | Contents | +//! |-------------------|--------------------------------------------------------------------| +//! | `error_utils` | `OpResult`, `resolve_client`, `report_composio_op_error`, helpers | +//! | `toolkits` | `composio_list_toolkits`, `composio_list_capabilities`, ... | +//! | `connections` | `composio_list_connections`, `composio_authorize`, `_delete_...` | +//! | `memory_cleanup` | Memory-cleanup helpers for connection deletion | +//! | `tools_ops` | `composio_list_tools` | +//! | `execute` | `composio_execute` | +//! | `triggers` | GitHub repos + trigger CRUD + trigger history | +//! | `providers_ops` | `composio_get_user_profile`, `_refresh_...`, `composio_sync` | +//! 
| `direct_mode` | `composio_get_mode`, `composio_set_api_key`, `_clear_...` | + +mod connections; +mod direct_mode; +mod error_utils; +mod execute; +mod memory_cleanup; +mod providers_ops; +mod toolkits; +mod tools_ops; +mod triggers; + +// ── Public re-exports (match original ops.rs public surface) ─────────────── + +pub use connections::{composio_authorize, composio_delete_connection, composio_list_connections}; +pub use direct_mode::{composio_clear_api_key, composio_get_mode, composio_set_api_key}; +pub(crate) use error_utils::{report_composio_op_error, should_forward_tags}; +pub use execute::composio_execute; +pub use providers_ops::{ + composio_get_user_profile, composio_refresh_all_identities, composio_sync, + RefreshIdentitiesReport, +}; +pub use toolkits::{ + composio_list_agent_ready_toolkits, composio_list_capabilities, composio_list_toolkits, +}; +pub use tools_ops::composio_list_tools; +pub use triggers::{ + composio_create_trigger, composio_disable_trigger, composio_enable_trigger, + composio_list_available_triggers, composio_list_github_repos, composio_list_trigger_history, + composio_list_triggers, +}; + +// ── Re-export connected_integrations public items ────────────────────────── +// (originally at the bottom of ops.rs) + +pub use super::connected_integrations::{ + cached_active_integrations, connected_set_hash, fetch_connected_integrations, + fetch_connected_integrations_status, fetch_toolkit_actions, + invalidate_connected_integrations_cache, FetchConnectedIntegrationsStatus, +}; + +// ── Type aliases re-exported for callers ────────────────────────────────── + +pub use super::types::{ComposioConnection as Connection, ComposioToolSchema as ToolSchemaType}; + +// ── Test-only re-exports (pub(crate) to match original visibility) ───────── + +#[cfg(test)] +pub(crate) use super::connected_integrations::cache_key; +#[cfg(test)] +pub(crate) use super::connected_integrations::{CachedIntegrations, CACHE_TTL, INTEGRATIONS_CACHE}; +#[cfg(test)] 
+pub(crate) use crate::openhuman::context::prompt::ConnectedIntegration; +#[cfg(test)] +pub(crate) use std::time::{Duration, Instant}; + +// Private items needed by the test module via `use super::*` +#[cfg(test)] +pub(crate) use super::connected_integrations::sync_cache_with_connections; +#[cfg(test)] +pub(crate) use crate::openhuman::config::Config; +#[cfg(test)] +pub(crate) use crate::openhuman::memory_store::MemoryClient; +#[cfg(test)] +pub(crate) use crate::openhuman::memory_sync::composio::providers::sync_state::SyncState; +#[cfg(test)] +pub(crate) use crate::openhuman::memory_sync::composio::providers::SyncReason; +#[cfg(test)] +pub(crate) use connections::enrich_connections_with_identity; +#[cfg(test)] +pub(crate) use error_utils::{ + classify_composio_failure_tag, direct_mode_without_key, extract_backend_returned_status, + resolve_client, +}; +#[cfg(test)] +pub(crate) use memory_cleanup::{composio_memory_targets_for_connection, MemoryCleanupTarget}; +#[cfg(test)] +pub(crate) use providers_ops::parse_sync_reason; + +#[cfg(test)] +#[path = "../ops_tests.rs"] +mod tests; diff --git a/src/openhuman/composio/ops/providers_ops.rs b/src/openhuman/composio/ops/providers_ops.rs new file mode 100644 index 0000000000..42aa4590a0 --- /dev/null +++ b/src/openhuman/composio/ops/providers_ops.rs @@ -0,0 +1,252 @@ +//! Provider-backed ops: profile fetch, identity refresh, and sync. + +use std::sync::Arc; + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::super::providers::{ + get_provider, ProviderContext, ProviderUserProfile, SyncOutcome, SyncReason, +}; +use super::connections::resolve_toolkit_for_connection; +use super::error_utils::{report_composio_op_error, resolve_client, OpResult}; + +/// Aggregate result of [`composio_refresh_all_identities`]. 
+#[derive(Debug, Default, Clone, serde::Serialize, serde::Deserialize)] +pub struct RefreshIdentitiesReport { + pub refreshed: usize, + pub failed: usize, + pub skipped_no_provider: usize, + pub skipped_inactive: usize, + pub rows_written: usize, +} + +/// `openhuman.composio_get_user_profile` — fetch a normalized user +/// profile for a connected account by dispatching to the toolkit's +/// registered [`super::super::providers::ComposioProvider`]. +pub async fn composio_get_user_profile( + config: &Config, + connection_id: &str, +) -> OpResult> { + tracing::debug!(connection_id = %connection_id, "[composio] rpc get_user_profile"); + let client = resolve_client(config)?; + let toolkit = resolve_toolkit_for_connection(&client, connection_id).await?; + + let provider = get_provider(&toolkit).ok_or_else(|| { + format!("[composio] no native provider registered for toolkit '{toolkit}'") + })?; + + let _ = client; + let ctx = ProviderContext { + config: Arc::new(config.clone()), + toolkit: toolkit.clone(), + connection_id: Some(connection_id.to_string()), + usage: Default::default(), + max_items: None, + sync_depth_days: None, + }; + + let profile = provider.fetch_user_profile(&ctx).await.map_err(|e| { + report_composio_op_error("get_user_profile", &e); + format!("[composio] get_user_profile({toolkit}) failed: {e}") + })?; + + let facets = provider.identity_set(&profile); + tracing::debug!( + toolkit = %toolkit, + facets_written = facets, + "[composio] identity_set persisted profile facets from get_user_profile" + ); + + Ok(RpcOutcome::new( + profile, + vec![format!( + "composio: fetched {toolkit} profile for connection {connection_id}" + )], + )) +} + +/// `openhuman.composio_refresh_all_identities` — re-fetch the user +/// profile for every active connection and persist via `identity_set`. +/// Used to populate kind-tagged `user_profile` rows on existing +/// connections after the #1365 schema rewrite without waiting for the +/// next periodic sync tick. 
+/// +/// Best-effort per connection: a failure on one toolkit does not abort +/// the others. Returns aggregate counts plus a per-connection trail in +/// the envelope messages. +pub async fn composio_refresh_all_identities( + config: &Config, +) -> OpResult> { + tracing::info!("[composio] rpc refresh_all_identities"); + let client = resolve_client(config)?; + let conns = client.list_connections().await.map_err(|e| { + report_composio_op_error("refresh_all_identities", &e); + format!("[composio] list_connections failed: {e:#}") + })?; + + let mut report = RefreshIdentitiesReport::default(); + let mut messages: Vec = Vec::with_capacity(conns.connections.len() + 1); + + for conn in conns.connections { + if !conn.is_active() { + report.skipped_inactive += 1; + continue; + } + let toolkit = conn.toolkit.clone(); + let connection_id = conn.id.clone(); + + let Some(provider) = get_provider(&toolkit) else { + tracing::debug!( + toolkit = %toolkit, + connection_id = %connection_id, + "[composio] refresh_all_identities: no native provider — skipping" + ); + report.skipped_no_provider += 1; + messages.push(format!( + "{toolkit}/{connection_id}: skipped (no native provider)" + )); + continue; + }; + + let ctx = ProviderContext { + config: Arc::new(config.clone()), + toolkit: toolkit.clone(), + connection_id: Some(connection_id.clone()), + usage: Default::default(), + max_items: None, + sync_depth_days: None, + }; + + match provider.fetch_user_profile(&ctx).await { + Ok(profile) => { + let rows = provider.identity_set(&profile); + report.refreshed += 1; + report.rows_written += rows; + tracing::debug!( + toolkit = %toolkit, + connection_id = %connection_id, + rows_written = rows, + "[composio] refresh_all_identities: identity_set ok" + ); + messages.push(format!("{toolkit}/{connection_id}: {rows} row(s)")); + } + Err(e) => { + report.failed += 1; + tracing::warn!( + toolkit = %toolkit, + connection_id = %connection_id, + error = %e, + "[composio] refresh_all_identities: 
fetch_user_profile failed" + ); + messages.push(format!("{toolkit}/{connection_id}: ERROR — {e}")); + } + } + } + + let summary = format!( + "composio: refreshed {ok}/{tried} active conn(s) — {rows} rows; \ + {fail} failed, {nopv} skipped (no provider), {inact} inactive", + ok = report.refreshed, + tried = report.refreshed + report.failed + report.skipped_no_provider, + rows = report.rows_written, + fail = report.failed, + nopv = report.skipped_no_provider, + inact = report.skipped_inactive, + ); + let mut envelope = vec![summary]; + envelope.extend(messages); + Ok(RpcOutcome::new(report, envelope)) +} + +/// `openhuman.composio_sync` — run a sync pass for a connected account +/// by dispatching to the toolkit's registered provider. `reason` is +/// `"manual"` by default; the periodic scheduler passes `"periodic"` +/// and the OAuth event subscriber passes `"connection_created"`. +pub async fn composio_sync( + config: &Config, + connection_id: &str, + reason: Option, +) -> OpResult> { + let reason = parse_sync_reason(reason.as_deref())?; + tracing::debug!( + connection_id = %connection_id, + reason = reason.as_str(), + "[composio] rpc sync (spawned)" + ); + let client = resolve_client(config)?; + let toolkit = resolve_toolkit_for_connection(&client, connection_id).await?; + let provider = get_provider(&toolkit).ok_or_else(|| { + format!("[composio] no native provider registered for toolkit '{toolkit}'") + })?; + let _ = client; + + let ctx = ProviderContext { + config: Arc::new(config.clone()), + toolkit: toolkit.clone(), + connection_id: Some(connection_id.to_string()), + usage: Default::default(), + max_items: None, + sync_depth_days: None, + }; + let started_at_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let toolkit_for_outcome = toolkit.clone(); + let connection_id_for_log = connection_id.to_string(); + + tokio::spawn(async move { + let toolkit_in_task = ctx.toolkit.clone(); + 
match provider.sync(&ctx, reason).await { + Ok(out) => { + tracing::info!( + toolkit = %toolkit_in_task, + connection_id = %connection_id_for_log, + items_ingested = out.items_ingested, + elapsed_ms = out.elapsed_ms(), + "[composio] background sync ok" + ); + } + Err(e) => { + report_composio_op_error("sync", &e); + tracing::warn!( + toolkit = %toolkit_in_task, + connection_id = %connection_id_for_log, + error = %e, + "[composio] background sync failed" + ); + } + } + }); + + let summary = format!("composio: {toolkit_for_outcome} sync started (background)"); + let outcome = SyncOutcome { + toolkit: toolkit_for_outcome, + connection_id: Some(connection_id.to_string()), + reason: reason.as_str().to_string(), + items_ingested: 0, + started_at_ms, + finished_at_ms: 0, + summary: summary.clone(), + details: serde_json::json!({ "status": "started" }), + }; + Ok(RpcOutcome::new(outcome, vec![summary])) +} + +/// Parse the optional `reason` parameter into a [`SyncReason`]. +/// +/// `None` and the explicit `"manual"` value both map to +/// [`SyncReason::Manual`]. Any other unrecognized string is rejected +/// with a clear error so a typo in a caller surfaces at the RPC boundary. +pub(crate) fn parse_sync_reason(raw: Option<&str>) -> OpResult { + match raw { + None | Some("manual") => Ok(SyncReason::Manual), + Some("periodic") => Ok(SyncReason::Periodic), + Some("connection_created") => Ok(SyncReason::ConnectionCreated), + Some(other) => Err(format!( + "[composio] unrecognized sync reason '{other}': expected one of \ + 'manual', 'periodic', 'connection_created'" + )), + } +} diff --git a/src/openhuman/composio/ops/toolkits.rs b/src/openhuman/composio/ops/toolkits.rs new file mode 100644 index 0000000000..86c3df0bd8 --- /dev/null +++ b/src/openhuman/composio/ops/toolkits.rs @@ -0,0 +1,80 @@ +//! Toolkit and capability listing ops. 
+ +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::super::client::{create_composio_client, ComposioClientKind}; +use super::super::providers::{agent_ready_toolkits, capability_matrix}; +use super::super::types::{ComposioCapabilitiesResponse, ComposioToolkitsResponse}; +use super::error_utils::{report_composio_op_error, OpResult}; + +pub async fn composio_list_toolkits( + config: &Config, +) -> OpResult> { + tracing::debug!("[composio] rpc list_toolkits"); + let kind = + create_composio_client(config).map_err(|e| format!("[composio] list_toolkits: {e}"))?; + match kind { + ComposioClientKind::Backend(client) => { + tracing::debug!("[composio] list_toolkits: backend variant"); + let resp = client.list_toolkits().await.map_err(|e| { + report_composio_op_error("list_toolkits", &e); + format!("[composio] list_toolkits failed: {e:#}") + })?; + let count = resp.toolkits.len(); + Ok(RpcOutcome::new( + resp, + vec![format!("composio: {count} toolkit(s) enabled")], + )) + } + ComposioClientKind::Direct(_) => { + tracing::info!( + "[composio-direct] list_toolkits: direct mode active — no \ + server-side allowlist is enforced; returning empty toolkits \ + list. Users manage available toolkits via app.composio.dev." + ); + Ok(RpcOutcome::new( + ComposioToolkitsResponse::default(), + vec!["composio: direct mode — no curated allowlist (toolkits \ + managed via app.composio.dev)" + .to_string()], + )) + } + } +} + +pub async fn composio_list_capabilities( + _config: &Config, +) -> OpResult> { + tracing::debug!("[composio] rpc list_capabilities"); + let resp = ComposioCapabilitiesResponse { + capabilities: capability_matrix(), + }; + let count = resp.capabilities.len(); + Ok(RpcOutcome::new( + resp, + vec![format!("composio: {count} capability row(s) listed")], + )) +} + +/// List every toolkit slug that ships an agent-ready curated catalog. 
+/// +/// Connected toolkits that are NOT in this list can still be +/// authorized via OAuth, but the agent has no curated action surface +/// for them — the UI should label such connections as +/// "preview / agent integration coming soon" so users aren't led into +/// a broken `composio_list_tools` → max-iterations loop. See #2283. +pub async fn composio_list_agent_ready_toolkits( +) -> OpResult> { + tracing::debug!("[composio] rpc list_agent_ready_toolkits"); + let toolkits: Vec = agent_ready_toolkits() + .into_iter() + .map(|s| s.to_string()) + .collect(); + let count = toolkits.len(); + let resp = super::super::types::ComposioAgentReadyToolkitsResponse { toolkits }; + Ok(RpcOutcome::new( + resp, + vec![format!("composio: {count} agent-ready toolkit(s) listed")], + )) +} diff --git a/src/openhuman/composio/ops/tools_ops.rs b/src/openhuman/composio/ops/tools_ops.rs new file mode 100644 index 0000000000..fee92360af --- /dev/null +++ b/src/openhuman/composio/ops/tools_ops.rs @@ -0,0 +1,145 @@ +//! Tool listing ops. 
+ +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::super::client::{ + create_composio_client, direct_list_connections, direct_list_tools, ComposioClientKind, +}; +use super::super::types::ComposioToolsResponse; +use super::error_utils::{report_composio_op_error, should_forward_tags, OpResult}; + +pub async fn composio_list_tools( + config: &Config, + toolkits: Option>, + tags: Option>, +) -> OpResult> { + let effective_tags = if should_forward_tags(toolkits.as_deref()) { + tags + } else { + None + }; + tracing::debug!(?toolkits, ?effective_tags, "[composio] rpc list_tools"); + let kind = create_composio_client(config).map_err(|e| format!("[composio] list_tools: {e}"))?; + match kind { + ComposioClientKind::Backend(client) => { + tracing::debug!("[composio] list_tools: backend variant"); + let resp = client + .list_tools(toolkits.as_deref(), effective_tags.as_deref()) + .await + .map_err(|e| { + report_composio_op_error("list_tools", &e); + format!("[composio] list_tools failed: {e:#}") + })?; + let count = resp.tools.len(); + Ok(RpcOutcome::new( + resp, + vec![format!("composio: {count} tool(s) listed")], + )) + } + ComposioClientKind::Direct(direct) => { + let scope: Vec = match toolkits { + Some(list) if !list.is_empty() => list, + _ => { + let conns = direct_list_connections(&direct).await.map_err(|e| { + let rendered = format!( + "[composio-direct] list_tools: prefetch connections failed: {e:#}" + ); + report_composio_op_error("list_connections", &rendered); + rendered + })?; + let mut v: Vec = conns + .connections + .iter() + .filter(|c| c.is_active()) + .map(|c| c.normalized_toolkit()) + .filter(|t| !t.is_empty()) + .collect(); + v.sort(); + v.dedup(); + v + } + }; + if scope.is_empty() { + tracing::info!( + "[composio-direct] list_tools: no connected toolkits on this tenant — \ + returning empty tool list" + ); + return Ok(RpcOutcome::new( + ComposioToolsResponse::default(), + vec!["composio: direct mode — 0 tool(s) listed 
(no connected \ + toolkits on this tenant)" + .to_string()], + )); + } + tracing::debug!( + toolkits = scope.len(), + ?effective_tags, + "[composio-direct] list_tools: fetching v3 tool schemas" + ); + let mut resp = direct_list_tools(&direct, &scope, effective_tags.as_deref()) + .await + .map_err(|e| { + let rendered = format!("[composio-direct] list_tools failed: {e:#}"); + report_composio_op_error("list_tools", &rendered); + rendered + })?; + let before = resp.tools.len(); + filter_list_tools_response_for_direct(&mut resp).await; + let after = resp.tools.len(); + tracing::debug!( + before, + after, + dropped = before - after, + "[composio-direct] list_tools: curated filter applied" + ); + let count = resp.tools.len(); + Ok(RpcOutcome::new( + resp, + vec![format!( + "composio: direct mode — {count} tool(s) listed across \ + {} toolkit(s)", + scope.len() + )], + )) + } + } +} + +/// Apply OpenHuman's curated-whitelist + user-scope visibility filter to +/// a fresh `ComposioToolsResponse` in direct mode. Mirrors the per-call +/// filter loop in `tools.rs::filter_list_tools_response` so backend and +/// direct surfaces share the same safety net. 
+async fn filter_list_tools_response_for_direct(resp: &mut ComposioToolsResponse) { + use super::super::providers::{ + catalog_for_toolkit, classify_unknown, find_curated, get_provider, + load_user_scope_or_default, toolkit_from_slug, + }; + + let mut keep: Vec = Vec::with_capacity(resp.tools.len()); + for t in &resp.tools { + let slug = &t.function.name; + let Some(toolkit) = toolkit_from_slug(slug) else { + keep.push(true); + continue; + }; + let pref = load_user_scope_or_default(&toolkit).await; + let catalog = get_provider(&toolkit) + .and_then(|p| p.curated_tools()) + .or_else(|| catalog_for_toolkit(&toolkit)); + let allowed = match catalog { + Some(cat) => match find_curated(cat, slug) { + Some(curated) => pref.allows(curated.scope), + None => false, + }, + None => pref.allows(classify_unknown(slug)), + }; + keep.push(allowed); + } + let drained: Vec<_> = resp.tools.drain(..).collect(); + resp.tools = drained + .into_iter() + .zip(keep) + .filter_map(|(tool, keep_it)| if keep_it { Some(tool) } else { None }) + .collect(); +} diff --git a/src/openhuman/composio/ops/triggers.rs b/src/openhuman/composio/ops/triggers.rs new file mode 100644 index 0000000000..f26c3c3285 --- /dev/null +++ b/src/openhuman/composio/ops/triggers.rs @@ -0,0 +1,183 @@ +//! GitHub repo listing and trigger management ops. 
+ +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::super::types::{ + ComposioActiveTriggersResponse, ComposioAvailableTriggersResponse, + ComposioCreateTriggerResponse, ComposioDisableTriggerResponse, ComposioEnableTriggerResponse, + ComposioGithubReposResponse, ComposioTriggerHistoryResult, +}; +use super::error_utils::{report_composio_op_error, resolve_client, OpResult}; + +pub async fn composio_list_github_repos( + config: &Config, + connection_id: Option, +) -> OpResult> { + tracing::debug!(?connection_id, "[composio] rpc list_github_repos"); + let client = resolve_client(config)?; + let resp = client + .list_github_repos(connection_id.as_deref()) + .await + .map_err(|e| { + report_composio_op_error("list_github_repos", &e); + format!("[composio] list_github_repos failed: {e:#}") + })?; + let count = resp.repositories.len(); + let connection_id = resp.connection_id.clone(); + Ok(RpcOutcome::new( + resp, + vec![format!( + "composio: {count} github repo(s) listed for connection {connection_id}" + )], + )) +} + +pub async fn composio_create_trigger( + config: &Config, + slug: &str, + connection_id: Option, + trigger_config: Option, +) -> OpResult> { + tracing::debug!(slug = %slug, ?connection_id, "[composio] rpc create_trigger"); + let client = resolve_client(config)?; + let resp = client + .create_trigger(slug, connection_id.as_deref(), trigger_config) + .await + .map_err(|e| { + report_composio_op_error("create_trigger", &e); + format!("[composio] create_trigger failed: {e:#}") + })?; + let trigger_id = resp.trigger_id.clone(); + Ok(RpcOutcome::new( + resp, + vec![format!( + "composio: trigger {trigger_id} created for slug {slug}" + )], + )) +} + +pub async fn composio_list_available_triggers( + config: &Config, + toolkit: &str, + connection_id: Option, +) -> OpResult> { + tracing::debug!(toolkit = %toolkit, ?connection_id, "[composio] rpc list_available_triggers"); + let client = resolve_client(config)?; + let resp = client + 
.list_available_triggers(toolkit, connection_id.as_deref()) + .await + .map_err(|e| { + report_composio_op_error("list_available_triggers", &e); + format!("[composio] list_available_triggers failed: {e:#}") + })?; + let count = resp.triggers.len(); + Ok(RpcOutcome::new( + resp, + vec![format!( + "composio: {count} available trigger(s) for toolkit {toolkit}" + )], + )) +} + +pub async fn composio_list_triggers( + config: &Config, + toolkit: Option, +) -> OpResult> { + tracing::debug!(?toolkit, "[composio] rpc list_triggers"); + let client = resolve_client(config)?; + let resp = client + .list_active_triggers(toolkit.as_deref()) + .await + .map_err(|e| { + report_composio_op_error("list_triggers", &e); + format!("[composio] list_triggers failed: {e:#}") + })?; + let count = resp.triggers.len(); + Ok(RpcOutcome::new( + resp, + vec![format!("composio: {count} active trigger(s) listed")], + )) +} + +pub async fn composio_enable_trigger( + config: &Config, + connection_id: &str, + slug: &str, + trigger_config: Option, +) -> OpResult> { + tracing::debug!(slug = %slug, connection_id = %connection_id, "[composio] rpc enable_trigger"); + let client = resolve_client(config)?; + let resp = client + .enable_trigger(connection_id, slug, trigger_config) + .await + .map_err(|e| { + report_composio_op_error("enable_trigger", &e); + let raw = format!("{e:#}"); + let class = super::super::error_mapping::classify_composio_error(slug, &raw); + let mapped = super::super::error_mapping::format_provider_error(slug, &raw); + tracing::warn!( + slug = %slug, + connection_id = %connection_id, + class = class.as_str(), + "[composio] enable_trigger failed; surfacing mapped error" + ); + mapped + })?; + let trigger_id = resp.trigger_id.clone(); + Ok(RpcOutcome::new( + resp, + vec![format!("composio: enabled trigger {slug} → {trigger_id}")], + )) +} + +pub async fn composio_disable_trigger( + config: &Config, + trigger_id: &str, +) -> OpResult> { + tracing::debug!(trigger_id = %trigger_id, 
"[composio] rpc disable_trigger"); + let client = resolve_client(config)?; + let resp = client.disable_trigger(trigger_id).await.map_err(|e| { + report_composio_op_error("disable_trigger", &e); + format!("[composio] disable_trigger failed: {e:#}") + })?; + let message = if resp.deleted { + format!("composio: disabled trigger {trigger_id}") + } else { + format!("composio: trigger {trigger_id} was not active") + }; + Ok(RpcOutcome::new(resp, vec![message])) +} + +pub async fn composio_list_trigger_history( + config: &Config, + limit: Option, +) -> OpResult> { + let requested_limit = limit.unwrap_or(100).clamp(1, 500); + let workspace_label = config + .workspace_dir + .file_name() + .and_then(|value| value.to_str()) + .unwrap_or(""); + tracing::debug!( + limit = requested_limit, + workspace = workspace_label, + "[composio] rpc list_trigger_history" + ); + + let store = super::super::trigger_history::global().ok_or_else(|| { + "[composio] trigger history unavailable: archive store is not initialized".to_string() + })?; + + let history = store + .list_recent(requested_limit) + .map_err(|error| format!("[composio] list_trigger_history failed: {error}"))?; + let count = history.entries.len(); + + Ok(RpcOutcome::new( + history, + vec![format!( + "composio: {count} trigger history entrie(s) loaded (archive present)" + )], + )) +} diff --git a/src/openhuman/config/ops.rs b/src/openhuman/config/ops.rs deleted file mode 100644 index 047504206e..0000000000 --- a/src/openhuman/config/ops.rs +++ /dev/null @@ -1,2515 +0,0 @@ -//! JSON-RPC / CLI controller surface for persisted config and runtime flags. - -use std::path::{Path, PathBuf}; - -use serde::Serialize; -use serde_json::json; - -use crate::openhuman::config::Config; -use crate::openhuman::screen_intelligence; -use crate::rpc::RpcOutcome; - -/// Checks if an environment variable flag is enabled (e.g., "1", "true", "yes"). 
-fn env_flag_enabled(key: &str) -> bool { - matches!( - std::env::var(key).ok().as_deref(), - Some("1") | Some("true") | Some("TRUE") | Some("yes") | Some("YES") - ) -} - -/// Returns the core RPC URL from environment variables or a default value. -pub fn core_rpc_url_from_env() -> String { - std::env::var("OPENHUMAN_CORE_RPC_URL") - .unwrap_or_else(|_| "http://127.0.0.1:7788/rpc".to_string()) -} - -const CONFIG_LOAD_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); - -/// Loads persisted config with a 30s timeout. -/// -/// This is used by JSON-RPC and CLI handlers to ensure they don't hang -/// indefinitely if disk I/O is blocked. -/// -/// The TOML parse itself runs on the blocking pool via -/// `parse_config_with_recovery` (see `src/openhuman/config/schema/load.rs`) -/// so the recursive-descent parser's serde Visitor frames don't compound -/// with whatever deep async tower called us. That's the stack-overflow -/// fix from `crahs.log` (2026-05-17); a per-call cache here would shave -/// the disk read on hot paths but proved racy across the in-process -/// integration tests (re-used workspace paths, concurrent server tasks -/// loading mid-mutation), so it isn't worth it. -pub async fn load_config_with_timeout() -> Result { - match tokio::time::timeout(CONFIG_LOAD_TIMEOUT, Config::load_or_init()).await { - Ok(Ok(mut config)) => { - normalize_loaded_config(&mut config).await; - Ok(config) - } - Ok(Err(e)) => Err(e.to_string()), - Err(_) => Err("Config loading timed out".to_string()), - } -} - -/// Reloads the config file represented by an existing runtime snapshot. -/// -/// Use this for long-lived objects that need fresh config values while -/// staying anchored to their original user/workspace. Unlike -/// [`load_config_with_timeout`], this does not re-resolve the process-global -/// `OPENHUMAN_WORKSPACE` env var on every call. 
-pub async fn reload_config_snapshot_with_timeout(snapshot: &Config) -> Result { - match tokio::time::timeout( - CONFIG_LOAD_TIMEOUT, - Config::load_from_config_path(&snapshot.config_path, &snapshot.workspace_dir), - ) - .await - { - Ok(Ok(mut config)) => { - normalize_loaded_config(&mut config).await; - Ok(config) - } - Ok(Err(e)) => Err(e.to_string()), - Err(_) => Err("Config loading timed out".to_string()), - } -} - -async fn normalize_loaded_config(_config: &mut Config) { - // No-op: welcome-agent routing normalization removed. The welcome agent - // has been deleted; all chat turns route directly to the orchestrator. - // The `chat_onboarding_completed` field in Config is retained for - // backward-compatible deserialization of existing config.toml files - // but is no longer read by routing logic. -} - -/// Returns the default workspace directory fallback (~/.openhuman/workspace). -fn fallback_workspace_dir() -> PathBuf { - crate::openhuman::config::default_root_openhuman_dir() - .unwrap_or_else(|_| env_scoped_fallback_root_dir()) - .join("workspace") -} - -/// Returns the default OpenHuman configuration directory (~/.openhuman). -fn default_openhuman_dir() -> PathBuf { - crate::openhuman::config::default_root_openhuman_dir() - .unwrap_or_else(|_| env_scoped_fallback_root_dir()) -} - -fn env_scoped_fallback_root_dir() -> PathBuf { - let suffix = if crate::api::config::is_staging_app_env( - crate::api::config::app_env_from_env().as_deref(), - ) { - "-staging" - } else { - "" - }; - PathBuf::from(format!(".openhuman{suffix}")) -} - -/// Returns the path to the active workspace marker file. -fn active_workspace_marker_path(default_openhuman_dir: &Path) -> PathBuf { - default_openhuman_dir.join("active_workspace.toml") -} - -/// Returns the parent directory of the config file. 
-fn config_openhuman_dir(config: &Config) -> PathBuf { - config - .config_path - .parent() - .map_or_else(|| PathBuf::from("."), PathBuf::from) -} - -fn is_windows_file_lock_error(error: &std::io::Error) -> bool { - cfg!(windows) && matches!(error.raw_os_error(), Some(32 | 33)) -} - -fn reset_local_data_remove_error(path: &Path, error: &std::io::Error) -> String { - if is_windows_file_lock_error(error) { - tracing::warn!( - path = %path.display(), - error = %error, - "[config] reset_local_data: Windows file lock blocked local data deletion" - ); - return format!( - "Failed to remove {} because it is locked by another OpenHuman window or process. Close all OpenHuman windows and try again. ({error})", - path.display() - ); - } - - format!("Failed to remove {}: {error}", path.display()) -} - -fn reset_local_data_marker_remove_error(path: &Path, error: &std::io::Error) -> String { - if is_windows_file_lock_error(error) { - tracing::warn!( - marker = %path.display(), - error = %error, - "[config] reset_local_data: Windows file lock blocked active workspace marker deletion" - ); - return format!( - "Failed to remove active workspace marker {} because it is locked by another OpenHuman window or process. Close all OpenHuman windows and try again. ({error})", - path.display() - ); - } - - format!("Failed to remove active workspace marker: {error}") -} - -/// Internal helper to reset local data by removing specific directories and markers. 
-async fn reset_local_data_for_paths( - current_openhuman_dir: &Path, - default_openhuman_dir: &Path, -) -> Result, String> { - let active_workspace_marker = active_workspace_marker_path(default_openhuman_dir); - tracing::debug!( - current_dir = %current_openhuman_dir.display(), - default_dir = %default_openhuman_dir.display(), - marker = %active_workspace_marker.display(), - "[config] reset_local_data: starting" - ); - - let mut removed_paths = Vec::new(); - - if active_workspace_marker.exists() { - if let Err(error) = tokio::fs::remove_file(&active_workspace_marker).await { - return Err(reset_local_data_marker_remove_error( - &active_workspace_marker, - &error, - )); - } - tracing::debug!( - marker = %active_workspace_marker.display(), - "[config] reset_local_data: removed active workspace marker" - ); - removed_paths.push(active_workspace_marker.display().to_string()); - } - - for target_dir in [current_openhuman_dir, default_openhuman_dir] { - if !target_dir.exists() { - tracing::debug!( - dir = %target_dir.display(), - "[config] reset_local_data: directory already absent" - ); - continue; - } - - if let Err(error) = tokio::fs::remove_dir_all(target_dir).await { - return Err(reset_local_data_remove_error(target_dir, &error)); - } - tracing::debug!( - dir = %target_dir.display(), - "[config] reset_local_data: removed directory" - ); - removed_paths.push(target_dir.display().to_string()); - } - - Ok(RpcOutcome::new( - json!({ - "removed_paths": removed_paths, - "current_openhuman_dir": current_openhuman_dir.display().to_string(), - "default_openhuman_dir": default_openhuman_dir.display().to_string(), - }), - vec![ - format!( - "reset local data for active config dir {}", - current_openhuman_dir.display() - ), - format!( - "removed default data dir {} if present", - default_openhuman_dir.display() - ), - ], - )) -} - -/// Serializes the current configuration into a JSON snapshot for the UI. 
-pub fn snapshot_config_json(config: &Config) -> Result { - let value = serde_json::to_value(config).map_err(|e| e.to_string())?; - Ok(json!({ - "config": value, - "workspace_dir": config.workspace_dir.display().to_string(), - "config_path": config.config_path.display().to_string(), - })) -} - -/// Serializes the client-facing AI config slice consumed by the settings UI. -pub fn client_config_json(config: &Config) -> serde_json::Value { - let app_version = - std::env::var("OPENHUMAN_APP_VERSION").unwrap_or_else(|_| "unknown".to_string()); - let api_key_set = config - .api_key - .as_deref() - .map(|k| !k.trim().is_empty()) - .unwrap_or(false); - let model_routes: Vec = config - .model_routes - .iter() - .map(|r| serde_json::json!({ "hint": r.hint, "model": r.model })) - .collect(); - let cloud_providers: Vec = config - .cloud_providers - .iter() - .map(|c| { - serde_json::json!({ - "id": c.id, - "slug": c.slug, - "label": c.label, - "endpoint": c.endpoint, - "auth_style": c.auth_style.as_str(), - }) - }) - .collect(); - - serde_json::json!({ - "api_url": config.api_url, - "inference_url": config.inference_url, - "default_model": config.default_model, - "app_version": app_version, - "api_key_set": api_key_set, - "model_routes": model_routes, - "cloud_providers": cloud_providers, - "primary_cloud": config.primary_cloud, - "chat_provider": config.chat_provider, - "reasoning_provider": config.reasoning_provider, - "agentic_provider": config.agentic_provider, - "coding_provider": config.coding_provider, - "memory_provider": config.memory_provider, - "embeddings_provider": config.embeddings_provider, - "heartbeat_provider": config.heartbeat_provider, - "learning_provider": config.learning_provider, - "subconscious_provider": config.subconscious_provider, - "voice_providers": config.voice_providers.iter().map(|v| { - serde_json::json!({ - "id": v.id, - "slug": v.slug, - "label": v.label, - "endpoint": v.endpoint, - "auth_style": v.auth_style.as_str(), - "capability": 
v.capability.as_str(), - "stt_api_style": v.stt_api_style, - "tts_api_style": v.tts_api_style, - "default_stt_model": v.default_stt_model, - "default_tts_voice": v.default_tts_voice, - }) - }).collect::>(), - "stt_provider": config.stt_provider, - "tts_provider": config.tts_provider, - }) -} - -/// Loads config and returns the client-facing AI config slice. -pub async fn load_and_get_client_config_snapshot() -> Result, String> -{ - let config = load_config_with_timeout().await?; - let snapshot = client_config_json(&config); - Ok(RpcOutcome::new( - snapshot, - vec!["client config read".to_string()], - )) -} - -#[derive(Debug, Clone, Default)] -pub struct ModelSettingsPatch { - pub api_url: Option, - /// Custom OpenAI-compatible LLM endpoint. Empty string clears the - /// override (inference falls back through the OpenHuman backend). - pub inference_url: Option, - pub api_key: Option, - pub default_model: Option, - pub default_temperature: Option, - /// When `Some`, REPLACES the entire `config.model_routes` array with the - /// supplied (hint, model) pairs. Pass `Some(vec![])` to clear all routes - /// (e.g. when switching back to the OpenHuman backend whose built-in - /// router picks per-task models on its own). Leave `None` to keep the - /// current routes untouched. - pub model_routes: Option>, - /// When `Some`, REPLACES the entire `config.cloud_providers` array with - /// the supplied entries (each lacking the API key — those live in - /// `auth-profiles.json` via [`crate::openhuman::credentials::AuthService`]). - /// Pass `Some(vec![])` to clear all third-party cloud providers. - pub cloud_providers: - Option>, - /// Id of the `cloud_providers` entry used when a workload routes to - /// `"cloud"`. Empty string clears (factory falls back to OpenHuman). 
- pub primary_cloud: Option, - pub chat_provider: Option, - pub reasoning_provider: Option, - pub agentic_provider: Option, - pub coding_provider: Option, - pub memory_provider: Option, - pub embeddings_provider: Option, - pub heartbeat_provider: Option, - pub learning_provider: Option, - pub subconscious_provider: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct MemorySettingsPatch { - pub backend: Option, - pub auto_save: Option, - pub embedding_provider: Option, - pub embedding_model: Option, - pub embedding_dimensions: Option, - /// Stepped user-facing memory-context window preset (see - /// [`crate::openhuman::config::schema::agent::MemoryContextWindow`]). - /// Accepts `"minimal" | "balanced" | "extended" | "maximum"`. - /// Unknown values are silently ignored so old clients can keep - /// posting partial patches. - pub memory_window: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct RuntimeSettingsPatch { - pub kind: Option, - pub reasoning_enabled: Option, -} - -/// Partial update for the `[autonomy]` block — the agent's filesystem access -/// mode. Each `None` field is left unchanged. `trusted_roots`, `allowed_commands`, -/// `forbidden_paths`, and `auto_approve`, when `Some`, REPLACE the corresponding -/// array wholesale. -#[derive(Debug, Clone, Default)] -pub struct AutonomySettingsPatch { - /// `"readonly" | "supervised" | "full"` (case-insensitive). - pub level: Option, - pub workspace_only: Option, - pub allowed_commands: Option>, - pub forbidden_paths: Option>, - pub trusted_roots: Option>, - pub allow_tool_install: Option, - pub max_actions_per_hour: Option, - /// "Always allow" allowlist — tool names the gate skips prompting for. - pub auto_approve: Option>, - pub require_task_plan_approval: Option, -} - -/// Partial update for the `[agent]` block. Currently carries the single -/// user-facing `agent_timeout_secs` knob (the tool/action wall-clock timeout); -/// other `AgentConfig` fields are not yet UI-exposed. 
`None` leaves the value -/// unchanged. -#[derive(Debug, Clone, Default)] -pub struct AgentSettingsPatch { - /// Tool/action wall-clock timeout in seconds. Validated to - /// `tool_timeout::MIN_TIMEOUT_SECS..=tool_timeout::MAX_TIMEOUT_SECS`. - pub agent_timeout_secs: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct BrowserSettingsPatch { - pub enabled: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct ScreenIntelligenceSettingsPatch { - pub enabled: Option, - pub capture_policy: Option, - pub policy_mode: Option, - pub baseline_fps: Option, - pub vision_enabled: Option, - pub autocomplete_enabled: Option, - pub use_vision_model: Option, - pub keep_screenshots: Option, - pub allowlist: Option>, - pub denylist: Option>, -} - -#[derive(Debug, Clone, Default)] -pub struct AnalyticsSettingsPatch { - pub enabled: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct MeetSettingsPatch { - pub auto_orchestrator_handoff: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct SearchSettingsPatch { - /// One of `disabled` | `managed` | `parallel` | `brave` | `querit`. - /// Empty/unknown values are rejected by `apply_search_settings`. - /// Runtime fallback to `managed` applies only to persisted/legacy config - /// values resolved by `SearchConfig::effective_engine()`. - pub engine: Option, - /// 1..=20. Clamped silently at apply time. - pub max_results: Option, - /// Per-request timeout in seconds (default 15). - pub timeout_secs: Option, - /// Parallel API key. An empty string clears the stored key. - pub parallel_api_key: Option, - /// Brave Search API key. An empty string clears the stored key. - pub brave_api_key: Option, - /// Querit API key. An empty string clears the stored key. - pub querit_api_key: Option, - /// Websites the assistant may open/read (`web_fetch` / `curl`), as a - /// host allowlist. Entries are exact hosts (`reuters.com`), which also - /// match their subdomains, or `"*"` for all public sites. 
Empty list - /// blocks all web access. Mirrors `[http_request].allowed_domains`. - pub allowed_domains: Option>, - /// Convenience toggle for the "Allow all sites" switch. `Some(true)` - /// sets the allowlist to `["*"]`; `Some(false)` drops the wildcard while - /// keeping any explicit hosts. Applied after `allowed_domains`. - pub allow_all: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct LocalAiSettingsPatch { - pub runtime_enabled: Option, - /// MVP opt-in marker. Bootstrap hard-overrides status to "disabled" - /// when this is `false`, regardless of `runtime_enabled`. The unified - /// AI panel ties the two together (both flip on enable, both flip - /// off on disable) so a single toggle gives the user the obvious - /// behaviour without needing to apply a preset first. - pub opt_in_confirmed: Option, - pub provider: Option, - pub base_url: Option>, - pub model_id: Option, - pub chat_model_id: Option, - pub usage_embeddings: Option, - pub usage_heartbeat: Option, - pub usage_learning_reflection: Option, - pub usage_subconscious: Option, -} - -#[derive(Debug, Clone, Default)] -pub struct ComposioTriggerSettingsPatch { - /// When `Some(true)`, disables triage for all toolkits. - pub triage_disabled: Option, - /// When `Some(v)`, replaces the per-toolkit opt-out list entirely. - pub triage_disabled_toolkits: Option>, -} - -#[derive(Debug, Clone, Serialize)] -pub struct RuntimeFlagsOut { - pub browser_allow_all: bool, - pub log_prompts: bool, -} - -const BROWSER_ALLOW_ALL_ENV: &str = "OPENHUMAN_BROWSER_ALLOW_ALL"; -const BROWSER_ALLOW_ALL_RPC_ENABLE_ENV: &str = "OPENHUMAN_BROWSER_ALLOW_ALL_RPC_ENABLE"; - -/// Returns a full configuration snapshot for the UI. 
-pub async fn get_config_snapshot(config: &Config) -> Result, String> { - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "config loaded from {}", - config.config_path.display() - )], - )) -} - -/// Updates the model-related settings in the configuration. -pub async fn apply_model_settings( - config: &mut Config, - update: ModelSettingsPatch, -) -> Result, String> { - if let Some(api_url) = update.api_url { - config.api_url = if api_url.trim().is_empty() { - None - } else { - Some(api_url) - }; - } - if let Some(inference_url) = update.inference_url { - config.inference_url = if inference_url.trim().is_empty() { - None - } else { - Some(inference_url.trim().to_string()) - }; - } - if let Some(api_key) = update.api_key { - let trimmed_key = api_key.trim(); - config.api_key = if trimmed_key.is_empty() { - None - } else { - Some(trimmed_key.to_string()) - }; - } - if let Some(model) = update.default_model { - let trimmed = model.trim(); - config.default_model = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - if let Some(ref m) = config.default_model { - if !crate::openhuman::inference::provider::factory::is_known_openhuman_tier(m) { - log::warn!( - "[config][model-settings] default_model '{}' is not a recognized \ - OpenHuman backend tier — it will be replaced with the platform \ - default at inference time.", - m - ); - } - } - } - if let Some(temp) = update.default_temperature { - config.default_temperature = temp; - } - if let Some(routes) = update.model_routes { - // Full replacement — UI sends the canonical set for the active provider - // (or an empty vec when switching back to the OpenHuman in-built router). - config.model_routes = routes; - } - if let Some(providers) = update.cloud_providers { - // The schema handlers strip reserved-slug entries (e.g. the built-in - // "openhuman" provider seeded by `migrations::unify_ai_provider_settings`) - // from the user's payload. 
Preserve any reserved-slug entries that - // already live in the stored config so a routine settings save - // doesn't accidentally delete them — `primary_cloud` and the - // per-workload routing fields can reference these built-ins, and - // losing them would break inference routing. - use crate::openhuman::config::schema::cloud_providers::is_slug_reserved; - let preserved: Vec<_> = config - .cloud_providers - .iter() - .filter(|e| is_slug_reserved(e.slug.trim())) - .cloned() - .collect(); - log::debug!( - "[config] apply_model_settings: preserving {} reserved cloud provider(s) before overwrite", - preserved.len() - ); - config.cloud_providers = providers; - let before_reinject = config.cloud_providers.len(); - for entry in preserved { - // Defensive: don't double-add if the payload (somehow) already - // contained an entry with this reserved slug — the schema-handler - // filter is the canonical guard, but apply_model_settings is also - // reachable from tests and CLI paths that bypass that filter. - let preserved_slug = entry.slug.trim(); - if !config - .cloud_providers - .iter() - .any(|e| e.slug.trim() == preserved_slug) - { - config.cloud_providers.push(entry); - } - } - log::debug!( - "[config] apply_model_settings: reinjected {} reserved cloud provider(s)", - config.cloud_providers.len() - before_reinject - ); - } - if let Some(primary) = update.primary_cloud { - let trimmed = primary.trim(); - config.primary_cloud = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - - // Per-workload provider strings. Empty / blank → None (factory default). 
- let normalise_provider = |s: String| -> Option { - let t = s.trim(); - if t.is_empty() { - None - } else { - Some(t.to_string()) - } - }; - if let Some(s) = update.chat_provider { - config.chat_provider = normalise_provider(s); - } - if let Some(s) = update.reasoning_provider { - config.reasoning_provider = normalise_provider(s); - } - if let Some(s) = update.agentic_provider { - config.agentic_provider = normalise_provider(s); - } - if let Some(s) = update.coding_provider { - config.coding_provider = normalise_provider(s); - } - if let Some(s) = update.memory_provider { - config.memory_provider = normalise_provider(s); - } - if let Some(s) = update.embeddings_provider { - config.embeddings_provider = normalise_provider(s); - } - if let Some(s) = update.heartbeat_provider { - config.heartbeat_provider = normalise_provider(s); - } - if let Some(s) = update.learning_provider { - config.learning_provider = normalise_provider(s); - } - if let Some(s) = update.subconscious_provider { - config.subconscious_provider = normalise_provider(s); - } - - config.save().await.map_err(|e| e.to_string())?; - // #1574 §4: the AIPanel workload matrix changes the embedder via THIS - // (model-settings) path — `embeddings_provider` above — not the - // memory-settings path. Trigger the same idempotent re-embed backfill - // so a UI embedder switch recovers prior memory under the new - // signature. Coverage-gated + non-fatal: if the active signature did - // not actually change, this enqueues nothing. - crate::openhuman::memory_queue::ensure_reembed_backfill(config); - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "model settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Updates the memory-related settings in the configuration. 
-pub async fn apply_memory_settings( - config: &mut Config, - update: MemorySettingsPatch, -) -> Result, String> { - if let Some(backend) = update.backend { - config.memory.backend = backend; - } - if let Some(auto_save) = update.auto_save { - config.memory.auto_save = auto_save; - } - if let Some(provider) = update.embedding_provider { - config.memory.embedding_provider = provider; - } - if let Some(model) = update.embedding_model { - config.memory.embedding_model = model; - } - if let Some(dimensions) = update.embedding_dimensions { - config.memory.embedding_dimensions = dimensions; - } - if let Some(window_label) = update.memory_window.as_deref() { - if let Some(window) = - crate::openhuman::config::schema::MemoryContextWindow::from_str_opt(window_label) - { - config.agent.memory_window = Some(window); - } else { - tracing::warn!( - requested = window_label, - "[config] unknown memory_window preset — leaving existing setting unchanged" - ); - } - } - config.save().await.map_err(|e| e.to_string())?; - // #1574 §4: the embedder may have just changed (provider/model/dims). - // Ensure a re-embed backfill chain exists for the new active signature - // so prior memory becomes retrievable again instead of silently going - // dark. Idempotent + non-fatal (covered space enqueues nothing; errors - // are logged, never fail the settings save). §7's migration is - // one-shot so it does not cover a later switch — this does. - crate::openhuman::memory_queue::ensure_reembed_backfill(config); - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "memory settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Updates the screen intelligence settings in the configuration. 
-pub async fn apply_screen_intelligence_settings( - config: &mut Config, - update: ScreenIntelligenceSettingsPatch, -) -> Result, String> { - if let Some(enabled) = update.enabled { - config.screen_intelligence.enabled = enabled; - } - if let Some(capture_policy) = update.capture_policy { - config.screen_intelligence.capture_policy = capture_policy; - } - if let Some(policy_mode) = update.policy_mode { - config.screen_intelligence.policy_mode = policy_mode; - } - if let Some(baseline_fps) = update.baseline_fps { - config.screen_intelligence.baseline_fps = baseline_fps.clamp(0.2, 30.0); - } - if let Some(vision_enabled) = update.vision_enabled { - config.screen_intelligence.vision_enabled = vision_enabled; - } - if let Some(autocomplete_enabled) = update.autocomplete_enabled { - config.screen_intelligence.autocomplete_enabled = autocomplete_enabled; - } - if let Some(use_vision_model) = update.use_vision_model { - config.screen_intelligence.use_vision_model = use_vision_model; - } - if let Some(keep_screenshots) = update.keep_screenshots { - config.screen_intelligence.keep_screenshots = keep_screenshots; - } - if let Some(allowlist) = update.allowlist { - config.screen_intelligence.allowlist = allowlist; - } - if let Some(denylist) = update.denylist { - config.screen_intelligence.denylist = denylist; - } - - config.save().await.map_err(|e| e.to_string())?; - let _ = screen_intelligence::global_engine() - .apply_config(config.screen_intelligence.clone()) - .await; - - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "screen intelligence settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Updates the runtime-related settings in the configuration. 
-pub async fn apply_runtime_settings( - config: &mut Config, - update: RuntimeSettingsPatch, -) -> Result, String> { - if let Some(kind) = update.kind { - config.runtime.kind = kind; - } - if let Some(reasoning_enabled) = update.reasoning_enabled { - config.runtime.reasoning_enabled = Some(reasoning_enabled); - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "runtime settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Updates the browser-related settings in the configuration. -pub async fn apply_browser_settings( - config: &mut Config, - update: BrowserSettingsPatch, -) -> Result, String> { - if let Some(enabled) = update.enabled { - config.browser.enabled = enabled; - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "browser settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration from disk and returns a snapshot. -pub async fn load_and_get_config_snapshot() -> Result, String> { - let config = load_config_with_timeout().await?; - get_config_snapshot(&config).await -} - -/// Loads the configuration, applies model settings updates, and saves it. -pub async fn load_and_apply_model_settings( - update: ModelSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_model_settings(&mut config, update).await -} - -/// Loads the configuration, applies memory settings updates, and saves it. -pub async fn load_and_apply_memory_settings( - update: MemorySettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_memory_settings(&mut config, update).await -} - -/// Loads the configuration, applies screen intelligence settings updates, and saves it. 
-pub async fn load_and_apply_screen_intelligence_settings( - update: ScreenIntelligenceSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_screen_intelligence_settings(&mut config, update).await -} - -/// Loads the configuration, applies runtime settings updates, and saves it. -pub async fn load_and_apply_runtime_settings( - update: RuntimeSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_runtime_settings(&mut config, update).await -} - -/// Updates the `[autonomy]` (agent access mode) settings in the configuration. -/// -/// After saving, publishes a `DomainEvent::System(AutonomyConfigChanged)` so that -/// live agent sessions can rebuild their `SecurityPolicy` without a core restart -/// (see `channels::runtime`). Returns the updated config snapshot. -pub async fn apply_autonomy_settings( - config: &mut Config, - update: AutonomySettingsPatch, -) -> Result, String> { - use crate::openhuman::security::AutonomyLevel; - - if let Some(level) = update.level { - config.autonomy.level = match level.trim().to_ascii_lowercase().as_str() { - "readonly" | "read_only" | "read-only" => AutonomyLevel::ReadOnly, - "supervised" => AutonomyLevel::Supervised, - "full" => AutonomyLevel::Full, - other => { - return Err(format!( - "invalid autonomy level '{other}' (expected readonly | supervised | full)" - )) - } - }; - } - if let Some(workspace_only) = update.workspace_only { - config.autonomy.workspace_only = workspace_only; - } - if let Some(allowed_commands) = update.allowed_commands { - config.autonomy.allowed_commands = allowed_commands; - } - if let Some(forbidden_paths) = update.forbidden_paths { - config.autonomy.forbidden_paths = forbidden_paths; - } - if let Some(trusted_roots) = update.trusted_roots { - config.autonomy.trusted_roots = trusted_roots; - } - if let Some(allow_tool_install) = update.allow_tool_install { - config.autonomy.allow_tool_install = allow_tool_install; 
- } - if let Some(max_actions_per_hour) = update.max_actions_per_hour { - if max_actions_per_hour == 0 { - return Err(format!( - "max_actions_per_hour must be at least 1 (got {max_actions_per_hour})" - )); - } - config.autonomy.max_actions_per_hour = max_actions_per_hour; - } - if let Some(auto_approve) = update.auto_approve { - config.autonomy.auto_approve = auto_approve; - } - if let Some(require_task_plan_approval) = update.require_task_plan_approval { - config.autonomy.require_task_plan_approval = require_task_plan_approval; - } - - config.save().await.map_err(|e| e.to_string())?; - - // Swap the process-global live SecurityPolicy so `current()` reflects the new - // access mode immediately, then broadcast for any other interested listeners. - crate::openhuman::security::live_policy::reload_from(&config.autonomy); - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::AutonomyConfigChanged, - ); - - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "autonomy settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies autonomy settings updates, and saves it. -pub async fn load_and_apply_autonomy_settings( - update: AutonomySettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_autonomy_settings(&mut config, update).await -} - -// ── Agent filesystem paths (editable action_dir) ────────────────────────────── - -/// Partial update for the agent's editable filesystem roots. -/// -/// Only `action_dir` is editable today (issue #3240). `workspace_dir` and -/// `projects_dir` are intentionally read-only and not part of this patch. -#[derive(Debug, Clone, Default)] -pub struct AgentPathsPatch { - /// New action sandbox root. `Some("")`/whitespace clears the override and - /// reverts to the default; `Some(path)` sets it; `None` leaves it unchanged. 
- pub action_dir: Option, -} - -/// Expand a leading `~/` to the user's home directory, building the path -/// component-by-component so the result uses the platform-native separator -/// throughout. A naive `format!("{}/{rest}", home)` — or even `home.join(rest)` -/// — leaves the embedded `/` inside `rest`, yielding a mixed-separator path like -/// `C:\Users\Harry/OpenHuman/projects` on Windows, which `CreateProcessW` -/// rejects with `ERROR_DIRECTORY` (os error 267) when used as a process CWD. -/// See issue #3353 (RC-B). -/// -/// This is the single source of truth for `~/` expansion; `SecurityPolicy:: -/// expand_tilde` delegates here so policy and config stay byte-for-byte -/// consistent. -pub fn expand_tilde(path: &str) -> String { - let Some(rest) = path.strip_prefix("~/") else { - return path.to_string(); - }; - let Some(home) = dirs::home_dir() else { - return path.to_string(); - }; - let mut out = home; - for part in rest.split('/') { - if !part.is_empty() { - out.push(part); - } - } - out.to_string_lossy().into_owned() -} - -/// Redact a path for logging by replacing the user's home-directory prefix with -/// `~`. Keeps the path *shape* (e.g. `~/OpenHuman/projects`) useful for -/// diagnosis while not leaking the OS username / full home path (PII). Paths -/// outside the home dir are returned unchanged. -pub(crate) fn redact_home(path: &Path) -> String { - let s = path.to_string_lossy(); - if let Some(home) = dirs::home_dir() { - let home = home.to_string_lossy(); - if !home.is_empty() { - if let Some(rest) = s.strip_prefix(home.as_ref()) { - return format!("~{rest}"); - } - } - } - s.into_owned() -} - -/// Ensure the agent's action sandbox + default projects home exist and the -/// projects dir is registered as a `ReadWrite` trusted root. Idempotent — safe -/// to call from every boot path (web-chat-only `bootstrap_core_runtime` **and** -/// `start_channels`). 
-/// -/// Without this on the always-run boot, a fresh desktop install with no -/// messaging integrations leaves `~/OpenHuman/projects` uncreated (the only -/// other creation lived inside the integration-gated `start_channels`), so the -/// shell tool's `current_dir` fails with `ERROR_DIRECTORY` (os error 267) on -/// Windows / `ENOENT` on Unix. See issue #3353 (RC-A). -pub async fn ensure_agent_dirs(config: &mut Config) { - use crate::openhuman::security::{TrustedAccess, TrustedRoot}; - - // Ensure the agent's default projects home (~/OpenHuman/projects) exists and - // is a read-write trusted root, so the coding agent creates/edits projects - // there freely — distinct from the hidden internal workspace dir. A user who - // has already granted it (or any other root) is left untouched. - let projects_dir = crate::openhuman::config::default_projects_dir(); - if let Err(e) = tokio::fs::create_dir_all(&projects_dir).await { - tracing::warn!( - dir = %redact_home(&projects_dir), - error = %e, - "[startup] could not create default projects dir" - ); - } - let projects_path = projects_dir.to_string_lossy().to_string(); - if !config - .autonomy - .trusted_roots - .iter() - .any(|r| r.path == projects_path) - { - config.autonomy.trusted_roots.push(TrustedRoot { - path: projects_path, - access: TrustedAccess::ReadWrite, - }); - } - - // Ensure the action sandbox directory exists (defaults to ~/OpenHuman/projects). 
- let action_dir = config.action_dir.clone(); - if let Err(e) = tokio::fs::create_dir_all(&action_dir).await { - tracing::warn!( - dir = %redact_home(&action_dir), - error = %e, - "[startup] could not create action sandbox dir" - ); - } - tracing::info!( - workspace = %redact_home(&config.workspace_dir), - action = %redact_home(&action_dir), - "[startup] workspace (internal state) and action sandbox (tool cwd) directories configured" - ); -} - -/// Ensure `dir` is usable as a process working directory: it must exist (we -/// attempt to create it if missing — covers a dir deleted after launch) and -/// resolve to a directory. Returns a descriptive error naming the path and the -/// Settings location to fix it, instead of letting the OS surface an opaque -/// `ERROR_DIRECTORY` (os error 267) from `CreateProcessW`. See issue #3353 -/// (Fix 2). Cheap stat-only calls on the happy path. -pub fn ensure_usable_cwd(dir: &Path) -> anyhow::Result<()> { - if !dir.exists() { - // Defensive auto-create (mirrors startup) — covers a dir deleted after - // launch or an override whose parent later disappeared. - std::fs::create_dir_all(dir).map_err(|e| { - anyhow::anyhow!( - "Working directory '{}' does not exist and could not be created: {e}. \ - Set a valid path in Settings → Agent access → Working directory.", - dir.display() - ) - })?; - } - if !dir.is_dir() { - anyhow::bail!( - "Working directory '{}' is not a directory. \ - Set a valid path in Settings → Agent access → Working directory.", - dir.display() - ); - } - Ok(()) -} - -/// Source of the currently-effective `action_dir`, so the UI can gate -/// editability honestly: -/// -/// * `"env"` — pinned by `OPENHUMAN_ACTION_DIR`; the override is ignored and the -/// input must be disabled. -/// * `"override"` — a persisted user choice (`action_dir_override`) is in effect. -/// * `"default"` — falling back to the default projects dir. 
-fn action_dir_source(config: &Config) -> &'static str { - if crate::openhuman::config::action_dir_env_override().is_some() { - "env" - } else if config.action_dir_override.is_some() { - "override" - } else { - "default" - } -} - -/// Build the agent-paths JSON payload (shared by `get_agent_paths` and -/// `apply_agent_paths_settings` so both return an identical shape). -fn agent_paths_payload(config: &Config) -> serde_json::Value { - let projects_dir = crate::openhuman::config::default_projects_dir(); - json!({ - "action_dir": config.action_dir.display().to_string(), - "workspace_dir": config.workspace_dir.display().to_string(), - "projects_dir": projects_dir.display().to_string(), - "action_dir_source": action_dir_source(config), - }) -} - -/// Applies an edit to the agent's `action_dir` sandbox root. -/// -/// Validation (fail-closed): the path is trimmed and `~`-expanded; it must be -/// **absolute**; it must not be an existing *file*; and it must not equal -/// `workspace_dir` (which holds memory DBs / tokens and must never become the -/// agent-writable root). A missing directory is auto-created (mirroring the -/// startup auto-create in `channels/runtime/startup.rs`). An empty input clears -/// the override and reverts `action_dir` to the default. -/// -/// On success the override is persisted (`action_dir_override`), `action_dir` is -/// recomputed from the precedence chain, the live `SecurityPolicy` is hot-swapped -/// (`live_policy::set_action_dir`), and `DomainEvent::AgentPathsChanged` is -/// published. Returns the same payload shape as [`get_agent_paths`]. -/// -/// When `OPENHUMAN_ACTION_DIR` is set the env var wins: the override is still -/// persisted, but the effective `action_dir` (and the returned `action_dir`) -/// continues to reflect the env value, and `action_dir_source` reports `"env"`. 
-pub async fn apply_agent_paths_settings( - config: &mut Config, - update: AgentPathsPatch, -) -> Result, String> { - let mut notes: Vec = Vec::new(); - - if let Some(raw) = update.action_dir { - let trimmed = raw.trim(); - log::debug!( - "[config][agent_paths] apply action_dir edit (input_len={})", - trimmed.len() - ); - - if trimmed.is_empty() { - // Empty input clears the override → revert to the default. - config.action_dir_override = None; - notes.push("action_dir override cleared (reverted to default)".to_string()); - } else { - let expanded = expand_tilde(trimmed); - let candidate = PathBuf::from(&expanded); - - if !candidate.is_absolute() { - return Err(format!( - "action_dir must be an absolute path (got '{expanded}')" - )); - } - - // Reject if the target is an existing *file* (a directory or a - // not-yet-existing path are both fine — the latter is auto-created). - if candidate.is_file() { - return Err(format!( - "action_dir must be a directory, not a file: {expanded}" - )); - } - - // The internal workspace holds memory DBs, sessions, tokens — it must - // never become the agent-writable sandbox root. Compare canonicalised - // forms when both resolve so symlinks can't sneak past the check. - if paths_equal(&candidate, &config.workspace_dir) { - return Err( - "action_dir must not equal the internal workspace directory".to_string() - ); - } - - // Auto-create the directory if it doesn't exist (mirrors startup). - if !candidate.exists() { - tokio::fs::create_dir_all(&candidate) - .await - .map_err(|e| format!("failed to create action_dir {expanded}: {e}"))?; - notes.push(format!("created action_dir {expanded}")); - } - - config.action_dir_override = Some(candidate); - notes.push(format!("action_dir override set to {expanded}")); - } - - // Recompute the effective action_dir from the precedence chain - // (env > override > default) so the env var still wins at runtime. 
- config.action_dir = - crate::openhuman::config::resolve_action_dir(&config.action_dir_override); - - config.save().await.map_err(|e| e.to_string())?; - - // Hot-swap the process-global live policy so new sessions pick up the - // new sandbox root without a core restart, then broadcast. - crate::openhuman::security::live_policy::set_action_dir(config.action_dir.clone()); - crate::core::event_bus::publish_global( - crate::core::event_bus::DomainEvent::AgentPathsChanged, - ); - - log::debug!( - "[config][agent_paths] action_dir now '{}' (source={})", - config.action_dir.display(), - action_dir_source(config) - ); - } - - Ok(RpcOutcome::new(agent_paths_payload(config), notes)) -} - -/// Loads the configuration, applies agent-paths updates, and saves it. -pub async fn load_and_apply_agent_paths_settings( - update: AgentPathsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_agent_paths_settings(&mut config, update).await -} - -/// True when two paths refer to the same location. Compares canonicalised forms -/// when both paths exist (defeats symlink/`.`/`..` evasion); otherwise falls back -/// to a lexical comparison so a not-yet-created target is still checked. -fn paths_equal(a: &Path, b: &Path) -> bool { - match (a.canonicalize(), b.canonicalize()) { - (Ok(ca), Ok(cb)) => ca == cb, - _ => a == b, - } -} - -// ── Agent Activity Level ─────────────────────────────────────────────── - -/// Partial update for the agent activity level (0–4). -#[derive(Debug, Clone, Default)] -pub struct ActivityLevelSettingsPatch { - /// "off" | "minimal" | "moderate" | "active" | "always_on" (or "0"-"4"). - pub level: Option, -} - -/// Returns the current activity level and its derived settings. 
-pub async fn get_activity_level_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let level = config.agent_activity_level; - let (cost_min, cost_max) = level.estimated_monthly_cost_range(); - let value = serde_json::json!({ - "level": level as u8, - "level_label": level.as_str(), - "sync_interval_secs": level.sync_interval_secs(), - "heartbeat_enabled": level.heartbeat_enabled(), - "subconscious_enabled": level.subconscious_enabled(), - "token_budget_per_cycle": level.token_budget_per_cycle(), - "estimated_monthly_cost_min_usd": cost_min, - "estimated_monthly_cost_max_usd": cost_max, - }); - Ok(RpcOutcome::single_log( - value, - "activity level settings read", - )) -} - -/// Updates the agent activity level and pushes it into the scheduler gate. -pub async fn apply_activity_level_settings( - config: &mut Config, - update: ActivityLevelSettingsPatch, -) -> Result, String> { - use crate::openhuman::config::schema::activity_level::AgentActivityLevel; - use crate::openhuman::config::SchedulerGateMode; - - if let Some(level_str) = update.level { - let level = AgentActivityLevel::from_str_opt(&level_str).ok_or_else(|| { - format!( - "invalid activity level '{}' \ - (expected off|minimal|moderate|active|always_on or 0-4)", - level_str - ) - })?; - config.agent_activity_level = level; - } - - // Derive the gate mode from the (possibly updated) activity level and - // persist it alongside the level so the saved config is self-consistent. 
- let level = config.agent_activity_level; - let gate_mode = match level { - AgentActivityLevel::Off => SchedulerGateMode::Off, - AgentActivityLevel::Minimal | AgentActivityLevel::Moderate => SchedulerGateMode::Auto, - AgentActivityLevel::Active | AgentActivityLevel::AlwaysOn => SchedulerGateMode::AlwaysOn, - }; - config.scheduler_gate.mode = gate_mode; - - config.save().await.map_err(|e| e.to_string())?; - - let gate_cfg = config.scheduler_gate.clone(); - crate::openhuman::scheduler_gate::gate::update_config(gate_cfg); - - tracing::info!( - level = %level.as_str(), - gate_mode = %gate_mode.as_str(), - "[config:activity_level] activity level updated" - ); - - let (cost_min, cost_max) = level.estimated_monthly_cost_range(); - let value = serde_json::json!({ - "level": level as u8, - "level_label": level.as_str(), - "sync_interval_secs": level.sync_interval_secs(), - "heartbeat_enabled": level.heartbeat_enabled(), - "subconscious_enabled": level.subconscious_enabled(), - "token_budget_per_cycle": level.token_budget_per_cycle(), - "estimated_monthly_cost_min_usd": cost_min, - "estimated_monthly_cost_max_usd": cost_max, - }); - Ok(RpcOutcome::new( - value, - vec![format!( - "activity level set to '{}' — saved to {}", - level.as_str(), - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies activity level settings, and saves it. -pub async fn load_and_apply_activity_level_settings( - update: ActivityLevelSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_activity_level_settings(&mut config, update).await -} - -/// Patch for the global memory-sync cadence (#3302). 
-/// -/// `sync_interval_secs` carries the new value to store in -/// [`Config::memory_sync_interval_secs`]: -/// - omitted / `null` → reset to "use the default cadence" (`None`) -/// - `0` → "Manual only" (periodic auto-sync disabled) -/// - `n > 0` → sync every `n` seconds (applied per source as a floor over the -/// provider default by the scheduler) -#[derive(Debug, Default)] -pub struct MemorySyncSettingsPatch { - pub sync_interval_secs: Option, -} - -/// Build the JSON view of the memory-sync settings shared by get + apply. -fn memory_sync_settings_value(stored: Option) -> serde_json::Value { - let is_manual = stored == Some(0); - let is_default = stored.is_none(); - // The cadence the UI should highlight: the stored value when set, else the - // resolved 24h default. `0` (manual) is surfaced verbatim so the UI can - // select the "Manual only" option. - let selected_secs = - stored.unwrap_or(crate::openhuman::config::DEFAULT_MEMORY_SYNC_INTERVAL_SECS); - json!({ - "sync_interval_secs": stored, - "selected_secs": selected_secs, - "is_manual": is_manual, - "is_default": is_default, - "default_secs": crate::openhuman::config::DEFAULT_MEMORY_SYNC_INTERVAL_SECS, - "presets": crate::openhuman::config::MEMORY_SYNC_INTERVAL_PRESETS_SECS, - }) -} - -/// Returns the current global memory-sync cadence and its derived view. -pub async fn get_memory_sync_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let value = memory_sync_settings_value(config.memory_sync_interval_secs); - Ok(RpcOutcome::single_log(value, "memory sync settings read")) -} - -/// Updates the global memory-sync cadence and persists it. The running -/// scheduler reads `config.memory_sync_interval_secs` fresh on each tick, so -/// the new cadence takes effect from the next tick without a restart. 
-pub async fn apply_memory_sync_settings( - config: &mut Config, - update: MemorySyncSettingsPatch, -) -> Result, String> { - config.memory_sync_interval_secs = update.sync_interval_secs; - config.save().await.map_err(|e| e.to_string())?; - - tracing::info!( - sync_interval_secs = ?config.memory_sync_interval_secs, - "[config:memory_sync] memory sync interval updated" - ); - - let stored = config.memory_sync_interval_secs; - let value = memory_sync_settings_value(stored); - let msg = match stored { - Some(0) => "memory sync set to Manual only".to_string(), - Some(n) => format!("memory sync interval set to {n}s"), - None => "memory sync interval reset to default".to_string(), - }; - Ok(RpcOutcome::new( - value, - vec![format!("{msg} — saved to {}", config.config_path.display())], - )) -} - -/// Loads the configuration, applies memory-sync settings, and saves it. -pub async fn load_and_apply_memory_sync_settings( - update: MemorySyncSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_memory_sync_settings(&mut config, update).await -} - -/// Serializes the load-modify-save in [`add_auto_approve_tool`] so two -/// concurrent "Always allow" appends (different tools) can't read the same -/// `auto_approve`, each push their own, and clobber the other on save -/// (last-write-wins lost-update). Holding it across load→save makes the second -/// caller observe the first's write and union the entries. Process-local; the -/// allowlist lives in a single per-launch config file. (CodeRabbit, PR #2706.) -fn auto_approve_write_lock() -> &'static tokio::sync::Mutex<()> { - static LOCK: std::sync::OnceLock> = std::sync::OnceLock::new(); - LOCK.get_or_init(|| tokio::sync::Mutex::new(())) -} - -/// Append `tool_name` to `autonomy.auto_approve` ("Always allow") and persist + -/// reload the live policy. Idempotent — a no-op (no disk write) when the tool is -/// already allow-listed. Backs the `ApproveAlwaysForTool` approval decision. 
-pub async fn add_auto_approve_tool(tool_name: &str) -> Result<(), String> { - // Serialize the read-modify-write against concurrent appends (see lock doc). - let _guard = auto_approve_write_lock().lock().await; - let mut config = load_config_with_timeout().await?; - if config.autonomy.auto_approve.iter().any(|t| t == tool_name) { - tracing::debug!( - tool = tool_name, - "[config:auto_approve] tool already allow-listed; nothing to persist" - ); - return Ok(()); - } - let mut next = config.autonomy.auto_approve.clone(); - next.push(tool_name.to_string()); - let patch = AutonomySettingsPatch { - auto_approve: Some(next), - ..AutonomySettingsPatch::default() - }; - apply_autonomy_settings(&mut config, patch) - .await - .map(|_| ()) -} - -/// Returns the current `[autonomy]` settings block as JSON (no secrets). -/// -/// Emits a log line so `into_cli_compatible_json` wraps the payload under -/// `result` — the shape every consumer reads (`AgentAccessPanel` / -/// `AutonomyPanel` use `res.result.*`, and `json_rpc_e2e` strips the wrapper). -pub async fn get_autonomy_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let value = serde_json::to_value(&config.autonomy).map_err(|e| e.to_string())?; - Ok(RpcOutcome::single_log(value, "autonomy settings read")) -} - -/// Updates the `[agent]` block (currently the `agent_timeout_secs` tool/action -/// wall-clock timeout). -/// -/// After persisting, pushes the new value into the live -/// [`crate::openhuman::tool_timeout`] runtime so subsequent tool calls honour -/// it without a core restart. The `OPENHUMAN_TOOL_TIMEOUT_SECS` env var, when -/// set, still overrides the config value (the push is a no-op in that case). -/// Returns the updated config snapshot. 
-pub async fn apply_agent_settings( - config: &mut Config, - update: AgentSettingsPatch, -) -> Result, String> { - use crate::openhuman::tool_timeout::{MAX_TIMEOUT_SECS, MIN_TIMEOUT_SECS}; - - if let Some(timeout_secs) = update.agent_timeout_secs { - if !(MIN_TIMEOUT_SECS..=MAX_TIMEOUT_SECS).contains(&timeout_secs) { - log::warn!( - "[config][agent] rejected agent_timeout_secs={timeout_secs} (valid {MIN_TIMEOUT_SECS}..={MAX_TIMEOUT_SECS})" - ); - return Err(format!( - "agent_timeout_secs must be between {MIN_TIMEOUT_SECS} and {MAX_TIMEOUT_SECS} seconds (got {timeout_secs})" - )); - } - config.agent.agent_timeout_secs = timeout_secs; - } - - config.save().await.map_err(|e| e.to_string())?; - - // Push the persisted value into the live tool-timeout runtime so the change - // takes effect on the next tool call without restarting the core. The env - // override (if any) still wins inside `set_tool_timeout_secs`. - let effective = - crate::openhuman::tool_timeout::set_tool_timeout_secs(config.agent.agent_timeout_secs); - log::debug!( - "[config][agent] agent settings saved; agent_timeout_secs={} effective={}s", - config.agent.agent_timeout_secs, - effective - ); - - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "agent settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies agent settings updates, and saves it. -pub async fn load_and_apply_agent_settings( - update: AgentSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_agent_settings(&mut config, update).await -} - -/// Returns the agent execution settings (currently the action timeout) plus the -/// runtime-effective value and whether the `OPENHUMAN_TOOL_TIMEOUT_SECS` env var -/// is overriding the configured value, so the UI can explain a no-op control. 
-pub async fn get_agent_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - // Ensure the runtime timeout is seeded from the persisted config so the - // `effective_timeout_secs` field is correct even if startup didn't seed it - // (e.g. in CLI invocations or tests that skip the full boot sequence). - crate::openhuman::tool_timeout::set_tool_timeout_secs(config.agent.agent_timeout_secs); - let value = serde_json::json!({ - "agent_timeout_secs": config.agent.agent_timeout_secs, - "effective_timeout_secs": crate::openhuman::tool_timeout::tool_execution_timeout_secs(), - "env_override": crate::openhuman::tool_timeout::env_override_active(), - "min_timeout_secs": crate::openhuman::tool_timeout::MIN_TIMEOUT_SECS, - "max_timeout_secs": crate::openhuman::tool_timeout::MAX_TIMEOUT_SECS, - }); - Ok(RpcOutcome::single_log(value, "agent settings read")) -} - -/// Updates the analytics-related settings in the configuration. -pub async fn apply_analytics_settings( - config: &mut Config, - update: AnalyticsSettingsPatch, -) -> Result, String> { - if let Some(enabled) = update.enabled { - config.observability.analytics_enabled = enabled; - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "analytics settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies analytics settings updates, and saves it. -pub async fn load_and_apply_analytics_settings( - update: AnalyticsSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_analytics_settings(&mut config, update).await -} - -/// Updates the Google Meet integration settings in the configuration. 
-pub async fn apply_meet_settings( - config: &mut Config, - update: MeetSettingsPatch, -) -> Result, String> { - if let Some(enabled) = update.auto_orchestrator_handoff { - config.meet.auto_orchestrator_handoff = enabled; - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "meet settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies meet settings updates, and saves it. -pub async fn load_and_apply_meet_settings( - update: MeetSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_meet_settings(&mut config, update).await -} - -/// Updates the search engine configuration. Empty API-key strings clear the -/// stored value rather than treat empty-string as "credential present". -pub async fn apply_search_settings( - config: &mut Config, - update: SearchSettingsPatch, -) -> Result, String> { - if let Some(engine) = update.engine { - let trimmed = engine.trim(); - // Reject blatantly bogus values so the panel can show a friendly - // error. Unknown values still resolve to managed at registration - // time via `effective_engine()`, but failing fast in the writer keeps - // the TOML clean. 
- match trimmed { - "disabled" | "managed" | "parallel" | "brave" | "querit" => { - config.search.engine = trimmed.to_string(); - } - other => { - return Err(format!( - "engine must be one of disabled/managed/parallel/brave/querit (got {other:?})" - )); - } - } - } - if let Some(n) = update.max_results { - if !(1..=20).contains(&n) { - return Err(format!("max_results must be between 1 and 20 (got {n})")); - } - config.search.max_results = n; - } - if let Some(secs) = update.timeout_secs { - if !(1..=120).contains(&secs) { - return Err(format!( - "timeout_secs must be between 1 and 120 (got {secs})" - )); - } - config.search.timeout_secs = secs; - } - if let Some(raw) = update.parallel_api_key { - let trimmed = raw.trim(); - config.search.parallel.api_key = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Some(raw) = update.brave_api_key { - let trimmed = raw.trim(); - config.search.brave.api_key = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Some(raw) = update.querit_api_key { - let trimmed = raw.trim(); - config.search.querit.api_key = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - // Allowed websites (web_fetch / curl host allowlist). Trim + drop blanks - // + dedupe so the saved TOML stays clean; `"*"` is preserved as the - // allow-all wildcard. 
- let allowlist_touched = update.allowed_domains.is_some() || update.allow_all.is_some(); - let before_count = config.http_request.allowed_domains.len(); - let before_allow_all = config.http_request.allowed_domains.iter().any(|d| d == "*"); - if let Some(domains) = update.allowed_domains { - let mut cleaned: Vec = domains - .into_iter() - .map(|d| d.trim().to_string()) - .filter(|d| !d.is_empty()) - .collect(); - cleaned.sort(); - cleaned.dedup(); - config.http_request.allowed_domains = cleaned; - } - if let Some(allow_all) = update.allow_all { - if allow_all { - config.http_request.allowed_domains = vec!["*".to_string()]; - } else { - config.http_request.allowed_domains.retain(|d| d != "*"); - } - } - if allowlist_touched { - // Grep-friendly state-transition log for a security-sensitive surface. - // Record only host counts + the allow-all wildcard flag — never the raw - // hosts (redaction rule). Lets us trace "who widened/narrowed web reach" - // without leaking the allowlist contents. - let after_count = config.http_request.allowed_domains.len(); - let after_allow_all = config.http_request.allowed_domains.iter().any(|d| d == "*"); - tracing::info!( - before_count, - after_count, - before_allow_all, - after_allow_all, - "[config] http_request.allowed_domains updated" - ); - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "search settings saved to {}", - config.config_path.display() - )], - )) -} - -pub async fn load_and_apply_search_settings( - update: SearchSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_search_settings(&mut config, update).await -} - -/// Read the current search engine settings (with API keys redacted to a -/// presence boolean so the UI can show "configured" without ever rendering -/// the raw secret). 
-pub async fn get_search_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let result = serde_json::json!({ - "engine": config.search.requested_engine_str(), - "effective_engine": match config.search.effective_engine() { - crate::openhuman::config::SearchEngine::Disabled => "disabled", - crate::openhuman::config::SearchEngine::Managed => "managed", - crate::openhuman::config::SearchEngine::Parallel => "parallel", - crate::openhuman::config::SearchEngine::Brave => "brave", - crate::openhuman::config::SearchEngine::Querit => "querit", - }, - "max_results": config.search.max_results, - "timeout_secs": config.search.timeout_secs, - "parallel_configured": config.search.parallel.has_key(), - "brave_configured": config.search.brave.has_key(), - "querit_configured": config.search.querit.has_key(), - "allowed_domains": config.http_request.allowed_domains, - "allow_all": config.http_request.allowed_domains.iter().any(|d| d == "*"), - }); - Ok(RpcOutcome::new( - result, - vec!["search settings read".to_string()], - )) -} - -/// Reads dashboard settings exposed to the desktop UI. 
-pub async fn get_dashboard_settings() -> Result, String> { - let request_id = uuid::Uuid::new_v4().to_string(); - tracing::debug!( - target: "openhuman_core::config", - request_id = %request_id, - method = "openhuman.config_get_dashboard_settings", - "OPENHUMAN: get_dashboard_settings entry" - ); - tracing::debug!( - target: "openhuman_core::config", - request_id = %request_id, - method = "openhuman.config_get_dashboard_settings", - "OPENHUMAN: get_dashboard_settings loading config" - ); - - let config = load_config_with_timeout().await.map_err(|error| { - tracing::warn!( - target: "openhuman_core::config", - request_id = %request_id, - method = "openhuman.config_get_dashboard_settings", - error = %error, - "OPENHUMAN: get_dashboard_settings config load failed" - ); - error - })?; - - tracing::debug!( - target: "openhuman_core::config", - request_id = %request_id, - method = "openhuman.config_get_dashboard_settings", - "OPENHUMAN: get_dashboard_settings serializing dashboard settings" - ); - let result = serde_json::to_value(&config.dashboard).map_err(|error| { - let message = error.to_string(); - tracing::warn!( - target: "openhuman_core::config", - request_id = %request_id, - method = "openhuman.config_get_dashboard_settings", - error = %message, - "OPENHUMAN: get_dashboard_settings serialization failed" - ); - message - })?; - - tracing::debug!( - target: "openhuman_core::config", - request_id = %request_id, - method = "openhuman.config_get_dashboard_settings", - "OPENHUMAN: get_dashboard_settings exit" - ); - Ok(RpcOutcome::new( - result, - vec!["dashboard settings read".to_string()], - )) -} - -/// Loads the configuration, applies browser settings updates, and saves it. 
-pub async fn load_and_apply_browser_settings( - update: BrowserSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_browser_settings(&mut config, update).await -} - -/// Updates the local-AI runtime + per-feature usage flags in the configuration. -pub async fn apply_local_ai_settings( - config: &mut Config, - update: LocalAiSettingsPatch, -) -> Result, String> { - if let Some(v) = update.runtime_enabled { - config.local_ai.runtime_enabled = v; - } - if let Some(v) = update.opt_in_confirmed { - config.local_ai.opt_in_confirmed = v; - } - if let Some(provider) = update.provider { - config.local_ai.provider = - crate::openhuman::inference::local::provider::normalize_provider(&provider); - } - if let Some(base_url) = update.base_url { - config.local_ai.base_url = match base_url { - None => None, - Some(base_url) if base_url.trim().is_empty() => None, - Some(base_url) - if crate::openhuman::inference::local::provider::provider_from_config(config) - == crate::openhuman::inference::local::provider::LocalAiProvider::Ollama => - { - Some(crate::openhuman::inference::local::validate_ollama_url( - &base_url, - )?) 
- } - Some(base_url) => Some(base_url.trim().trim_end_matches('/').to_string()), - }; - } - if let Some(model_id) = update.model_id { - config.local_ai.model_id = model_id.trim().to_string(); - } - if let Some(chat_model_id) = update.chat_model_id { - config.local_ai.chat_model_id = chat_model_id.trim().to_string(); - } - if let Some(v) = update.usage_embeddings { - config.local_ai.usage.embeddings = v; - } - if let Some(v) = update.usage_heartbeat { - config.local_ai.usage.heartbeat = v; - } - if let Some(v) = update.usage_learning_reflection { - config.local_ai.usage.learning_reflection = v; - } - if let Some(v) = update.usage_subconscious { - config.local_ai.usage.subconscious = v; - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "local AI settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies local-AI settings updates, and saves it. -pub async fn load_and_apply_local_ai_settings( - update: LocalAiSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_local_ai_settings(&mut config, update).await -} - -/// Updates the Composio trigger-triage settings in the configuration. 
-pub async fn apply_composio_trigger_settings( - config: &mut Config, - update: ComposioTriggerSettingsPatch, -) -> Result, String> { - if let Some(v) = update.triage_disabled { - config.composio.triage_disabled = v; - tracing::debug!( - triage_disabled = v, - "[config][composio] triage_disabled updated" - ); - } - if let Some(toolkits) = update.triage_disabled_toolkits { - tracing::debug!( - count = toolkits.len(), - "[config][composio] triage_disabled_toolkits updated" - ); - config.composio.triage_disabled_toolkits = toolkits; - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "composio trigger settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Loads the configuration, applies composio trigger settings, and saves it. -pub async fn load_and_apply_composio_trigger_settings( - update: ComposioTriggerSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_composio_trigger_settings(&mut config, update).await -} - -/// Reads the current composio trigger-triage settings. -pub async fn get_composio_trigger_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let result = serde_json::json!({ - "triage_disabled": config.composio.triage_disabled, - "triage_disabled_toolkits": config.composio.triage_disabled_toolkits, - }); - Ok(RpcOutcome::new( - result, - vec!["composio trigger settings read".to_string()], - )) -} - -/// Resolves the effective API URL from configuration or defaults. -pub async fn load_and_resolve_api_url() -> Result, String> { - let config = load_config_with_timeout().await?; - let resolved = crate::api::config::effective_api_url(&config.api_url); - Ok(RpcOutcome::new(json!({ "api_url": resolved }), Vec::new())) -} - -/// Resolves a workspace onboarding flag, creating or checking its existence. 
-pub async fn workspace_onboarding_flag_resolve( - flag_name: Option, - default_name: &str, -) -> Result, String> { - let name = flag_name.unwrap_or_else(|| default_name.to_string()); - let trimmed = name.trim(); - if trimmed.is_empty() - || trimmed.contains('/') - || trimmed.contains('\\') - || trimmed.contains("..") - { - return Err("Invalid onboarding flag name".to_string()); - } - let workspace_dir = match load_config_with_timeout().await { - Ok(cfg) => cfg.workspace_dir, - Err(_) => fallback_workspace_dir(), - }; - workspace_onboarding_flag_exists(workspace_dir, trimmed) -} - -/// Returns the current state of runtime-only flags. -pub fn get_runtime_flags() -> RpcOutcome { - RpcOutcome::single_log(runtime_flags(), "runtime flags read") -} - -fn runtime_flags() -> RuntimeFlagsOut { - RuntimeFlagsOut { - browser_allow_all: env_flag_enabled(BROWSER_ALLOW_ALL_ENV), - log_prompts: env_flag_enabled("OPENHUMAN_LOG_PROMPTS"), - } -} - -/// Updates the `OPENHUMAN_BROWSER_ALLOW_ALL` environment flag. -/// -/// **Security note:** when enabled, this disables the browser tool's -/// per-domain allowlist for the entire process. Both transitions are -/// audit-logged at WARN level with a `[SECURITY]` prefix so operators -/// (and `journalctl -g '\[SECURITY\]'` style scrapes) can spot -/// allowlist toggles in the live log stream. -/// -/// `is_private_host` checks still apply to the resolved IP, so this -/// flag does not unlock loopback / RFC1918 destinations. -pub fn set_browser_allow_all(enabled: bool) -> Result, String> { - if enabled && !env_flag_enabled(BROWSER_ALLOW_ALL_RPC_ENABLE_ENV) { - tracing::warn!( - "[SECURITY] refused browser allow-all enable via RPC: \ - set {BROWSER_ALLOW_ALL_ENV}=1 at startup or explicitly set \ - {BROWSER_ALLOW_ALL_RPC_ENABLE_ENV}=1 before using the runtime toggle" - ); - return Err(format!( - "Refusing to enable {BROWSER_ALLOW_ALL_ENV} via RPC. 
Start OpenHuman with \ - {BROWSER_ALLOW_ALL_ENV}=1, or set {BROWSER_ALLOW_ALL_RPC_ENABLE_ENV}=1 for an \ - explicit operator-approved runtime override." - )); - } - - let was_enabled = env_flag_enabled(BROWSER_ALLOW_ALL_ENV); - if enabled { - unsafe { - std::env::set_var(BROWSER_ALLOW_ALL_ENV, "1"); - } - } else { - unsafe { - std::env::remove_var(BROWSER_ALLOW_ALL_ENV); - } - } - let flags = runtime_flags(); - let now_enabled = flags.browser_allow_all; - - if was_enabled != now_enabled { - if now_enabled { - tracing::warn!( - "[SECURITY] browser allow-all enabled via RPC: \ - per-domain allowlist is now bypassed for all sessions \ - (private-host check still applies)" - ); - } else { - tracing::info!( - "[SECURITY] browser allow-all disabled via RPC: \ - per-domain allowlist re-enforced" - ); - } - } - - let log_msg = if now_enabled { - "[SECURITY] browser allow-all flag set to enabled" - } else { - "[SECURITY] browser allow-all flag set to disabled" - }; - Ok(RpcOutcome::single_log(flags, log_msg)) -} - -/// Checks if a specific onboarding flag file exists in the workspace. -pub fn workspace_onboarding_flag_exists( - workspace_dir: PathBuf, - flag_name: &str, -) -> Result, String> { - let trimmed = flag_name.trim(); - if trimmed.is_empty() - || trimmed.contains('/') - || trimmed.contains('\\') - || trimmed.contains("..") - { - return Err("Invalid onboarding flag name".to_string()); - } - Ok(RpcOutcome::single_log( - workspace_dir.join(trimmed).is_file(), - "onboarding flag checked", - )) -} - -/// Creates or removes an onboarding flag file in the workspace. 
-pub async fn workspace_onboarding_flag_set( - flag_name: Option, - default_name: &str, - value: bool, -) -> Result, String> { - let name = flag_name.unwrap_or_else(|| default_name.to_string()); - let trimmed = name.trim(); - if trimmed.is_empty() - || trimmed.contains('/') - || trimmed.contains('\\') - || trimmed.contains("..") - { - return Err("Invalid onboarding flag name".to_string()); - } - let workspace_dir = match load_config_with_timeout().await { - Ok(cfg) => cfg.workspace_dir, - Err(_) => fallback_workspace_dir(), - }; - let flag_path = workspace_dir.join(trimmed); - if value { - if let Some(parent) = flag_path.parent() { - std::fs::create_dir_all(parent) - .map_err(|e| format!("Failed to create workspace dir: {e}"))?; - } - std::fs::write(&flag_path, "") - .map_err(|e| format!("Failed to create onboarding flag: {e}"))?; - } else if flag_path.is_file() { - std::fs::remove_file(&flag_path) - .map_err(|e| format!("Failed to remove onboarding flag: {e}"))?; - } - Ok(RpcOutcome::single_log( - flag_path.is_file(), - "onboarding flag updated", - )) -} - -/// Returns whether the onboarding process has been marked as completed. -pub async fn get_onboarding_completed() -> Result, String> { - let config = load_config_with_timeout().await?; - Ok(RpcOutcome::single_log( - config.onboarding_completed, - "onboarding_completed read from config", - )) -} - -/// Updates and persists the onboarding completion status. -/// -/// On a false→true transition, seeds the recurring morning-briefing -/// cron job via [`crate::openhuman::cron::seed::seed_proactive_agents`]. 
-pub async fn set_onboarding_completed(value: bool) -> Result, String> { - tracing::debug!(value, "[onboarding] set_onboarding_completed called"); - let mut config = load_config_with_timeout().await?; - let was_completed = config.onboarding_completed; - config.onboarding_completed = value; - - config.save().await.map_err(|e| e.to_string())?; - - if value && !was_completed { - tracing::debug!( - "[onboarding] false→true transition detected — seeding cron jobs (welcome is renderer-triggered)" - ); - let seed_config = config.clone(); - tokio::task::spawn_blocking(move || { - if let Err(e) = crate::openhuman::cron::seed::seed_proactive_agents(&seed_config) { - tracing::warn!("[onboarding] failed to seed proactive agent cron jobs: {e}"); - } - }); - } else { - tracing::debug!( - was_completed, - value, - "[onboarding] no transition — skipping proactive seeding" - ); - } - - Ok(RpcOutcome::single_log( - config.onboarding_completed, - "onboarding_completed saved to config", - )) -} - -// ── Dictation settings ─────────────────────────────────────────────── - -/// Represents a partial update to dictation-related settings. -pub struct DictationSettingsPatch { - pub enabled: Option, - pub hotkey: Option, - pub activation_mode: Option, - pub llm_refinement: Option, - pub streaming: Option, - pub streaming_interval_ms: Option, -} - -/// Returns the current dictation settings as a JSON object. 
-pub async fn get_dictation_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let result = json!({ - "enabled": config.dictation.enabled, - "hotkey": config.dictation.hotkey, - "activation_mode": config.dictation.activation_mode, - "llm_refinement": config.dictation.llm_refinement, - "streaming": config.dictation.streaming, - "streaming_interval_ms": config.dictation.streaming_interval_ms, - }); - Ok(RpcOutcome::new( - result, - vec!["dictation settings read".to_string()], - )) -} - -/// Loads configuration, applies dictation settings updates, and saves it. -pub async fn load_and_apply_dictation_settings( - update: DictationSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - if let Some(enabled) = update.enabled { - config.dictation.enabled = enabled; - } - if let Some(hotkey) = update.hotkey { - config.dictation.hotkey = hotkey; - } - if let Some(mode) = update.activation_mode { - match mode.as_str() { - "toggle" => { - config.dictation.activation_mode = - crate::openhuman::config::DictationActivationMode::Toggle; - } - "push" => { - config.dictation.activation_mode = - crate::openhuman::config::DictationActivationMode::Push; - } - _ => { - return Err(format!( - "invalid activation_mode: {mode} (valid: toggle, push)" - )) - } - } - } - if let Some(llm_refinement) = update.llm_refinement { - config.dictation.llm_refinement = llm_refinement; - } - if let Some(streaming) = update.streaming { - config.dictation.streaming = streaming; - } - if let Some(interval) = update.streaming_interval_ms { - config.dictation.streaming_interval_ms = interval; - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(&config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "dictation settings saved to {}", - config.config_path.display() - )], - )) -} - -// ── Voice server settings ─────────────────────────────────────────── - -/// Represents a partial update to voice 
server related settings. -pub struct VoiceServerSettingsPatch { - pub auto_start: Option, - pub hotkey: Option, - pub activation_mode: Option, - pub skip_cleanup: Option, - pub min_duration_secs: Option, - pub silence_threshold: Option, - pub custom_dictionary: Option>, - pub always_on_enabled: Option, - pub wake_word: Option, -} - -/// Returns the current voice server settings as a JSON object. -pub async fn get_voice_server_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let result = json!({ - "auto_start": config.voice_server.auto_start, - "hotkey": config.voice_server.hotkey, - "activation_mode": config.voice_server.activation_mode, - "skip_cleanup": config.voice_server.skip_cleanup, - "min_duration_secs": config.voice_server.min_duration_secs, - "silence_threshold": config.voice_server.silence_threshold, - "custom_dictionary": config.voice_server.custom_dictionary, - "always_on_enabled": config.voice_server.always_on_enabled, - "wake_word": config.voice_server.wake_word, - }); - Ok(RpcOutcome::new( - result, - vec!["voice server settings read".to_string()], - )) -} - -/// Loads configuration, applies voice server settings updates, and saves it. 
-pub async fn load_and_apply_voice_server_settings( - update: VoiceServerSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - if let Some(auto_start) = update.auto_start { - config.voice_server.auto_start = auto_start; - } - if let Some(hotkey) = update.hotkey { - config.voice_server.hotkey = hotkey; - } - if let Some(mode) = update.activation_mode { - match mode.as_str() { - "tap" => { - config.voice_server.activation_mode = - crate::openhuman::config::VoiceActivationMode::Tap; - } - "push" => { - config.voice_server.activation_mode = - crate::openhuman::config::VoiceActivationMode::Push; - } - _ => { - return Err(format!( - "invalid activation_mode: {mode} (valid: tap, push)" - )) - } - } - } - if let Some(skip_cleanup) = update.skip_cleanup { - config.voice_server.skip_cleanup = skip_cleanup; - } - if let Some(min_duration_secs) = update.min_duration_secs { - config.voice_server.min_duration_secs = min_duration_secs.max(0.0); - } - if let Some(silence_threshold) = update.silence_threshold { - config.voice_server.silence_threshold = silence_threshold.max(0.0); - } - if let Some(custom_dictionary) = update.custom_dictionary { - config.voice_server.custom_dictionary = custom_dictionary; - } - if let Some(always_on_enabled) = update.always_on_enabled { - config.voice_server.always_on_enabled = always_on_enabled; - } - if let Some(wake_word) = update.wake_word { - // Trim so a whitespace-only value collapses to the documented - // "empty = no wake word" case rather than a non-empty no-match token. - config.voice_server.wake_word = wake_word.trim().to_string(); - } - config.save().await.map_err(|e| e.to_string())?; - let snapshot = snapshot_config_json(&config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "voice server settings saved to {}", - config.config_path.display() - )], - )) -} - -/// Returns the operational status of the agent server. 
-pub fn agent_server_status() -> RpcOutcome { - let running = crate::openhuman::service::mock::mock_agent_running().unwrap_or(true); - log::info!("[config] agent_server_status requested: running={running}"); - let payload = json!({ - "running": running, - "url": core_rpc_url_from_env(), - }); - RpcOutcome::single_log(payload, "agent server status checked") -} - -/// Deletes all local data directories and workspace markers. -/// -/// Runs **inside the core's tokio task**, which means the running core -/// holds open handles to SQLite databases, log files, the Sentry session -/// store, etc. On Windows, `remove_dir_all` therefore fails with -/// `ERROR_SHARING_VIOLATION` (os error 32) — see OPENHUMAN-TAURI-AF. -/// -/// GUI callers must use the Tauri-side `reset_local_data` command instead: -/// it stops the embedded core via `CoreProcessHandle::shutdown` (dropping -/// the file handles), removes the directories from the Tauri host process, -/// and restarts the core. This JSON-RPC method is kept for headless / CLI -/// callers where in-process removal is acceptable (POSIX file semantics -/// tolerate unlinking open files; on Windows the CLI invocation runs -/// without the core attached, so no handle is in the way). -pub async fn reset_local_data() -> Result, String> { - let config = load_config_with_timeout().await?; - let current_openhuman_dir = config_openhuman_dir(&config); - let default_openhuman_dir = default_openhuman_dir(); - reset_local_data_for_paths(¤t_openhuman_dir, &default_openhuman_dir).await -} - -/// Reports the resolved paths that `reset_local_data` would remove, without -/// performing any filesystem changes. 
-/// -/// Lets the Tauri-side `reset_local_data` command discover the active -/// workspace dir, the default `~/.openhuman` dir (which can differ when -/// `OPENHUMAN_WORKSPACE` is set or a staging build is in use), and the -/// active workspace marker file **before** the core sidecar is shut down — -/// after which the Tauri shell removes them while no process holds open -/// handles. See OPENHUMAN-TAURI-AF for the Windows file-locking failure -/// that motivated the split. -pub async fn get_data_paths() -> Result, String> { - let config = load_config_with_timeout().await?; - let current_openhuman_dir = config_openhuman_dir(&config); - let default_openhuman_dir = default_openhuman_dir(); - let active_workspace_marker = active_workspace_marker_path(&default_openhuman_dir); - Ok(RpcOutcome::new( - json!({ - "current_openhuman_dir": current_openhuman_dir.display().to_string(), - "default_openhuman_dir": default_openhuman_dir.display().to_string(), - "active_workspace_marker_path": active_workspace_marker.display().to_string(), - }), - vec![format!( - "data paths resolved (current={}, default={})", - current_openhuman_dir.display(), - default_openhuman_dir.display() - )], - )) -} - -/// Reports the agent's filesystem roots so the UI can render them live -/// instead of hard-coding strings that drift away from `Config`. -/// -/// Returns three string paths: -/// -/// * `action_dir` — the agent's read/write root (`Config.action_dir`). -/// Defaults to `default_action_dir()` (`~/OpenHuman/projects` via -/// `default_projects_dir()`); overridable via `OPENHUMAN_ACTION_DIR`. -/// Acting tools (`shell`, `node_exec`, `npm_exec`, `file_write`, -/// `edit_file`, `apply_patch`, `git_operations`) default their CWD here. -/// * `workspace_dir` — internal product state (`Config.workspace_dir`, -/// typically `~/.openhuman/users//workspace`). Agent-blocked via -/// [`SecurityPolicy::is_workspace_internal_path`]. 
-/// * `projects_dir` — the default projects home -/// (`default_projects_dir()`, `~/OpenHuman/projects`), injected as a -/// ReadWrite trusted root at startup. Same as `action_dir` when the -/// user hasn't set `OPENHUMAN_ACTION_DIR`. -/// * `action_dir_source` — `"env"` / `"override"` / `"default"`, so the UI can -/// gate editability (env-pinned ⇒ read-only). -/// -/// Distinct from [`get_data_paths`], which reports the `openhuman_dir` -/// roots that `reset_local_data` would remove and is consumed only by -/// the Tauri reset flow. -pub async fn get_agent_paths() -> Result, String> { - let config = load_config_with_timeout().await?; - Ok(RpcOutcome::new( - agent_paths_payload(&config), - vec![format!( - "agent paths resolved (action={}, workspace={}, source={})", - config.action_dir.display(), - config.workspace_dir.display(), - action_dir_source(&config), - )], - )) -} - -// ── Sandbox settings ───────────────────────────────────────────────────────── - -/// Partial update for the `[security.sandbox]` + `[runtime.docker]` blocks. 
-#[derive(Debug, Clone, Default)] -pub struct SandboxSettingsPatch { - pub backend: Option, - pub enabled: Option, - pub docker_image: Option, - pub docker_memory_limit_mb: Option, - pub docker_cpu_limit: Option, - pub env_passthrough: Option>, -} - -pub async fn get_sandbox_settings() -> Result, String> { - let config = load_config_with_timeout().await?; - let sandbox = &config.sandbox; - let docker = &config.runtime.docker; - - let docker_available = is_docker_available().await; - - let backend_str = match sandbox.backend { - crate::openhuman::config::SandboxBackend::Auto => "auto", - crate::openhuman::config::SandboxBackend::Landlock => "landlock", - crate::openhuman::config::SandboxBackend::Firejail => "firejail", - crate::openhuman::config::SandboxBackend::Bubblewrap => "bubblewrap", - crate::openhuman::config::SandboxBackend::Docker => "docker", - crate::openhuman::config::SandboxBackend::None => "none", - }; - - let detected_backend = detect_os_sandbox_backend(); - - let value = json!({ - "enabled": sandbox.enabled.unwrap_or(true), - "backend": backend_str, - "docker_image": docker.image, - "docker_memory_limit_mb": docker.memory_limit_mb, - "docker_cpu_limit": docker.cpu_limit, - "docker_available": docker_available, - "detected_backend": detected_backend, - "env_passthrough": crate::openhuman::sandbox::ops::SANDBOX_ENV_PASSTHROUGH, - }); - log::debug!("[config][sandbox] get_sandbox_settings: backend={backend_str}, docker_available={docker_available}"); - Ok(RpcOutcome::single_log(value, "sandbox settings read")) -} - -pub async fn apply_sandbox_settings( - config: &mut Config, - update: SandboxSettingsPatch, -) -> Result, String> { - if let Some(ref backend) = update.backend { - config.sandbox.backend = match backend.as_str() { - "auto" => crate::openhuman::config::SandboxBackend::Auto, - "landlock" => crate::openhuman::config::SandboxBackend::Landlock, - "firejail" => crate::openhuman::config::SandboxBackend::Firejail, - "bubblewrap" => 
crate::openhuman::config::SandboxBackend::Bubblewrap, - "docker" => crate::openhuman::config::SandboxBackend::Docker, - "none" => crate::openhuman::config::SandboxBackend::None, - other => { - log::warn!("[config][sandbox] rejected unknown backend: {other}"); - return Err(format!( - "unknown sandbox backend '{other}'; valid: auto, landlock, firejail, bubblewrap, docker, none" - )); - } - }; - } - if let Some(enabled) = update.enabled { - config.sandbox.enabled = Some(enabled); - } - if let Some(ref image) = update.docker_image { - let trimmed = image.trim(); - if trimmed.is_empty() { - return Err("docker_image must not be blank".into()); - } - config.runtime.docker.image = trimmed.to_string(); - } - if let Some(memory) = update.docker_memory_limit_mb { - config.runtime.docker.memory_limit_mb = Some(memory); - } - if let Some(cpu) = update.docker_cpu_limit { - if cpu <= 0.0 { - return Err("docker_cpu_limit must be positive".into()); - } - config.runtime.docker.cpu_limit = Some(cpu); - } - if let Some(ref passthrough) = update.env_passthrough { - log::debug!( - "[config][sandbox] env_passthrough update: {} vars", - passthrough.len() - ); - } - - config.save().await.map_err(|e| e.to_string())?; - - log::debug!( - "[config][sandbox] sandbox settings saved to {}", - config.config_path.display() - ); - let snapshot = snapshot_config_json(config)?; - Ok(RpcOutcome::new( - snapshot, - vec![format!( - "sandbox settings saved to {}", - config.config_path.display() - )], - )) -} - -pub async fn load_and_apply_sandbox_settings( - update: SandboxSettingsPatch, -) -> Result, String> { - let mut config = load_config_with_timeout().await?; - apply_sandbox_settings(&mut config, update).await -} - -async fn is_docker_available() -> bool { - let fut = tokio::process::Command::new("docker") - .arg("info") - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status(); - match tokio::time::timeout(std::time::Duration::from_secs(5), fut).await { - 
Ok(Ok(status)) => status.success(), - _ => false, - } -} - -fn detect_os_sandbox_backend() -> &'static str { - #[cfg(target_os = "linux")] - { - if std::path::Path::new("/sys/kernel/security/landlock").exists() { - return "landlock"; - } - if std::process::Command::new("firejail") - .arg("--version") - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status() - .is_ok() - { - return "firejail"; - } - if std::process::Command::new("bwrap") - .arg("--version") - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()) - .status() - .is_ok() - { - return "bubblewrap"; - } - "none" - } - #[cfg(target_os = "macos")] - { - "seatbelt" - } - #[cfg(target_os = "windows")] - { - "appcontainer" - } - #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))] - { - "none" - } -} - -#[cfg(test)] -#[path = "ops_tests.rs"] -mod tests; diff --git a/src/openhuman/config/ops/agent.rs b/src/openhuman/config/ops/agent.rs new file mode 100644 index 0000000000..1b06dcefab --- /dev/null +++ b/src/openhuman/config/ops/agent.rs @@ -0,0 +1,656 @@ +//! Agent, autonomy, paths, activity-level, and memory-sync config operations. + +use std::path::{Path, PathBuf}; + +use serde_json::json; + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::loader::{load_config_with_timeout, snapshot_config_json}; + +/// Partial update for the `[autonomy]` block — the agent's filesystem access +/// mode. Each `None` field is left unchanged. `trusted_roots`, `allowed_commands`, +/// `forbidden_paths`, and `auto_approve`, when `Some`, REPLACE the corresponding +/// array wholesale. +#[derive(Debug, Clone, Default)] +pub struct AutonomySettingsPatch { + /// `"readonly" | "supervised" | "full"` (case-insensitive). 
+ pub level: Option, + pub workspace_only: Option, + pub allowed_commands: Option>, + pub forbidden_paths: Option>, + pub trusted_roots: Option>, + pub allow_tool_install: Option, + pub max_actions_per_hour: Option, + /// "Always allow" allowlist — tool names the gate skips prompting for. + pub auto_approve: Option>, + pub require_task_plan_approval: Option, +} + +/// Partial update for the `[agent]` block. Currently carries the single +/// user-facing `agent_timeout_secs` knob (the tool/action wall-clock timeout); +/// other `AgentConfig` fields are not yet UI-exposed. `None` leaves the value +/// unchanged. +#[derive(Debug, Clone, Default)] +pub struct AgentSettingsPatch { + /// Tool/action wall-clock timeout in seconds. Validated to + /// `tool_timeout::MIN_TIMEOUT_SECS..=tool_timeout::MAX_TIMEOUT_SECS`. + pub agent_timeout_secs: Option, +} + +/// Partial update for the agent's editable filesystem roots. +/// +/// Only `action_dir` is editable today (issue #3240). `workspace_dir` and +/// `projects_dir` are intentionally read-only and not part of this patch. +#[derive(Debug, Clone, Default)] +pub struct AgentPathsPatch { + /// New action sandbox root. `Some("")`/whitespace clears the override and + /// reverts to the default; `Some(path)` sets it; `None` leaves it unchanged. + pub action_dir: Option, +} + +/// Partial update for the agent activity level (0–4). +#[derive(Debug, Clone, Default)] +pub struct ActivityLevelSettingsPatch { + /// "off" | "minimal" | "moderate" | "active" | "always_on" (or "0"-"4"). + pub level: Option, +} + +/// Patch for the global memory-sync cadence (#3302). 
+/// +/// `sync_interval_secs` carries the new value to store in +/// [`Config::memory_sync_interval_secs`]: +/// - omitted / `null` → reset to "use the default cadence" (`None`) +/// - `0` → "Manual only" (periodic auto-sync disabled) +/// - `n > 0` → sync every `n` seconds (applied per source as a floor over the +/// provider default by the scheduler) +#[derive(Debug, Default)] +pub struct MemorySyncSettingsPatch { + pub sync_interval_secs: Option, +} + +/// Updates the `[autonomy]` (agent access mode) settings in the configuration. +/// +/// After saving, publishes a `DomainEvent::System(AutonomyConfigChanged)` so that +/// live agent sessions can rebuild their `SecurityPolicy` without a core restart +/// (see `channels::runtime`). Returns the updated config snapshot. +pub async fn apply_autonomy_settings( + config: &mut Config, + update: AutonomySettingsPatch, +) -> Result, String> { + use crate::openhuman::security::AutonomyLevel; + + if let Some(level) = update.level { + config.autonomy.level = match level.trim().to_ascii_lowercase().as_str() { + "readonly" | "read_only" | "read-only" => AutonomyLevel::ReadOnly, + "supervised" => AutonomyLevel::Supervised, + "full" => AutonomyLevel::Full, + other => { + return Err(format!( + "invalid autonomy level '{other}' (expected readonly | supervised | full)" + )) + } + }; + } + if let Some(workspace_only) = update.workspace_only { + config.autonomy.workspace_only = workspace_only; + } + if let Some(allowed_commands) = update.allowed_commands { + config.autonomy.allowed_commands = allowed_commands; + } + if let Some(forbidden_paths) = update.forbidden_paths { + config.autonomy.forbidden_paths = forbidden_paths; + } + if let Some(trusted_roots) = update.trusted_roots { + config.autonomy.trusted_roots = trusted_roots; + } + if let Some(allow_tool_install) = update.allow_tool_install { + config.autonomy.allow_tool_install = allow_tool_install; + } + if let Some(max_actions_per_hour) = update.max_actions_per_hour { + if 
max_actions_per_hour == 0 { + return Err(format!( + "max_actions_per_hour must be at least 1 (got {max_actions_per_hour})" + )); + } + config.autonomy.max_actions_per_hour = max_actions_per_hour; + } + if let Some(auto_approve) = update.auto_approve { + config.autonomy.auto_approve = auto_approve; + } + if let Some(require_task_plan_approval) = update.require_task_plan_approval { + config.autonomy.require_task_plan_approval = require_task_plan_approval; + } + + config.save().await.map_err(|e| e.to_string())?; + + crate::openhuman::security::live_policy::reload_from(&config.autonomy); + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::AutonomyConfigChanged, + ); + + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "autonomy settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies autonomy settings updates, and saves it. +pub async fn load_and_apply_autonomy_settings( + update: AutonomySettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_autonomy_settings(&mut config, update).await +} + +/// Returns the current `[autonomy]` settings block as JSON (no secrets). +pub async fn get_autonomy_settings() -> Result, String> { + let config = load_config_with_timeout().await?; + let value = serde_json::to_value(&config.autonomy).map_err(|e| e.to_string())?; + Ok(RpcOutcome::single_log(value, "autonomy settings read")) +} + +fn auto_approve_write_lock() -> &'static tokio::sync::Mutex<()> { + static LOCK: std::sync::OnceLock> = std::sync::OnceLock::new(); + LOCK.get_or_init(|| tokio::sync::Mutex::new(())) +} + +/// Append `tool_name` to `autonomy.auto_approve` ("Always allow") and persist + +/// reload the live policy. Idempotent — a no-op (no disk write) when the tool is +/// already allow-listed. Backs the `ApproveAlwaysForTool` approval decision. 
+pub async fn add_auto_approve_tool(tool_name: &str) -> Result<(), String> { + let _guard = auto_approve_write_lock().lock().await; + let mut config = load_config_with_timeout().await?; + if config.autonomy.auto_approve.iter().any(|t| t == tool_name) { + tracing::debug!( + tool = tool_name, + "[config:auto_approve] tool already allow-listed; nothing to persist" + ); + return Ok(()); + } + let mut next = config.autonomy.auto_approve.clone(); + next.push(tool_name.to_string()); + let patch = AutonomySettingsPatch { + auto_approve: Some(next), + ..AutonomySettingsPatch::default() + }; + apply_autonomy_settings(&mut config, patch) + .await + .map(|_| ()) +} + +/// Updates the `[agent]` block (currently the `agent_timeout_secs` tool/action +/// wall-clock timeout). +/// +/// After persisting, pushes the new value into the live +/// [`crate::openhuman::tool_timeout`] runtime so subsequent tool calls honour +/// it without a core restart. The `OPENHUMAN_TOOL_TIMEOUT_SECS` env var, when +/// set, still overrides the config value (the push is a no-op in that case). +/// Returns the updated config snapshot. 
+pub async fn apply_agent_settings( + config: &mut Config, + update: AgentSettingsPatch, +) -> Result, String> { + use crate::openhuman::tool_timeout::{MAX_TIMEOUT_SECS, MIN_TIMEOUT_SECS}; + + if let Some(timeout_secs) = update.agent_timeout_secs { + if !(MIN_TIMEOUT_SECS..=MAX_TIMEOUT_SECS).contains(&timeout_secs) { + log::warn!( + "[config][agent] rejected agent_timeout_secs={timeout_secs} (valid {MIN_TIMEOUT_SECS}..={MAX_TIMEOUT_SECS})" + ); + return Err(format!( + "agent_timeout_secs must be between {MIN_TIMEOUT_SECS} and {MAX_TIMEOUT_SECS} seconds (got {timeout_secs})" + )); + } + config.agent.agent_timeout_secs = timeout_secs; + } + + config.save().await.map_err(|e| e.to_string())?; + + let effective = + crate::openhuman::tool_timeout::set_tool_timeout_secs(config.agent.agent_timeout_secs); + log::debug!( + "[config][agent] agent settings saved; agent_timeout_secs={} effective={}s", + config.agent.agent_timeout_secs, + effective + ); + + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "agent settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies agent settings updates, and saves it. +pub async fn load_and_apply_agent_settings( + update: AgentSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_agent_settings(&mut config, update).await +} + +/// Returns the agent execution settings (currently the action timeout) plus the +/// runtime-effective value and whether the `OPENHUMAN_TOOL_TIMEOUT_SECS` env var +/// is overriding the configured value, so the UI can explain a no-op control. 
+pub async fn get_agent_settings() -> Result, String> { + let config = load_config_with_timeout().await?; + crate::openhuman::tool_timeout::set_tool_timeout_secs(config.agent.agent_timeout_secs); + let value = serde_json::json!({ + "agent_timeout_secs": config.agent.agent_timeout_secs, + "effective_timeout_secs": crate::openhuman::tool_timeout::tool_execution_timeout_secs(), + "env_override": crate::openhuman::tool_timeout::env_override_active(), + "min_timeout_secs": crate::openhuman::tool_timeout::MIN_TIMEOUT_SECS, + "max_timeout_secs": crate::openhuman::tool_timeout::MAX_TIMEOUT_SECS, + }); + Ok(RpcOutcome::single_log(value, "agent settings read")) +} + +/// Expand a leading `~/` to the user's home directory, building the path +/// component-by-component so the result uses the platform-native separator +/// throughout. A naive `format!("{}/{rest}", home)` — or even `home.join(rest)` +/// — leaves the embedded `/` inside `rest`, yielding a mixed-separator path like +/// `C:\Users\Harry/OpenHuman/projects` on Windows, which `CreateProcessW` +/// rejects with `ERROR_DIRECTORY` (os error 267) when used as a process CWD. +/// See issue #3353 (RC-B). +/// +/// This is the single source of truth for `~/` expansion; `SecurityPolicy:: +/// expand_tilde` delegates here so policy and config stay byte-for-byte +/// consistent. +pub fn expand_tilde(path: &str) -> String { + let Some(rest) = path.strip_prefix("~/") else { + return path.to_string(); + }; + let Some(home) = dirs::home_dir() else { + return path.to_string(); + }; + let mut out = home; + for part in rest.split('/') { + if !part.is_empty() { + out.push(part); + } + } + out.to_string_lossy().into_owned() +} + +/// Redact a path for logging by replacing the user's home-directory prefix with +/// `~`. Keeps the path *shape* (e.g. `~/OpenHuman/projects`) useful for +/// diagnosis while not leaking the OS username / full home path (PII). Paths +/// outside the home dir are returned unchanged. 
+pub fn redact_home(path: &Path) -> String { + let s = path.to_string_lossy(); + if let Some(home) = dirs::home_dir() { + let home = home.to_string_lossy(); + if !home.is_empty() { + if let Some(rest) = s.strip_prefix(home.as_ref()) { + return format!("~{rest}"); + } + } + } + s.into_owned() +} + +/// Ensure the agent's action sandbox + default projects home exist and the +/// projects dir is registered as a `ReadWrite` trusted root. Idempotent — safe +/// to call from every boot path (web-chat-only `bootstrap_core_runtime` **and** +/// `start_channels`). +/// +/// Without this on the always-run boot, a fresh desktop install with no +/// messaging integrations leaves `~/OpenHuman/projects` uncreated (the only +/// other creation lived inside the integration-gated `start_channels`), so the +/// shell tool's `current_dir` fails with `ERROR_DIRECTORY` (os error 267) on +/// Windows / `ENOENT` on Unix. See issue #3353 (RC-A). +pub async fn ensure_agent_dirs(config: &mut Config) { + use crate::openhuman::security::{TrustedAccess, TrustedRoot}; + + let projects_dir = crate::openhuman::config::default_projects_dir(); + if let Err(e) = tokio::fs::create_dir_all(&projects_dir).await { + tracing::warn!( + dir = %redact_home(&projects_dir), + error = %e, + "[startup] could not create default projects dir" + ); + } + let projects_path = projects_dir.to_string_lossy().to_string(); + if !config + .autonomy + .trusted_roots + .iter() + .any(|r| r.path == projects_path) + { + config.autonomy.trusted_roots.push(TrustedRoot { + path: projects_path, + access: TrustedAccess::ReadWrite, + }); + } + + let action_dir = config.action_dir.clone(); + if let Err(e) = tokio::fs::create_dir_all(&action_dir).await { + tracing::warn!( + dir = %redact_home(&action_dir), + error = %e, + "[startup] could not create action sandbox dir" + ); + } + tracing::info!( + workspace = %redact_home(&config.workspace_dir), + action = %redact_home(&action_dir), + "[startup] workspace (internal state) and action 
sandbox (tool cwd) directories configured" + ); +} + +/// Ensure `dir` is usable as a process working directory: it must exist (we +/// attempt to create it if missing — covers a dir deleted after launch) and +/// resolve to a directory. Returns a descriptive error naming the path and the +/// Settings location to fix it, instead of letting the OS surface an opaque +/// `ERROR_DIRECTORY` (os error 267) from `CreateProcessW`. See issue #3353 +/// (Fix 2). Cheap stat-only calls on the happy path. +pub fn ensure_usable_cwd(dir: &Path) -> anyhow::Result<()> { + if !dir.exists() { + std::fs::create_dir_all(dir).map_err(|e| { + anyhow::anyhow!( + "Working directory '{}' does not exist and could not be created: {e}. \ + Set a valid path in Settings → Agent access → Working directory.", + dir.display() + ) + })?; + } + if !dir.is_dir() { + anyhow::bail!( + "Working directory '{}' is not a directory. \ + Set a valid path in Settings → Agent access → Working directory.", + dir.display() + ); + } + Ok(()) +} + +fn action_dir_source(config: &Config) -> &'static str { + if crate::openhuman::config::action_dir_env_override().is_some() { + "env" + } else if config.action_dir_override.is_some() { + "override" + } else { + "default" + } +} + +fn agent_paths_payload(config: &Config) -> serde_json::Value { + let projects_dir = crate::openhuman::config::default_projects_dir(); + json!({ + "action_dir": config.action_dir.display().to_string(), + "workspace_dir": config.workspace_dir.display().to_string(), + "projects_dir": projects_dir.display().to_string(), + "action_dir_source": action_dir_source(config), + }) +} + +/// Applies an edit to the agent's `action_dir` sandbox root. +/// +/// Validation (fail-closed): the path is trimmed and `~`-expanded; it must be +/// **absolute**; it must not be an existing *file*; and it must not equal +/// `workspace_dir` (which holds memory DBs / tokens and must never become the +/// agent-writable root). 
A missing directory is auto-created (mirroring the +/// startup auto-create in `channels/runtime/startup.rs`). An empty input clears +/// the override and reverts `action_dir` to the default. +/// +/// On success the override is persisted (`action_dir_override`), `action_dir` is +/// recomputed from the precedence chain, the live `SecurityPolicy` is hot-swapped +/// (`live_policy::set_action_dir`), and `DomainEvent::AgentPathsChanged` is +/// published. Returns the same payload shape as [`get_agent_paths`]. +/// +/// When `OPENHUMAN_ACTION_DIR` is set the env var wins: the override is still +/// persisted, but the effective `action_dir` (and the returned `action_dir`) +/// continues to reflect the env value, and `action_dir_source` reports `"env"`. +pub async fn apply_agent_paths_settings( + config: &mut Config, + update: AgentPathsPatch, +) -> Result, String> { + let mut notes: Vec = Vec::new(); + + if let Some(raw) = update.action_dir { + let trimmed = raw.trim(); + log::debug!( + "[config][agent_paths] apply action_dir edit (input_len={})", + trimmed.len() + ); + + if trimmed.is_empty() { + config.action_dir_override = None; + notes.push("action_dir override cleared (reverted to default)".to_string()); + } else { + let expanded = expand_tilde(trimmed); + let candidate = PathBuf::from(&expanded); + + if !candidate.is_absolute() { + return Err(format!( + "action_dir must be an absolute path (got '{expanded}')" + )); + } + + if candidate.is_file() { + return Err(format!( + "action_dir must be a directory, not a file: {expanded}" + )); + } + + if paths_equal(&candidate, &config.workspace_dir) { + return Err( + "action_dir must not equal the internal workspace directory".to_string() + ); + } + + if !candidate.exists() { + tokio::fs::create_dir_all(&candidate) + .await + .map_err(|e| format!("failed to create action_dir {expanded}: {e}"))?; + notes.push(format!("created action_dir {expanded}")); + } + + config.action_dir_override = Some(candidate); + 
notes.push(format!("action_dir override set to {expanded}")); + } + + config.action_dir = + crate::openhuman::config::resolve_action_dir(&config.action_dir_override); + + config.save().await.map_err(|e| e.to_string())?; + + crate::openhuman::security::live_policy::set_action_dir(config.action_dir.clone()); + crate::core::event_bus::publish_global( + crate::core::event_bus::DomainEvent::AgentPathsChanged, + ); + + log::debug!( + "[config][agent_paths] action_dir now '{}' (source={})", + config.action_dir.display(), + action_dir_source(config) + ); + } + + Ok(RpcOutcome::new(agent_paths_payload(config), notes)) +} + +/// Loads the configuration, applies agent-paths updates, and saves it. +pub async fn load_and_apply_agent_paths_settings( + update: AgentPathsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_agent_paths_settings(&mut config, update).await +} + +fn paths_equal(a: &Path, b: &Path) -> bool { + match (a.canonicalize(), b.canonicalize()) { + (Ok(ca), Ok(cb)) => ca == cb, + _ => a == b, + } +} + +/// Reports the agent's filesystem roots so the UI can render them live +/// instead of hard-coding strings that drift away from `Config`. +pub async fn get_agent_paths() -> Result, String> { + let config = load_config_with_timeout().await?; + Ok(RpcOutcome::new( + agent_paths_payload(&config), + vec![format!( + "agent paths resolved (action={}, workspace={}, source={})", + config.action_dir.display(), + config.workspace_dir.display(), + action_dir_source(&config), + )], + )) +} + +/// Returns the current activity level and its derived settings. 
+pub async fn get_activity_level_settings() -> Result, String> { + let config = load_config_with_timeout().await?; + let level = config.agent_activity_level; + let (cost_min, cost_max) = level.estimated_monthly_cost_range(); + let value = serde_json::json!({ + "level": level as u8, + "level_label": level.as_str(), + "sync_interval_secs": level.sync_interval_secs(), + "heartbeat_enabled": level.heartbeat_enabled(), + "subconscious_enabled": level.subconscious_enabled(), + "token_budget_per_cycle": level.token_budget_per_cycle(), + "estimated_monthly_cost_min_usd": cost_min, + "estimated_monthly_cost_max_usd": cost_max, + }); + Ok(RpcOutcome::single_log( + value, + "activity level settings read", + )) +} + +/// Updates the agent activity level and pushes it into the scheduler gate. +pub async fn apply_activity_level_settings( + config: &mut Config, + update: ActivityLevelSettingsPatch, +) -> Result, String> { + use crate::openhuman::config::schema::activity_level::AgentActivityLevel; + use crate::openhuman::config::SchedulerGateMode; + + if let Some(level_str) = update.level { + let level = AgentActivityLevel::from_str_opt(&level_str).ok_or_else(|| { + format!( + "invalid activity level '{}' \ + (expected off|minimal|moderate|active|always_on or 0-4)", + level_str + ) + })?; + config.agent_activity_level = level; + } + + let level = config.agent_activity_level; + let gate_mode = match level { + AgentActivityLevel::Off => SchedulerGateMode::Off, + AgentActivityLevel::Minimal | AgentActivityLevel::Moderate => SchedulerGateMode::Auto, + AgentActivityLevel::Active | AgentActivityLevel::AlwaysOn => SchedulerGateMode::AlwaysOn, + }; + config.scheduler_gate.mode = gate_mode; + + config.save().await.map_err(|e| e.to_string())?; + + let gate_cfg = config.scheduler_gate.clone(); + crate::openhuman::scheduler_gate::gate::update_config(gate_cfg); + + tracing::info!( + level = %level.as_str(), + gate_mode = %gate_mode.as_str(), + "[config:activity_level] activity level updated" + 
); + + let (cost_min, cost_max) = level.estimated_monthly_cost_range(); + let value = serde_json::json!({ + "level": level as u8, + "level_label": level.as_str(), + "sync_interval_secs": level.sync_interval_secs(), + "heartbeat_enabled": level.heartbeat_enabled(), + "subconscious_enabled": level.subconscious_enabled(), + "token_budget_per_cycle": level.token_budget_per_cycle(), + "estimated_monthly_cost_min_usd": cost_min, + "estimated_monthly_cost_max_usd": cost_max, + }); + Ok(RpcOutcome::new( + value, + vec![format!( + "activity level set to '{}' — saved to {}", + level.as_str(), + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies activity level settings, and saves it. +pub async fn load_and_apply_activity_level_settings( + update: ActivityLevelSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_activity_level_settings(&mut config, update).await +} + +fn memory_sync_settings_value(stored: Option) -> serde_json::Value { + let is_manual = stored == Some(0); + let is_default = stored.is_none(); + let selected_secs = + stored.unwrap_or(crate::openhuman::config::DEFAULT_MEMORY_SYNC_INTERVAL_SECS); + json!({ + "sync_interval_secs": stored, + "selected_secs": selected_secs, + "is_manual": is_manual, + "is_default": is_default, + "default_secs": crate::openhuman::config::DEFAULT_MEMORY_SYNC_INTERVAL_SECS, + "presets": crate::openhuman::config::MEMORY_SYNC_INTERVAL_PRESETS_SECS, + }) +} + +/// Returns the current global memory-sync cadence and its derived view. +pub async fn get_memory_sync_settings() -> Result, String> { + let config = load_config_with_timeout().await?; + let value = memory_sync_settings_value(config.memory_sync_interval_secs); + Ok(RpcOutcome::single_log(value, "memory sync settings read")) +} + +/// Updates the global memory-sync cadence and persists it. 
The running +/// scheduler reads `config.memory_sync_interval_secs` fresh on each tick, so +/// the new cadence takes effect from the next tick without a restart. +pub async fn apply_memory_sync_settings( + config: &mut Config, + update: MemorySyncSettingsPatch, +) -> Result, String> { + config.memory_sync_interval_secs = update.sync_interval_secs; + config.save().await.map_err(|e| e.to_string())?; + + tracing::info!( + sync_interval_secs = ?config.memory_sync_interval_secs, + "[config:memory_sync] memory sync interval updated" + ); + + let stored = config.memory_sync_interval_secs; + let value = memory_sync_settings_value(stored); + let msg = match stored { + Some(0) => "memory sync set to Manual only".to_string(), + Some(n) => format!("memory sync interval set to {n}s"), + None => "memory sync interval reset to default".to_string(), + }; + Ok(RpcOutcome::new( + value, + vec![format!("{msg} — saved to {}", config.config_path.display())], + )) +} + +/// Loads the configuration, applies memory-sync settings, and saves it. +pub async fn load_and_apply_memory_sync_settings( + update: MemorySyncSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_memory_sync_settings(&mut config, update).await +} diff --git a/src/openhuman/config/ops/loader.rs b/src/openhuman/config/ops/loader.rs new file mode 100644 index 0000000000..e7b9eb5efc --- /dev/null +++ b/src/openhuman/config/ops/loader.rs @@ -0,0 +1,521 @@ +//! Config loading, snapshotting, and core runtime-flag helpers. + +use std::path::{Path, PathBuf}; + +use serde::Serialize; +use serde_json::json; + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +pub(crate) fn env_flag_enabled(key: &str) -> bool { + matches!( + std::env::var(key).ok().as_deref(), + Some("1") | Some("true") | Some("TRUE") | Some("yes") | Some("YES") + ) +} + +/// Returns the core RPC URL from environment variables or a default value. 
+pub fn core_rpc_url_from_env() -> String { + std::env::var("OPENHUMAN_CORE_RPC_URL") + .unwrap_or_else(|_| "http://127.0.0.1:7788/rpc".to_string()) +} + +pub(super) const CONFIG_LOAD_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30); + +/// Loads persisted config with a 30s timeout. +/// +/// This is used by JSON-RPC and CLI handlers to ensure they don't hang +/// indefinitely if disk I/O is blocked. +/// +/// The TOML parse itself runs on the blocking pool via +/// `parse_config_with_recovery` (see `src/openhuman/config/schema/load.rs`) +/// so the recursive-descent parser's serde Visitor frames don't compound +/// with whatever deep async tower called us. That's the stack-overflow +/// fix from `crahs.log` (2026-05-17); a per-call cache here would shave +/// the disk read on hot paths but proved racy across the in-process +/// integration tests (re-used workspace paths, concurrent server tasks +/// loading mid-mutation), so it isn't worth it. +pub async fn load_config_with_timeout() -> Result { + match tokio::time::timeout(CONFIG_LOAD_TIMEOUT, Config::load_or_init()).await { + Ok(Ok(mut config)) => { + normalize_loaded_config(&mut config).await; + Ok(config) + } + Ok(Err(e)) => Err(e.to_string()), + Err(_) => Err("Config loading timed out".to_string()), + } +} + +/// Reloads the config file represented by an existing runtime snapshot. +/// +/// Use this for long-lived objects that need fresh config values while +/// staying anchored to their original user/workspace. Unlike +/// [`load_config_with_timeout`], this does not re-resolve the process-global +/// `OPENHUMAN_WORKSPACE` env var on every call. 
+pub async fn reload_config_snapshot_with_timeout(snapshot: &Config) -> Result { + match tokio::time::timeout( + CONFIG_LOAD_TIMEOUT, + Config::load_from_config_path(&snapshot.config_path, &snapshot.workspace_dir), + ) + .await + { + Ok(Ok(mut config)) => { + normalize_loaded_config(&mut config).await; + Ok(config) + } + Ok(Err(e)) => Err(e.to_string()), + Err(_) => Err("Config loading timed out".to_string()), + } +} + +async fn normalize_loaded_config(_config: &mut Config) { + // No-op: welcome-agent routing normalization removed. The welcome agent + // has been deleted; all chat turns route directly to the orchestrator. + // The `chat_onboarding_completed` field in Config is retained for + // backward-compatible deserialization of existing config.toml files + // but is no longer read by routing logic. +} + +/// Returns the default workspace directory fallback (~/.openhuman/workspace). +pub(crate) fn fallback_workspace_dir() -> PathBuf { + crate::openhuman::config::default_root_openhuman_dir() + .unwrap_or_else(|_| env_scoped_fallback_root_dir()) + .join("workspace") +} + +/// Returns the default OpenHuman configuration directory (~/.openhuman). +pub(crate) fn default_openhuman_dir() -> PathBuf { + crate::openhuman::config::default_root_openhuman_dir() + .unwrap_or_else(|_| env_scoped_fallback_root_dir()) +} + +pub(crate) fn env_scoped_fallback_root_dir() -> PathBuf { + let suffix = if crate::api::config::is_staging_app_env( + crate::api::config::app_env_from_env().as_deref(), + ) { + "-staging" + } else { + "" + }; + PathBuf::from(format!(".openhuman{suffix}")) +} + +/// Returns the path to the active workspace marker file. +pub(crate) fn active_workspace_marker_path(default_openhuman_dir: &Path) -> PathBuf { + default_openhuman_dir.join("active_workspace.toml") +} + +/// Returns the parent directory of the config file. 
+pub(crate) fn config_openhuman_dir(config: &Config) -> PathBuf { + config + .config_path + .parent() + .map_or_else(|| PathBuf::from("."), PathBuf::from) +} + +pub(crate) fn is_windows_file_lock_error(error: &std::io::Error) -> bool { + cfg!(windows) && matches!(error.raw_os_error(), Some(32 | 33)) +} + +pub(crate) fn reset_local_data_remove_error(path: &Path, error: &std::io::Error) -> String { + if is_windows_file_lock_error(error) { + tracing::warn!( + path = %path.display(), + error = %error, + "[config] reset_local_data: Windows file lock blocked local data deletion" + ); + return format!( + "Failed to remove {} because it is locked by another OpenHuman window or process. Close all OpenHuman windows and try again. ({error})", + path.display() + ); + } + + format!("Failed to remove {}: {error}", path.display()) +} + +pub(crate) fn reset_local_data_marker_remove_error(path: &Path, error: &std::io::Error) -> String { + if is_windows_file_lock_error(error) { + tracing::warn!( + marker = %path.display(), + error = %error, + "[config] reset_local_data: Windows file lock blocked active workspace marker deletion" + ); + return format!( + "Failed to remove active workspace marker {} because it is locked by another OpenHuman window or process. Close all OpenHuman windows and try again. ({error})", + path.display() + ); + } + + format!("Failed to remove active workspace marker: {error}") +} + +/// Internal helper to reset local data by removing specific directories and markers. 
+pub(crate) async fn reset_local_data_for_paths( + current_openhuman_dir: &Path, + default_openhuman_dir: &Path, +) -> Result, String> { + let active_workspace_marker = active_workspace_marker_path(default_openhuman_dir); + tracing::debug!( + current_dir = %current_openhuman_dir.display(), + default_dir = %default_openhuman_dir.display(), + marker = %active_workspace_marker.display(), + "[config] reset_local_data: starting" + ); + + let mut removed_paths = Vec::new(); + + if active_workspace_marker.exists() { + if let Err(error) = tokio::fs::remove_file(&active_workspace_marker).await { + return Err(reset_local_data_marker_remove_error( + &active_workspace_marker, + &error, + )); + } + tracing::debug!( + marker = %active_workspace_marker.display(), + "[config] reset_local_data: removed active workspace marker" + ); + removed_paths.push(active_workspace_marker.display().to_string()); + } + + for target_dir in [current_openhuman_dir, default_openhuman_dir] { + if !target_dir.exists() { + tracing::debug!( + dir = %target_dir.display(), + "[config] reset_local_data: directory already absent" + ); + continue; + } + + if let Err(error) = tokio::fs::remove_dir_all(target_dir).await { + return Err(reset_local_data_remove_error(target_dir, &error)); + } + tracing::debug!( + dir = %target_dir.display(), + "[config] reset_local_data: removed directory" + ); + removed_paths.push(target_dir.display().to_string()); + } + + Ok(RpcOutcome::new( + json!({ + "removed_paths": removed_paths, + "current_openhuman_dir": current_openhuman_dir.display().to_string(), + "default_openhuman_dir": default_openhuman_dir.display().to_string(), + }), + vec![ + format!( + "reset local data for active config dir {}", + current_openhuman_dir.display() + ), + format!( + "removed default data dir {} if present", + default_openhuman_dir.display() + ), + ], + )) +} + +/// Serializes the current configuration into a JSON snapshot for the UI. 
+pub fn snapshot_config_json(config: &Config) -> Result { + let value = serde_json::to_value(config).map_err(|e| e.to_string())?; + Ok(json!({ + "config": value, + "workspace_dir": config.workspace_dir.display().to_string(), + "config_path": config.config_path.display().to_string(), + })) +} + +/// Serializes the client-facing AI config slice consumed by the settings UI. +pub fn client_config_json(config: &Config) -> serde_json::Value { + let app_version = + std::env::var("OPENHUMAN_APP_VERSION").unwrap_or_else(|_| "unknown".to_string()); + let api_key_set = config + .api_key + .as_deref() + .map(|k| !k.trim().is_empty()) + .unwrap_or(false); + let model_routes: Vec = config + .model_routes + .iter() + .map(|r| serde_json::json!({ "hint": r.hint, "model": r.model })) + .collect(); + let cloud_providers: Vec = config + .cloud_providers + .iter() + .map(|c| { + serde_json::json!({ + "id": c.id, + "slug": c.slug, + "label": c.label, + "endpoint": c.endpoint, + "auth_style": c.auth_style.as_str(), + }) + }) + .collect(); + + serde_json::json!({ + "api_url": config.api_url, + "inference_url": config.inference_url, + "default_model": config.default_model, + "app_version": app_version, + "api_key_set": api_key_set, + "model_routes": model_routes, + "cloud_providers": cloud_providers, + "primary_cloud": config.primary_cloud, + "chat_provider": config.chat_provider, + "reasoning_provider": config.reasoning_provider, + "agentic_provider": config.agentic_provider, + "coding_provider": config.coding_provider, + "memory_provider": config.memory_provider, + "embeddings_provider": config.embeddings_provider, + "heartbeat_provider": config.heartbeat_provider, + "learning_provider": config.learning_provider, + "subconscious_provider": config.subconscious_provider, + "voice_providers": config.voice_providers.iter().map(|v| { + serde_json::json!({ + "id": v.id, + "slug": v.slug, + "label": v.label, + "endpoint": v.endpoint, + "auth_style": v.auth_style.as_str(), + "capability": 
v.capability.as_str(), + "stt_api_style": v.stt_api_style, + "tts_api_style": v.tts_api_style, + "default_stt_model": v.default_stt_model, + "default_tts_voice": v.default_tts_voice, + }) + }).collect::>(), + "stt_provider": config.stt_provider, + "tts_provider": config.tts_provider, + }) +} + +/// Loads config and returns the client-facing AI config slice. +pub async fn load_and_get_client_config_snapshot() -> Result, String> +{ + let config = load_config_with_timeout().await?; + let snapshot = client_config_json(&config); + Ok(RpcOutcome::new( + snapshot, + vec!["client config read".to_string()], + )) +} + +/// Returns a full configuration snapshot for the UI. +pub async fn get_config_snapshot(config: &Config) -> Result, String> { + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "config loaded from {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration from disk and returns a snapshot. +pub async fn load_and_get_config_snapshot() -> Result, String> { + let config = load_config_with_timeout().await?; + get_config_snapshot(&config).await +} + +#[derive(Debug, Clone, Serialize)] +pub struct RuntimeFlagsOut { + pub browser_allow_all: bool, + pub log_prompts: bool, +} + +pub(crate) const BROWSER_ALLOW_ALL_ENV: &str = "OPENHUMAN_BROWSER_ALLOW_ALL"; +pub(crate) const BROWSER_ALLOW_ALL_RPC_ENABLE_ENV: &str = "OPENHUMAN_BROWSER_ALLOW_ALL_RPC_ENABLE"; + +/// Returns the current state of runtime-only flags. +pub fn get_runtime_flags() -> RpcOutcome { + RpcOutcome::single_log(runtime_flags(), "runtime flags read") +} + +pub(crate) fn runtime_flags() -> RuntimeFlagsOut { + RuntimeFlagsOut { + browser_allow_all: env_flag_enabled(BROWSER_ALLOW_ALL_ENV), + log_prompts: env_flag_enabled("OPENHUMAN_LOG_PROMPTS"), + } +} + +/// Updates the `OPENHUMAN_BROWSER_ALLOW_ALL` environment flag. 
+/// +/// **Security note:** when enabled, this disables the browser tool's +/// per-domain allowlist for the entire process. Both transitions are +/// audit-logged at WARN level with a `[SECURITY]` prefix so operators +/// (and `journalctl -g '\[SECURITY\]'` style scrapes) can spot +/// allowlist toggles in the live log stream. +/// +/// `is_private_host` checks still apply to the resolved IP, so this +/// flag does not unlock loopback / RFC1918 destinations. +pub fn set_browser_allow_all(enabled: bool) -> Result, String> { + if enabled && !env_flag_enabled(BROWSER_ALLOW_ALL_RPC_ENABLE_ENV) { + tracing::warn!( + "[SECURITY] refused browser allow-all enable via RPC: \ + set {BROWSER_ALLOW_ALL_ENV}=1 at startup or explicitly set \ + {BROWSER_ALLOW_ALL_RPC_ENABLE_ENV}=1 before using the runtime toggle" + ); + return Err(format!( + "Refusing to enable {BROWSER_ALLOW_ALL_ENV} via RPC. Start OpenHuman with \ + {BROWSER_ALLOW_ALL_ENV}=1, or set {BROWSER_ALLOW_ALL_RPC_ENABLE_ENV}=1 for an \ + explicit operator-approved runtime override." + )); + } + + let was_enabled = env_flag_enabled(BROWSER_ALLOW_ALL_ENV); + if enabled { + unsafe { + std::env::set_var(BROWSER_ALLOW_ALL_ENV, "1"); + } + } else { + unsafe { + std::env::remove_var(BROWSER_ALLOW_ALL_ENV); + } + } + let flags = runtime_flags(); + let now_enabled = flags.browser_allow_all; + + if was_enabled != now_enabled { + if now_enabled { + tracing::warn!( + "[SECURITY] browser allow-all enabled via RPC: \ + per-domain allowlist is now bypassed for all sessions \ + (private-host check still applies)" + ); + } else { + tracing::info!( + "[SECURITY] browser allow-all disabled via RPC: \ + per-domain allowlist re-enforced" + ); + } + } + + let log_msg = if now_enabled { + "[SECURITY] browser allow-all flag set to enabled" + } else { + "[SECURITY] browser allow-all flag set to disabled" + }; + Ok(RpcOutcome::single_log(flags, log_msg)) +} + +/// Returns the operational status of the agent server. 
+pub fn agent_server_status() -> RpcOutcome { + let running = crate::openhuman::service::mock::mock_agent_running().unwrap_or(true); + log::info!("[config] agent_server_status requested: running={running}"); + let payload = json!({ + "running": running, + "url": core_rpc_url_from_env(), + }); + RpcOutcome::single_log(payload, "agent server status checked") +} + +/// Reads dashboard settings exposed to the desktop UI. +pub async fn get_dashboard_settings() -> Result, String> { + let request_id = uuid::Uuid::new_v4().to_string(); + tracing::debug!( + target: "openhuman_core::config", + request_id = %request_id, + method = "openhuman.config_get_dashboard_settings", + "OPENHUMAN: get_dashboard_settings entry" + ); + tracing::debug!( + target: "openhuman_core::config", + request_id = %request_id, + method = "openhuman.config_get_dashboard_settings", + "OPENHUMAN: get_dashboard_settings loading config" + ); + + let config = load_config_with_timeout().await.map_err(|error| { + tracing::warn!( + target: "openhuman_core::config", + request_id = %request_id, + method = "openhuman.config_get_dashboard_settings", + error = %error, + "OPENHUMAN: get_dashboard_settings config load failed" + ); + error + })?; + + tracing::debug!( + target: "openhuman_core::config", + request_id = %request_id, + method = "openhuman.config_get_dashboard_settings", + "OPENHUMAN: get_dashboard_settings serializing dashboard settings" + ); + let result = serde_json::to_value(&config.dashboard).map_err(|error| { + let message = error.to_string(); + tracing::warn!( + target: "openhuman_core::config", + request_id = %request_id, + method = "openhuman.config_get_dashboard_settings", + error = %message, + "OPENHUMAN: get_dashboard_settings serialization failed" + ); + message + })?; + + tracing::debug!( + target: "openhuman_core::config", + request_id = %request_id, + method = "openhuman.config_get_dashboard_settings", + "OPENHUMAN: get_dashboard_settings exit" + ); + Ok(RpcOutcome::new( + result, + 
vec!["dashboard settings read".to_string()], + )) +} + +/// Deletes all local data directories and workspace markers. +/// +/// Runs **inside the core's tokio task**, which means the running core +/// holds open handles to SQLite databases, log files, the Sentry session +/// store, etc. On Windows, `remove_dir_all` therefore fails with +/// `ERROR_SHARING_VIOLATION` (os error 32) — see OPENHUMAN-TAURI-AF. +/// +/// GUI callers must use the Tauri-side `reset_local_data` command instead: +/// it stops the embedded core via `CoreProcessHandle::shutdown` (dropping +/// the file handles), removes the directories from the Tauri host process, +/// and restarts the core. This JSON-RPC method is kept for headless / CLI +/// callers where in-process removal is acceptable (POSIX file semantics +/// tolerate unlinking open files; on Windows the CLI invocation runs +/// without the core attached, so no handle is in the way). +pub async fn reset_local_data() -> Result, String> { + let config = load_config_with_timeout().await?; + let current_openhuman_dir = config_openhuman_dir(&config); + let default_openhuman_dir = default_openhuman_dir(); + reset_local_data_for_paths(¤t_openhuman_dir, &default_openhuman_dir).await +} + +/// Reports the resolved paths that `reset_local_data` would remove, without +/// performing any filesystem changes. +/// +/// Lets the Tauri-side `reset_local_data` command discover the active +/// workspace dir, the default `~/.openhuman` dir (which can differ when +/// `OPENHUMAN_WORKSPACE` is set or a staging build is in use), and the +/// active workspace marker file **before** the core sidecar is shut down — +/// after which the Tauri shell removes them while no process holds open +/// handles. See OPENHUMAN-TAURI-AF for the Windows file-locking failure +/// that motivated the split. 
+pub async fn get_data_paths() -> Result, String> { + let config = load_config_with_timeout().await?; + let current_openhuman_dir = config_openhuman_dir(&config); + let default_openhuman_dir = default_openhuman_dir(); + let active_workspace_marker = active_workspace_marker_path(&default_openhuman_dir); + Ok(RpcOutcome::new( + json!({ + "current_openhuman_dir": current_openhuman_dir.display().to_string(), + "default_openhuman_dir": default_openhuman_dir.display().to_string(), + "active_workspace_marker_path": active_workspace_marker.display().to_string(), + }), + vec![format!( + "data paths resolved (current={}, default={})", + current_openhuman_dir.display(), + default_openhuman_dir.display() + )], + )) +} diff --git a/src/openhuman/config/ops/mod.rs b/src/openhuman/config/ops/mod.rs new file mode 100644 index 0000000000..14513070c6 --- /dev/null +++ b/src/openhuman/config/ops/mod.rs @@ -0,0 +1,71 @@ +//! JSON-RPC / CLI controller surface for persisted config and runtime flags. + +mod agent; +mod loader; +mod model; +mod sandbox; +mod ui; + +// ── Public re-exports (preserving the flat external API) ───────────────────── + +pub use agent::redact_home; +pub use agent::{ + add_auto_approve_tool, apply_activity_level_settings, apply_agent_paths_settings, + apply_agent_settings, apply_autonomy_settings, apply_memory_sync_settings, ensure_agent_dirs, + ensure_usable_cwd, expand_tilde, get_activity_level_settings, get_agent_paths, + get_agent_settings, get_autonomy_settings, get_memory_sync_settings, + load_and_apply_activity_level_settings, load_and_apply_agent_paths_settings, + load_and_apply_agent_settings, load_and_apply_autonomy_settings, + load_and_apply_memory_sync_settings, ActivityLevelSettingsPatch, AgentPathsPatch, + AgentSettingsPatch, AutonomySettingsPatch, MemorySyncSettingsPatch, +}; + +pub use loader::{ + agent_server_status, client_config_json, core_rpc_url_from_env, get_config_snapshot, + get_dashboard_settings, get_data_paths, get_runtime_flags, 
load_and_get_client_config_snapshot, + load_and_get_config_snapshot, load_config_with_timeout, reload_config_snapshot_with_timeout, + reset_local_data, set_browser_allow_all, snapshot_config_json, RuntimeFlagsOut, +}; +// expose internal helpers needed by tests (ops_tests.rs uses super::*) +#[cfg(test)] +pub(crate) use crate::openhuman::config::Config; +#[cfg(test)] +pub(crate) use loader::{ + active_workspace_marker_path, config_openhuman_dir, default_openhuman_dir, env_flag_enabled, + fallback_workspace_dir, reset_local_data_for_paths, reset_local_data_remove_error, + BROWSER_ALLOW_ALL_ENV, BROWSER_ALLOW_ALL_RPC_ENABLE_ENV, +}; +#[cfg(test)] +pub(crate) use std::path::PathBuf; + +pub use model::{ + apply_composio_trigger_settings, apply_local_ai_settings, apply_memory_settings, + apply_model_settings, apply_runtime_settings, get_composio_trigger_settings, + load_and_apply_composio_trigger_settings, load_and_apply_local_ai_settings, + load_and_apply_memory_settings, load_and_apply_model_settings, load_and_apply_runtime_settings, + load_and_resolve_api_url, ComposioTriggerSettingsPatch, LocalAiSettingsPatch, + MemorySettingsPatch, ModelSettingsPatch, RuntimeSettingsPatch, +}; + +pub use sandbox::{ + apply_sandbox_settings, get_sandbox_settings, load_and_apply_sandbox_settings, + SandboxSettingsPatch, +}; + +pub use ui::{ + apply_analytics_settings, apply_browser_settings, apply_meet_settings, + apply_screen_intelligence_settings, apply_search_settings, get_dictation_settings, + get_onboarding_completed, get_search_settings, get_voice_server_settings, + load_and_apply_analytics_settings, load_and_apply_browser_settings, + load_and_apply_dictation_settings, load_and_apply_meet_settings, + load_and_apply_screen_intelligence_settings, load_and_apply_search_settings, + load_and_apply_voice_server_settings, set_onboarding_completed, + workspace_onboarding_flag_exists, workspace_onboarding_flag_resolve, + workspace_onboarding_flag_set, AnalyticsSettingsPatch, 
BrowserSettingsPatch, + DictationSettingsPatch, MeetSettingsPatch, ScreenIntelligenceSettingsPatch, + SearchSettingsPatch, VoiceServerSettingsPatch, +}; + +#[cfg(test)] +#[path = "../ops_tests.rs"] +mod tests; diff --git a/src/openhuman/config/ops/model.rs b/src/openhuman/config/ops/model.rs new file mode 100644 index 0000000000..d1b0db1e14 --- /dev/null +++ b/src/openhuman/config/ops/model.rs @@ -0,0 +1,456 @@ +//! Model/provider config operations: AI providers, memory, runtime, local AI, Composio. + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +use super::loader::{load_config_with_timeout, snapshot_config_json}; + +#[derive(Debug, Clone, Default)] +pub struct ModelSettingsPatch { + pub api_url: Option, + /// Custom OpenAI-compatible LLM endpoint. Empty string clears the + /// override (inference falls back through the OpenHuman backend). + pub inference_url: Option, + pub api_key: Option, + pub default_model: Option, + pub default_temperature: Option, + /// When `Some`, REPLACES the entire `config.model_routes` array with the + /// supplied (hint, model) pairs. Pass `Some(vec![])` to clear all routes + /// (e.g. when switching back to the OpenHuman backend whose built-in + /// router picks per-task models on its own). Leave `None` to keep the + /// current routes untouched. + pub model_routes: Option>, + /// When `Some`, REPLACES the entire `config.cloud_providers` array with + /// the supplied entries (each lacking the API key — those live in + /// `auth-profiles.json` via [`crate::openhuman::credentials::AuthService`]). + /// Pass `Some(vec![])` to clear all third-party cloud providers. + pub cloud_providers: + Option>, + /// Id of the `cloud_providers` entry used when a workload routes to + /// `"cloud"`. Empty string clears (factory falls back to OpenHuman). 
+ pub primary_cloud: Option, + pub chat_provider: Option, + pub reasoning_provider: Option, + pub agentic_provider: Option, + pub coding_provider: Option, + pub memory_provider: Option, + pub embeddings_provider: Option, + pub heartbeat_provider: Option, + pub learning_provider: Option, + pub subconscious_provider: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct MemorySettingsPatch { + pub backend: Option, + pub auto_save: Option, + pub embedding_provider: Option, + pub embedding_model: Option, + pub embedding_dimensions: Option, + /// Stepped user-facing memory-context window preset (see + /// [`crate::openhuman::config::schema::agent::MemoryContextWindow`]). + /// Accepts `"minimal" | "balanced" | "extended" | "maximum"`. + /// Unknown values are silently ignored so old clients can keep + /// posting partial patches. + pub memory_window: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct RuntimeSettingsPatch { + pub kind: Option, + pub reasoning_enabled: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct LocalAiSettingsPatch { + pub runtime_enabled: Option, + /// MVP opt-in marker. Bootstrap hard-overrides status to "disabled" + /// when this is `false`, regardless of `runtime_enabled`. The unified + /// AI panel ties the two together (both flip on enable, both flip + /// off on disable) so a single toggle gives the user the obvious + /// behaviour without needing to apply a preset first. + pub opt_in_confirmed: Option, + pub provider: Option, + pub base_url: Option>, + pub model_id: Option, + pub chat_model_id: Option, + pub usage_embeddings: Option, + pub usage_heartbeat: Option, + pub usage_learning_reflection: Option, + pub usage_subconscious: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct ComposioTriggerSettingsPatch { + /// When `Some(true)`, disables triage for all toolkits. + pub triage_disabled: Option, + /// When `Some(v)`, replaces the per-toolkit opt-out list entirely. 
+ pub triage_disabled_toolkits: Option>, +} + +/// Updates the model-related settings in the configuration. +pub async fn apply_model_settings( + config: &mut Config, + update: ModelSettingsPatch, +) -> Result, String> { + if let Some(api_url) = update.api_url { + config.api_url = if api_url.trim().is_empty() { + None + } else { + Some(api_url) + }; + } + if let Some(inference_url) = update.inference_url { + config.inference_url = if inference_url.trim().is_empty() { + None + } else { + Some(inference_url.trim().to_string()) + }; + } + if let Some(api_key) = update.api_key { + let trimmed_key = api_key.trim(); + config.api_key = if trimmed_key.is_empty() { + None + } else { + Some(trimmed_key.to_string()) + }; + } + if let Some(model) = update.default_model { + let trimmed = model.trim(); + config.default_model = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + }; + if let Some(ref m) = config.default_model { + if !crate::openhuman::inference::provider::factory::is_known_openhuman_tier(m) { + log::warn!( + "[config][model-settings] default_model '{}' is not a recognized \ + OpenHuman backend tier — it will be replaced with the platform \ + default at inference time.", + m + ); + } + } + } + if let Some(temp) = update.default_temperature { + config.default_temperature = temp; + } + if let Some(routes) = update.model_routes { + config.model_routes = routes; + } + if let Some(providers) = update.cloud_providers { + use crate::openhuman::config::schema::cloud_providers::is_slug_reserved; + let preserved: Vec<_> = config + .cloud_providers + .iter() + .filter(|e| is_slug_reserved(e.slug.trim())) + .cloned() + .collect(); + log::debug!( + "[config] apply_model_settings: preserving {} reserved cloud provider(s) before overwrite", + preserved.len() + ); + config.cloud_providers = providers; + let before_reinject = config.cloud_providers.len(); + for entry in preserved { + let preserved_slug = entry.slug.trim(); + if !config + .cloud_providers + 
.iter() + .any(|e| e.slug.trim() == preserved_slug) + { + config.cloud_providers.push(entry); + } + } + log::debug!( + "[config] apply_model_settings: reinjected {} reserved cloud provider(s)", + config.cloud_providers.len() - before_reinject + ); + } + if let Some(primary) = update.primary_cloud { + let trimmed = primary.trim(); + config.primary_cloud = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + }; + } + + let normalise_provider = |s: String| -> Option { + let t = s.trim(); + if t.is_empty() { + None + } else { + Some(t.to_string()) + } + }; + if let Some(s) = update.chat_provider { + config.chat_provider = normalise_provider(s); + } + if let Some(s) = update.reasoning_provider { + config.reasoning_provider = normalise_provider(s); + } + if let Some(s) = update.agentic_provider { + config.agentic_provider = normalise_provider(s); + } + if let Some(s) = update.coding_provider { + config.coding_provider = normalise_provider(s); + } + if let Some(s) = update.memory_provider { + config.memory_provider = normalise_provider(s); + } + if let Some(s) = update.embeddings_provider { + config.embeddings_provider = normalise_provider(s); + } + if let Some(s) = update.heartbeat_provider { + config.heartbeat_provider = normalise_provider(s); + } + if let Some(s) = update.learning_provider { + config.learning_provider = normalise_provider(s); + } + if let Some(s) = update.subconscious_provider { + config.subconscious_provider = normalise_provider(s); + } + + config.save().await.map_err(|e| e.to_string())?; + // #1574 §4: the AIPanel workload matrix changes the embedder via THIS + // (model-settings) path — `embeddings_provider` above — not the + // memory-settings path. Trigger the same idempotent re-embed backfill + // so a UI embedder switch recovers prior memory under the new + // signature. Coverage-gated + non-fatal: if the active signature did + // not actually change, this enqueues nothing. 
+ crate::openhuman::memory_queue::ensure_reembed_backfill(config); + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "model settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies model settings updates, and saves it. +pub async fn load_and_apply_model_settings( + update: ModelSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_model_settings(&mut config, update).await +} + +/// Updates the memory-related settings in the configuration. +pub async fn apply_memory_settings( + config: &mut Config, + update: MemorySettingsPatch, +) -> Result, String> { + if let Some(backend) = update.backend { + config.memory.backend = backend; + } + if let Some(auto_save) = update.auto_save { + config.memory.auto_save = auto_save; + } + if let Some(provider) = update.embedding_provider { + config.memory.embedding_provider = provider; + } + if let Some(model) = update.embedding_model { + config.memory.embedding_model = model; + } + if let Some(dimensions) = update.embedding_dimensions { + config.memory.embedding_dimensions = dimensions; + } + if let Some(window_label) = update.memory_window.as_deref() { + if let Some(window) = + crate::openhuman::config::schema::MemoryContextWindow::from_str_opt(window_label) + { + config.agent.memory_window = Some(window); + } else { + tracing::warn!( + requested = window_label, + "[config] unknown memory_window preset — leaving existing setting unchanged" + ); + } + } + config.save().await.map_err(|e| e.to_string())?; + // #1574 §4: the embedder may have just changed (provider/model/dims). + // Ensure a re-embed backfill chain exists for the new active signature + // so prior memory becomes retrievable again instead of silently going + // dark. Idempotent + non-fatal (covered space enqueues nothing; errors + // are logged, never fail the settings save). 
§7's migration is + // one-shot so it does not cover a later switch — this does. + crate::openhuman::memory_queue::ensure_reembed_backfill(config); + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "memory settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies memory settings updates, and saves it. +pub async fn load_and_apply_memory_settings( + update: MemorySettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_memory_settings(&mut config, update).await +} + +/// Updates the runtime-related settings in the configuration. +pub async fn apply_runtime_settings( + config: &mut Config, + update: RuntimeSettingsPatch, +) -> Result, String> { + if let Some(kind) = update.kind { + config.runtime.kind = kind; + } + if let Some(reasoning_enabled) = update.reasoning_enabled { + config.runtime.reasoning_enabled = Some(reasoning_enabled); + } + config.save().await.map_err(|e| e.to_string())?; + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "runtime settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies runtime settings updates, and saves it. +pub async fn load_and_apply_runtime_settings( + update: RuntimeSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_runtime_settings(&mut config, update).await +} + +/// Updates the local-AI runtime + per-feature usage flags in the configuration. 
+pub async fn apply_local_ai_settings(
+    config: &mut Config,
+    update: LocalAiSettingsPatch,
+) -> Result, String> {
+    // Only fields present in the patch are written; an absent field leaves the
+    // stored value untouched.
+    if let Some(v) = update.runtime_enabled {
+        config.local_ai.runtime_enabled = v;
+    }
+    if let Some(v) = update.opt_in_confirmed {
+        config.local_ai.opt_in_confirmed = v;
+    }
+    // The provider is written before `base_url` is examined, so the Ollama
+    // guard below runs against the config as already updated by this patch.
+    if let Some(provider) = update.provider {
+        config.local_ai.provider =
+            crate::openhuman::inference::local::provider::normalize_provider(&provider);
+    }
+    if let Some(base_url) = update.base_url {
+        config.local_ai.base_url = match base_url {
+            // An inner `None` or a blank string clears the stored URL.
+            None => None,
+            Some(base_url) if base_url.trim().is_empty() => None,
+            // Ollama URLs go through `validate_ollama_url`; its error returns
+            // early via `?`, before `config.save()` is reached.
+            Some(base_url)
+                if crate::openhuman::inference::local::provider::provider_from_config(config)
+                    == crate::openhuman::inference::local::provider::LocalAiProvider::Ollama =>
+            {
+                Some(crate::openhuman::inference::local::validate_ollama_url(
+                    &base_url,
+                )?)
+            }
+            // Any other provider: stored trimmed, with trailing '/' removed.
+            Some(base_url) => Some(base_url.trim().trim_end_matches('/').to_string()),
+        };
+    }
+    if let Some(model_id) = update.model_id {
+        config.local_ai.model_id = model_id.trim().to_string();
+    }
+    if let Some(chat_model_id) = update.chat_model_id {
+        config.local_ai.chat_model_id = chat_model_id.trim().to_string();
+    }
+    if let Some(v) = update.usage_embeddings {
+        config.local_ai.usage.embeddings = v;
+    }
+    if let Some(v) = update.usage_heartbeat {
+        config.local_ai.usage.heartbeat = v;
+    }
+    if let Some(v) = update.usage_learning_reflection {
+        config.local_ai.usage.learning_reflection = v;
+    }
+    if let Some(v) = update.usage_subconscious {
+        config.local_ai.usage.subconscious = v;
+    }
+    // Persist first, then return the snapshot produced by `snapshot_config_json`
+    // together with a single log line naming the config path.
+    config.save().await.map_err(|e| e.to_string())?;
+    let snapshot = snapshot_config_json(config)?;
+    Ok(RpcOutcome::new(
+        snapshot,
+        vec![format!(
+            "local AI settings saved to {}",
+            config.config_path.display()
+        )],
+    ))
+}
+
+/// Loads the configuration, applies local-AI settings updates, and saves it.
+pub async fn load_and_apply_local_ai_settings(
+    update: LocalAiSettingsPatch,
+) -> Result, String> {
+    // A failed or timed-out config load is returned as-is; nothing is applied.
+    let mut config = load_config_with_timeout().await?;
+    apply_local_ai_settings(&mut config, update).await
+}
+
+/// Updates the Composio trigger-triage settings in the configuration.
+///
+/// Fields left as `None` in `update` are not touched. The config is saved
+/// before the snapshot is built; a save error is returned as its string form.
+pub async fn apply_composio_trigger_settings(
+    config: &mut Config,
+    update: ComposioTriggerSettingsPatch,
+) -> Result, String> {
+    if let Some(v) = update.triage_disabled {
+        config.composio.triage_disabled = v;
+        tracing::debug!(
+            triage_disabled = v,
+            "[config][composio] triage_disabled updated"
+        );
+    }
+    if let Some(toolkits) = update.triage_disabled_toolkits {
+        // Logged before the assignment because the assignment moves `toolkits`.
+        tracing::debug!(
+            count = toolkits.len(),
+            "[config][composio] triage_disabled_toolkits updated"
+        );
+        // Whole-list replacement, not a merge with the stored list.
+        config.composio.triage_disabled_toolkits = toolkits;
+    }
+    config.save().await.map_err(|e| e.to_string())?;
+    let snapshot = snapshot_config_json(config)?;
+    Ok(RpcOutcome::new(
+        snapshot,
+        vec![format!(
+            "composio trigger settings saved to {}",
+            config.config_path.display()
+        )],
+    ))
+}
+
+/// Loads the configuration, applies composio trigger settings, and saves it.
+pub async fn load_and_apply_composio_trigger_settings(
+    update: ComposioTriggerSettingsPatch,
+) -> Result, String> {
+    let mut config = load_config_with_timeout().await?;
+    apply_composio_trigger_settings(&mut config, update).await
+}
+
+/// Reads the current composio trigger-triage settings.
+///
+/// Returns a JSON object with the keys `triage_disabled` and
+/// `triage_disabled_toolkits`, taken from the freshly loaded config.
+pub async fn get_composio_trigger_settings() -> Result, String> {
+    let config = load_config_with_timeout().await?;
+    let result = serde_json::json!({
+        "triage_disabled": config.composio.triage_disabled,
+        "triage_disabled_toolkits": config.composio.triage_disabled_toolkits,
+    });
+    Ok(RpcOutcome::new(
+        result,
+        vec!["composio trigger settings read".to_string()],
+    ))
+}
+
+/// Resolves the effective API URL from configuration or defaults.
+pub async fn load_and_resolve_api_url() -> Result, String> {
+    let config = load_config_with_timeout().await?;
+    let resolved = crate::api::config::effective_api_url(&config.api_url);
+    Ok(RpcOutcome::new(
+        serde_json::json!({ "api_url": resolved }),
+        Vec::new(),
+    ))
+}
diff --git a/src/openhuman/config/ops/sandbox.rs b/src/openhuman/config/ops/sandbox.rs
new file mode 100644
index 0000000000..18b9ca6351
--- /dev/null
+++ b/src/openhuman/config/ops/sandbox.rs
@@ -0,0 +1,170 @@
+//! Sandbox / Docker runtime config operations.
+
+use crate::openhuman::config::Config;
+use crate::rpc::RpcOutcome;
+
+use super::loader::{load_config_with_timeout, snapshot_config_json};
+
+/// Partial update for the `[security.sandbox]` + `[runtime.docker]` blocks.
+///
+/// Every field is optional; `None` leaves the stored value untouched.
+#[derive(Debug, Clone, Default)]
+pub struct SandboxSettingsPatch {
+    /// One of `auto`, `landlock`, `firejail`, `bubblewrap`, `docker`, `none`.
+    /// Any other string is rejected by [`apply_sandbox_settings`].
+    pub backend: Option,
+    pub enabled: Option,
+    /// Docker image name; rejected when blank after trimming.
+    pub docker_image: Option,
+    pub docker_memory_limit_mb: Option,
+    /// Docker CPU limit; must be a finite number greater than zero.
+    pub docker_cpu_limit: Option,
+    /// NOTE(review): [`apply_sandbox_settings`] only logs the length of this
+    /// list and never stores it — confirm whether that is intentional.
+    pub env_passthrough: Option>,
+}
+
+/// Reads the sandbox settings and probes the host for available backends.
+///
+/// The returned JSON object carries the configured values plus two probes:
+/// `docker_available` (result of `docker info`, bounded by a 5 s timeout) and
+/// `detected_backend` (see `detect_os_sandbox_backend`). `enabled` is reported
+/// as `true` when the config leaves it unset, and `env_passthrough` is the
+/// constant `SANDBOX_ENV_PASSTHROUGH` rather than a per-config value.
+pub async fn get_sandbox_settings() -> Result, String> {
+    let config = load_config_with_timeout().await?;
+    let sandbox = &config.sandbox;
+    let docker = &config.runtime.docker;
+
+    let docker_available = is_docker_available().await;
+
+    let backend_str = match sandbox.backend {
+        crate::openhuman::config::SandboxBackend::Auto => "auto",
+        crate::openhuman::config::SandboxBackend::Landlock => "landlock",
+        crate::openhuman::config::SandboxBackend::Firejail => "firejail",
+        crate::openhuman::config::SandboxBackend::Bubblewrap => "bubblewrap",
+        crate::openhuman::config::SandboxBackend::Docker => "docker",
+        crate::openhuman::config::SandboxBackend::None => "none",
+    };
+
+    let detected_backend = detect_os_sandbox_backend();
+
+    let value = serde_json::json!({
+        "enabled": sandbox.enabled.unwrap_or(true),
+        "backend": backend_str,
+        "docker_image": docker.image,
+        "docker_memory_limit_mb": docker.memory_limit_mb,
+        "docker_cpu_limit": docker.cpu_limit,
+        "docker_available": docker_available,
+        "detected_backend": detected_backend,
+        "env_passthrough": crate::openhuman::sandbox::ops::SANDBOX_ENV_PASSTHROUGH,
+    });
+    log::debug!("[config][sandbox] get_sandbox_settings: backend={backend_str}, docker_available={docker_available}");
+    Ok(RpcOutcome::single_log(value, "sandbox settings read"))
+}
+
+/// Applies a [`SandboxSettingsPatch`] to `config` and saves it.
+///
+/// # Errors
+///
+/// Returns `Err` without saving when `backend` is not a known name, when
+/// `docker_image` is blank, or when `docker_cpu_limit` is not a finite number
+/// greater than zero. Fields validated earlier in the function have already
+/// been written to the in-memory `config` at that point. A failed save is
+/// returned as its string form.
+pub async fn apply_sandbox_settings(
+    config: &mut Config,
+    update: SandboxSettingsPatch,
+) -> Result, String> {
+    if let Some(ref backend) = update.backend {
+        config.sandbox.backend = match backend.as_str() {
+            "auto" => crate::openhuman::config::SandboxBackend::Auto,
+            "landlock" => crate::openhuman::config::SandboxBackend::Landlock,
+            "firejail" => crate::openhuman::config::SandboxBackend::Firejail,
+            "bubblewrap" => crate::openhuman::config::SandboxBackend::Bubblewrap,
+            "docker" => crate::openhuman::config::SandboxBackend::Docker,
+            "none" => crate::openhuman::config::SandboxBackend::None,
+            other => {
+                log::warn!("[config][sandbox] rejected unknown backend: {other}");
+                return Err(format!(
+                    "unknown sandbox backend '{other}'; valid: auto, landlock, firejail, bubblewrap, docker, none"
+                ));
+            }
+        };
+    }
+    if let Some(enabled) = update.enabled {
+        config.sandbox.enabled = Some(enabled);
+    }
+    if let Some(ref image) = update.docker_image {
+        let trimmed = image.trim();
+        if trimmed.is_empty() {
+            return Err("docker_image must not be blank".into());
+        }
+        config.runtime.docker.image = trimmed.to_string();
+    }
+    if let Some(memory) = update.docker_memory_limit_mb {
+        config.runtime.docker.memory_limit_mb = Some(memory);
+    }
+    if let Some(cpu) = update.docker_cpu_limit {
+        // NaN compares false against every bound and +inf counts as positive,
+        // so `cpu <= 0.0` alone would let both through and persist them.
+        if !cpu.is_finite() {
+            return Err("docker_cpu_limit must be a finite number".into());
+        }
+        if cpu <= 0.0 {
+            return Err("docker_cpu_limit must be positive".into());
+        }
+        config.runtime.docker.cpu_limit = Some(cpu);
+    }
+    if let Some(ref passthrough) = update.env_passthrough {
+        // NOTE(review): the list is only logged here, never written to
+        // `config` — confirm whether persisting it is still to be done.
+        log::debug!(
+            "[config][sandbox] env_passthrough update: {} vars",
+            passthrough.len()
+        );
+    }
+
+    config.save().await.map_err(|e| e.to_string())?;
+
+    log::debug!(
+        "[config][sandbox] sandbox settings saved to {}",
+        config.config_path.display()
+    );
+    let snapshot = snapshot_config_json(config)?;
+    Ok(RpcOutcome::new(
+        snapshot,
+        vec![format!(
+            "sandbox settings saved to {}",
+            config.config_path.display()
+        )],
+    ))
+}
+
+/// Loads the configuration, applies sandbox settings updates, and saves it.
+pub async fn load_and_apply_sandbox_settings(
+    update: SandboxSettingsPatch,
+) -> Result, String> {
+    let mut config = load_config_with_timeout().await?;
+    apply_sandbox_settings(&mut config, update).await
+}
+
+/// Returns `true` when `docker info` exits successfully within five seconds.
+///
+/// A spawn failure, a non-zero exit status and a timeout all yield `false`.
+async fn is_docker_available() -> bool {
+    let fut = tokio::process::Command::new("docker")
+        .arg("info")
+        .stdout(std::process::Stdio::null())
+        .stderr(std::process::Stdio::null())
+        .status();
+    match tokio::time::timeout(std::time::Duration::from_secs(5), fut).await {
+        Ok(Ok(status)) => status.success(),
+        _ => false,
+    }
+}
+
+/// Names the sandbox mechanism detected for the current OS.
+///
+/// On Linux the probes run in order: the landlock securityfs path, then
+/// `firejail --version`, then `bwrap --version`; the first hit wins and
+/// `"none"` is the fallback. macOS and Windows return fixed strings.
+///
+/// NOTE(review): the Linux probes use the blocking `std::process::Command`
+/// and this function is called from an `async fn`; `.status().is_ok()` only
+/// checks that the binary could be spawned, not that it exited successfully.
+fn detect_os_sandbox_backend() -> &'static str {
+    #[cfg(target_os = "linux")]
+    {
+        if std::path::Path::new("/sys/kernel/security/landlock").exists() {
+            return "landlock";
+        }
+        if std::process::Command::new("firejail")
+            .arg("--version")
+            .stdout(std::process::Stdio::null())
+            .stderr(std::process::Stdio::null())
+            .status()
+            .is_ok()
+        {
+            return "firejail";
+        }
+        if std::process::Command::new("bwrap")
+            .arg("--version")
+            .stdout(std::process::Stdio::null())
+            .stderr(std::process::Stdio::null())
+            .status()
+            .is_ok()
+        {
+            return "bubblewrap";
+        }
+        "none"
+    }
+    #[cfg(target_os = "macos")]
+    {
+        "seatbelt"
+    }
+    #[cfg(target_os = "windows")]
+    {
+        "appcontainer"
+    }
+    #[cfg(not(any(target_os = "linux", target_os = "macos", target_os = "windows")))]
+    {
+        "none"
+    }
+}
diff --git a/src/openhuman/config/ops/ui.rs b/src/openhuman/config/ops/ui.rs
new file mode 100644
index 0000000000..52898c3d4c
--- /dev/null
+++ b/src/openhuman/config/ops/ui.rs
@@ -0,0 +1,628 @@
+//! UI-facing config operations: browser, screen intelligence, analytics, meet,
+//! search, dictation, voice server, onboarding flags.
+
+use serde_json::json;
+
+use crate::openhuman::config::Config;
+use crate::openhuman::screen_intelligence;
+use crate::rpc::RpcOutcome;
+
+use super::loader::{fallback_workspace_dir, load_config_with_timeout, snapshot_config_json};
+
+/// Partial update for the browser settings; `None` leaves the value unchanged.
+#[derive(Debug, Clone, Default)]
+pub struct BrowserSettingsPatch {
+    pub enabled: Option,
+}
+
+/// Partial update for the screen intelligence settings.
+#[derive(Debug, Clone, Default)]
+pub struct ScreenIntelligenceSettingsPatch {
+    pub enabled: Option,
+    pub capture_policy: Option,
+    pub policy_mode: Option,
+    /// Clamped to `0.2..=30.0` by `apply_screen_intelligence_settings`.
+    pub baseline_fps: Option,
+    pub vision_enabled: Option,
+    pub autocomplete_enabled: Option,
+    pub use_vision_model: Option,
+    pub keep_screenshots: Option,
+    /// Replaces the stored allowlist wholesale when present.
+    pub allowlist: Option>,
+    /// Replaces the stored denylist wholesale when present.
+    pub denylist: Option>,
+}
+
+/// Partial update for the analytics toggle.
+#[derive(Debug, Clone, Default)]
+pub struct AnalyticsSettingsPatch {
+    pub enabled: Option,
+}
+
+/// Partial update for the Google Meet integration settings.
+#[derive(Debug, Clone, Default)]
+pub struct MeetSettingsPatch {
+    pub auto_orchestrator_handoff: Option,
+}
+
+/// Partial update for the search engine and web-access allowlist settings.
+#[derive(Debug, Clone, Default)]
+pub struct SearchSettingsPatch {
+    /// One of `disabled` | `managed` | `parallel` | `brave` | `querit`.
+    /// Empty/unknown values are rejected by `apply_search_settings`.
+    /// Runtime fallback to `managed` applies only to persisted/legacy config
+    /// values resolved by `SearchConfig::effective_engine()`.
+    pub engine: Option,
+    /// 1..=20. Out-of-range values are rejected by `apply_search_settings`.
+    pub max_results: Option,
+    /// Per-request timeout in seconds (default 15). Values outside 1..=120
+    /// are rejected by `apply_search_settings`.
+    pub timeout_secs: Option,
+    /// Parallel API key. An empty string clears the stored key.
+    pub parallel_api_key: Option,
+    /// Brave Search API key. An empty string clears the stored key.
+    pub brave_api_key: Option,
+    /// Querit API key. An empty string clears the stored key.
+    pub querit_api_key: Option,
+    /// Websites the assistant may open/read (`web_fetch` / `curl`), as a
+    /// host allowlist. Entries are exact hosts (`reuters.com`), which also
+    /// match their subdomains, or `"*"` for all public sites. Empty list
+    /// blocks all web access. Mirrors `[http_request].allowed_domains`.
+    pub allowed_domains: Option>,
+    /// Convenience toggle for the "Allow all sites" switch. `Some(true)`
+    /// sets the allowlist to `["*"]`; `Some(false)` drops the wildcard while
+    /// keeping any explicit hosts. Applied after `allowed_domains`.
+    pub allow_all: Option,
+}
+
+/// Represents a partial update to dictation-related settings.
+// NOTE(review): unlike the patch structs above, this one carries no
+// `#[derive(Debug, Clone, Default)]` — confirm whether that is deliberate.
+pub struct DictationSettingsPatch {
+    pub enabled: Option,
+    pub hotkey: Option,
+    pub activation_mode: Option,
+    pub llm_refinement: Option,
+    pub streaming: Option,
+    pub streaming_interval_ms: Option,
+}
+
+/// Represents a partial update to voice server related settings.
+// NOTE(review): no derives here either — see `DictationSettingsPatch`.
+pub struct VoiceServerSettingsPatch {
+    pub auto_start: Option,
+    pub hotkey: Option,
+    pub activation_mode: Option,
+    pub skip_cleanup: Option,
+    pub min_duration_secs: Option,
+    pub silence_threshold: Option,
+    pub custom_dictionary: Option>,
+    pub always_on_enabled: Option,
+    pub wake_word: Option,
+}
+
+/// Updates the browser-related settings in the configuration.
+///
+/// Saves the config and returns the snapshot from `snapshot_config_json`
+/// with one log line naming the config path.
+pub async fn apply_browser_settings(
+    config: &mut Config,
+    update: BrowserSettingsPatch,
+) -> Result, String> {
+    if let Some(enabled) = update.enabled {
+        config.browser.enabled = enabled;
+    }
+    config.save().await.map_err(|e| e.to_string())?;
+    let snapshot = snapshot_config_json(config)?;
+    Ok(RpcOutcome::new(
+        snapshot,
+        vec![format!(
+            "browser settings saved to {}",
+            config.config_path.display()
+        )],
+    ))
+}
+
+/// Loads the configuration, applies browser settings updates, and saves it.
+pub async fn load_and_apply_browser_settings(
+    update: BrowserSettingsPatch,
+) -> Result, String> {
+    let mut config = load_config_with_timeout().await?;
+    apply_browser_settings(&mut config, update).await
+}
+
+/// Updates the screen intelligence settings in the configuration.
+pub async fn apply_screen_intelligence_settings(
+    config: &mut Config,
+    update: ScreenIntelligenceSettingsPatch,
+) -> Result, String> {
+    // Only fields present in the patch are written.
+    if let Some(enabled) = update.enabled {
+        config.screen_intelligence.enabled = enabled;
+    }
+    if let Some(capture_policy) = update.capture_policy {
+        config.screen_intelligence.capture_policy = capture_policy;
+    }
+    if let Some(policy_mode) = update.policy_mode {
+        config.screen_intelligence.policy_mode = policy_mode;
+    }
+    if let Some(baseline_fps) = update.baseline_fps {
+        // Out-of-range rates are pulled into 0.2..=30.0 instead of rejected.
+        // NOTE(review): float `clamp` returns NaN for a NaN input — confirm
+        // callers can never send one.
+        config.screen_intelligence.baseline_fps = baseline_fps.clamp(0.2, 30.0);
+    }
+    if let Some(vision_enabled) = update.vision_enabled {
+        config.screen_intelligence.vision_enabled = vision_enabled;
+    }
+    if let Some(autocomplete_enabled) = update.autocomplete_enabled {
+        config.screen_intelligence.autocomplete_enabled = autocomplete_enabled;
+    }
+    if let Some(use_vision_model) = update.use_vision_model {
+        config.screen_intelligence.use_vision_model = use_vision_model;
+    }
+    if let Some(keep_screenshots) = update.keep_screenshots {
+        config.screen_intelligence.keep_screenshots = keep_screenshots;
+    }
+    // Both lists are replaced wholesale, not merged with the stored ones.
+    if let Some(allowlist) = update.allowlist {
+        config.screen_intelligence.allowlist = allowlist;
+    }
+    if let Some(denylist) = update.denylist {
+        config.screen_intelligence.denylist = denylist;
+    }
+
+    config.save().await.map_err(|e| e.to_string())?;
+    // The saved settings are pushed into the global engine after the save.
+    // NOTE(review): whatever `apply_config` returns is discarded; if it is a
+    // `Result`, a failed push is silently ignored — confirm this is intended.
+    let _ = screen_intelligence::global_engine()
+        .apply_config(config.screen_intelligence.clone())
+        .await;
+
+    let snapshot = snapshot_config_json(config)?;
+    Ok(RpcOutcome::new(
+        snapshot,
+        vec![format!(
+            "screen intelligence settings saved to {}",
+            config.config_path.display()
+        )],
+    ))
+}
+
+/// Loads the configuration, applies screen intelligence settings updates, and saves it.
+pub async fn load_and_apply_screen_intelligence_settings( + update: ScreenIntelligenceSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_screen_intelligence_settings(&mut config, update).await +} + +/// Updates the analytics-related settings in the configuration. +pub async fn apply_analytics_settings( + config: &mut Config, + update: AnalyticsSettingsPatch, +) -> Result, String> { + if let Some(enabled) = update.enabled { + config.observability.analytics_enabled = enabled; + } + config.save().await.map_err(|e| e.to_string())?; + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "analytics settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies analytics settings updates, and saves it. +pub async fn load_and_apply_analytics_settings( + update: AnalyticsSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_analytics_settings(&mut config, update).await +} + +/// Updates the Google Meet integration settings in the configuration. +pub async fn apply_meet_settings( + config: &mut Config, + update: MeetSettingsPatch, +) -> Result, String> { + if let Some(enabled) = update.auto_orchestrator_handoff { + config.meet.auto_orchestrator_handoff = enabled; + } + config.save().await.map_err(|e| e.to_string())?; + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "meet settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Loads the configuration, applies meet settings updates, and saves it. +pub async fn load_and_apply_meet_settings( + update: MeetSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_meet_settings(&mut config, update).await +} + +/// Updates the search engine configuration. 
Empty API-key strings clear the +/// stored value rather than treat empty-string as "credential present". +pub async fn apply_search_settings( + config: &mut Config, + update: SearchSettingsPatch, +) -> Result, String> { + if let Some(engine) = update.engine { + let trimmed = engine.trim(); + match trimmed { + "disabled" | "managed" | "parallel" | "brave" | "querit" => { + config.search.engine = trimmed.to_string(); + } + other => { + return Err(format!( + "engine must be one of disabled/managed/parallel/brave/querit (got {other:?})" + )); + } + } + } + if let Some(n) = update.max_results { + if !(1..=20).contains(&n) { + return Err(format!("max_results must be between 1 and 20 (got {n})")); + } + config.search.max_results = n; + } + if let Some(secs) = update.timeout_secs { + if !(1..=120).contains(&secs) { + return Err(format!( + "timeout_secs must be between 1 and 120 (got {secs})" + )); + } + config.search.timeout_secs = secs; + } + if let Some(raw) = update.parallel_api_key { + let trimmed = raw.trim(); + config.search.parallel.api_key = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + }; + } + if let Some(raw) = update.brave_api_key { + let trimmed = raw.trim(); + config.search.brave.api_key = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + }; + } + if let Some(raw) = update.querit_api_key { + let trimmed = raw.trim(); + config.search.querit.api_key = if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + }; + } + let allowlist_touched = update.allowed_domains.is_some() || update.allow_all.is_some(); + let before_count = config.http_request.allowed_domains.len(); + let before_allow_all = config.http_request.allowed_domains.iter().any(|d| d == "*"); + if let Some(domains) = update.allowed_domains { + let mut cleaned: Vec = domains + .into_iter() + .map(|d| d.trim().to_string()) + .filter(|d| !d.is_empty()) + .collect(); + cleaned.sort(); + cleaned.dedup(); + config.http_request.allowed_domains = 
cleaned; + } + if let Some(allow_all) = update.allow_all { + if allow_all { + config.http_request.allowed_domains = vec!["*".to_string()]; + } else { + config.http_request.allowed_domains.retain(|d| d != "*"); + } + } + if allowlist_touched { + let after_count = config.http_request.allowed_domains.len(); + let after_allow_all = config.http_request.allowed_domains.iter().any(|d| d == "*"); + tracing::info!( + before_count, + after_count, + before_allow_all, + after_allow_all, + "[config] http_request.allowed_domains updated" + ); + } + config.save().await.map_err(|e| e.to_string())?; + let snapshot = snapshot_config_json(config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "search settings saved to {}", + config.config_path.display() + )], + )) +} + +pub async fn load_and_apply_search_settings( + update: SearchSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + apply_search_settings(&mut config, update).await +} + +/// Read the current search engine settings (with API keys redacted to a +/// presence boolean so the UI can show "configured" without ever rendering +/// the raw secret). 
+pub async fn get_search_settings() -> Result, String> {
+    let config = load_config_with_timeout().await?;
+    let result = serde_json::json!({
+        // `engine` is the requested value; `effective_engine` is what
+        // `effective_engine()` resolves it to.
+        "engine": config.search.requested_engine_str(),
+        "effective_engine": match config.search.effective_engine() {
+            crate::openhuman::config::SearchEngine::Disabled => "disabled",
+            crate::openhuman::config::SearchEngine::Managed => "managed",
+            crate::openhuman::config::SearchEngine::Parallel => "parallel",
+            crate::openhuman::config::SearchEngine::Brave => "brave",
+            crate::openhuman::config::SearchEngine::Querit => "querit",
+        },
+        "max_results": config.search.max_results,
+        "timeout_secs": config.search.timeout_secs,
+        // Keys are reported through `has_key()` only; the raw values are
+        // never placed in the response.
+        "parallel_configured": config.search.parallel.has_key(),
+        "brave_configured": config.search.brave.has_key(),
+        "querit_configured": config.search.querit.has_key(),
+        "allowed_domains": config.http_request.allowed_domains,
+        "allow_all": config.http_request.allowed_domains.iter().any(|d| d == "*"),
+    });
+    Ok(RpcOutcome::new(
+        result,
+        vec!["search settings read".to_string()],
+    ))
+}
+
+/// Resolves the onboarding flag name and reports whether its flag file exists.
+///
+/// Uses `flag_name` when given, otherwise `default_name`. The name is trimmed
+/// and rejected when it is empty or contains `/`, `\` or `..`. The workspace
+/// directory comes from the loaded config, or from `fallback_workspace_dir()`
+/// when the config cannot be loaded. This function never creates the file.
+pub async fn workspace_onboarding_flag_resolve(
+    flag_name: Option,
+    default_name: &str,
+) -> Result, String> {
+    let name = flag_name.unwrap_or_else(|| default_name.to_string());
+    let trimmed = name.trim();
+    if trimmed.is_empty()
+        || trimmed.contains('/')
+        || trimmed.contains('\\')
+        || trimmed.contains("..")
+    {
+        return Err("Invalid onboarding flag name".to_string());
+    }
+    let workspace_dir = match load_config_with_timeout().await {
+        Ok(cfg) => cfg.workspace_dir,
+        Err(_) => fallback_workspace_dir(),
+    };
+    workspace_onboarding_flag_exists(workspace_dir, trimmed)
+}
+
+/// Checks if a specific onboarding flag file exists in the workspace.
+pub fn workspace_onboarding_flag_exists( + workspace_dir: std::path::PathBuf, + flag_name: &str, +) -> Result, String> { + let trimmed = flag_name.trim(); + if trimmed.is_empty() + || trimmed.contains('/') + || trimmed.contains('\\') + || trimmed.contains("..") + { + return Err("Invalid onboarding flag name".to_string()); + } + Ok(RpcOutcome::single_log( + workspace_dir.join(trimmed).is_file(), + "onboarding flag checked", + )) +} + +/// Creates or removes an onboarding flag file in the workspace. +pub async fn workspace_onboarding_flag_set( + flag_name: Option, + default_name: &str, + value: bool, +) -> Result, String> { + let name = flag_name.unwrap_or_else(|| default_name.to_string()); + let trimmed = name.trim(); + if trimmed.is_empty() + || trimmed.contains('/') + || trimmed.contains('\\') + || trimmed.contains("..") + { + return Err("Invalid onboarding flag name".to_string()); + } + let workspace_dir = match load_config_with_timeout().await { + Ok(cfg) => cfg.workspace_dir, + Err(_) => fallback_workspace_dir(), + }; + let flag_path = workspace_dir.join(trimmed); + if value { + if let Some(parent) = flag_path.parent() { + std::fs::create_dir_all(parent) + .map_err(|e| format!("Failed to create workspace dir: {e}"))?; + } + std::fs::write(&flag_path, "") + .map_err(|e| format!("Failed to create onboarding flag: {e}"))?; + } else if flag_path.is_file() { + std::fs::remove_file(&flag_path) + .map_err(|e| format!("Failed to remove onboarding flag: {e}"))?; + } + Ok(RpcOutcome::single_log( + flag_path.is_file(), + "onboarding flag updated", + )) +} + +/// Returns whether the onboarding process has been marked as completed. +pub async fn get_onboarding_completed() -> Result, String> { + let config = load_config_with_timeout().await?; + Ok(RpcOutcome::single_log( + config.onboarding_completed, + "onboarding_completed read from config", + )) +} + +/// Updates and persists the onboarding completion status. 
+/// +/// On a false→true transition, seeds the recurring morning-briefing +/// cron job via [`crate::openhuman::cron::seed::seed_proactive_agents`]. +pub async fn set_onboarding_completed(value: bool) -> Result, String> { + tracing::debug!(value, "[onboarding] set_onboarding_completed called"); + let mut config = load_config_with_timeout().await?; + let was_completed = config.onboarding_completed; + config.onboarding_completed = value; + + config.save().await.map_err(|e| e.to_string())?; + + if value && !was_completed { + tracing::debug!( + "[onboarding] false→true transition detected — seeding cron jobs (welcome is renderer-triggered)" + ); + let seed_config = config.clone(); + tokio::task::spawn_blocking(move || { + if let Err(e) = crate::openhuman::cron::seed::seed_proactive_agents(&seed_config) { + tracing::warn!("[onboarding] failed to seed proactive agent cron jobs: {e}"); + } + }); + } else { + tracing::debug!( + was_completed, + value, + "[onboarding] no transition — skipping proactive seeding" + ); + } + + Ok(RpcOutcome::single_log( + config.onboarding_completed, + "onboarding_completed saved to config", + )) +} + +/// Returns the current dictation settings as a JSON object. +pub async fn get_dictation_settings() -> Result, String> { + let config = load_config_with_timeout().await?; + let result = json!({ + "enabled": config.dictation.enabled, + "hotkey": config.dictation.hotkey, + "activation_mode": config.dictation.activation_mode, + "llm_refinement": config.dictation.llm_refinement, + "streaming": config.dictation.streaming, + "streaming_interval_ms": config.dictation.streaming_interval_ms, + }); + Ok(RpcOutcome::new( + result, + vec!["dictation settings read".to_string()], + )) +} + +/// Loads configuration, applies dictation settings updates, and saves it. 
+pub async fn load_and_apply_dictation_settings( + update: DictationSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + if let Some(enabled) = update.enabled { + config.dictation.enabled = enabled; + } + if let Some(hotkey) = update.hotkey { + config.dictation.hotkey = hotkey; + } + if let Some(mode) = update.activation_mode { + match mode.as_str() { + "toggle" => { + config.dictation.activation_mode = + crate::openhuman::config::DictationActivationMode::Toggle; + } + "push" => { + config.dictation.activation_mode = + crate::openhuman::config::DictationActivationMode::Push; + } + _ => { + return Err(format!( + "invalid activation_mode: {mode} (valid: toggle, push)" + )) + } + } + } + if let Some(llm_refinement) = update.llm_refinement { + config.dictation.llm_refinement = llm_refinement; + } + if let Some(streaming) = update.streaming { + config.dictation.streaming = streaming; + } + if let Some(interval) = update.streaming_interval_ms { + config.dictation.streaming_interval_ms = interval; + } + config.save().await.map_err(|e| e.to_string())?; + let snapshot = snapshot_config_json(&config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "dictation settings saved to {}", + config.config_path.display() + )], + )) +} + +/// Returns the current voice server settings as a JSON object. 
+pub async fn get_voice_server_settings() -> Result, String> { + let config = load_config_with_timeout().await?; + let result = json!({ + "auto_start": config.voice_server.auto_start, + "hotkey": config.voice_server.hotkey, + "activation_mode": config.voice_server.activation_mode, + "skip_cleanup": config.voice_server.skip_cleanup, + "min_duration_secs": config.voice_server.min_duration_secs, + "silence_threshold": config.voice_server.silence_threshold, + "custom_dictionary": config.voice_server.custom_dictionary, + "always_on_enabled": config.voice_server.always_on_enabled, + "wake_word": config.voice_server.wake_word, + }); + Ok(RpcOutcome::new( + result, + vec!["voice server settings read".to_string()], + )) +} + +/// Loads configuration, applies voice server settings updates, and saves it. +pub async fn load_and_apply_voice_server_settings( + update: VoiceServerSettingsPatch, +) -> Result, String> { + let mut config = load_config_with_timeout().await?; + if let Some(auto_start) = update.auto_start { + config.voice_server.auto_start = auto_start; + } + if let Some(hotkey) = update.hotkey { + config.voice_server.hotkey = hotkey; + } + if let Some(mode) = update.activation_mode { + match mode.as_str() { + "tap" => { + config.voice_server.activation_mode = + crate::openhuman::config::VoiceActivationMode::Tap; + } + "push" => { + config.voice_server.activation_mode = + crate::openhuman::config::VoiceActivationMode::Push; + } + _ => { + return Err(format!( + "invalid activation_mode: {mode} (valid: tap, push)" + )) + } + } + } + if let Some(skip_cleanup) = update.skip_cleanup { + config.voice_server.skip_cleanup = skip_cleanup; + } + if let Some(min_duration_secs) = update.min_duration_secs { + config.voice_server.min_duration_secs = min_duration_secs.max(0.0); + } + if let Some(silence_threshold) = update.silence_threshold { + config.voice_server.silence_threshold = silence_threshold.max(0.0); + } + if let Some(custom_dictionary) = update.custom_dictionary { + 
config.voice_server.custom_dictionary = custom_dictionary; + } + if let Some(always_on_enabled) = update.always_on_enabled { + config.voice_server.always_on_enabled = always_on_enabled; + } + if let Some(wake_word) = update.wake_word { + config.voice_server.wake_word = wake_word.trim().to_string(); + } + config.save().await.map_err(|e| e.to_string())?; + let snapshot = snapshot_config_json(&config)?; + Ok(RpcOutcome::new( + snapshot, + vec![format!( + "voice server settings saved to {}", + config.config_path.display() + )], + )) +} diff --git a/src/openhuman/config/schema/load.rs b/src/openhuman/config/schema/load.rs deleted file mode 100644 index 464da83e37..0000000000 --- a/src/openhuman/config/schema/load.rs +++ /dev/null @@ -1,2290 +0,0 @@ -//! Config load/save and environment variable overrides. - -use super::{ - proxy::{ - normalize_no_proxy_list, normalize_proxy_url_option, normalize_service_list, - parse_proxy_enabled, parse_proxy_scope, set_runtime_proxy_config, ProxyScope, - }, - Config, UpdateRestartStrategy, -}; -use anyhow::{Context, Result}; -use directories::UserDirs; -use serde::{Deserialize, Serialize}; -use std::collections::HashSet; -use std::path::{Path, PathBuf}; -use std::sync::{Mutex, OnceLock}; -use tokio::fs::{self, File, OpenOptions}; -use tokio::io::AsyncWriteExt; - -/// Read-only environment lookup used by [`Config::apply_env_overrides`]. The -/// seam lets unit tests exercise the overlay without mutating the process -/// environment (which is racy under parallel tests and requires a shared -/// `TEST_ENV_LOCK`). -/// -/// Production code uses [`ProcessEnv`], which delegates to `std::env`. -pub(crate) trait EnvLookup { - /// Equivalent to `std::env::var(key).ok()`. - fn get(&self, key: &str) -> Option; - - /// Equivalent to `std::env::var_os(key).is_some()`. Used to distinguish - /// "variable not present" from "variable set to empty" where it matters - /// (see `OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES` below). 
- fn contains(&self, key: &str) -> bool { - self.get(key).is_some() - } - - /// Looks up the first non-`None` value across `keys`, preserving the - /// precedence used by the manual `or_else` chains throughout this - /// module (e.g. `OPENHUMAN_FOO` wins over the bare `FOO` alias). - fn get_any(&self, keys: &[&str]) -> Option { - keys.iter().find_map(|k| self.get(k)) - } -} - -/// Default [`EnvLookup`] implementation backed by `std::env`. -pub(crate) struct ProcessEnv; - -impl EnvLookup for ProcessEnv { - fn get(&self, key: &str) -> Option { - std::env::var(key).ok() - } - - fn contains(&self, key: &str) -> bool { - std::env::var_os(key).is_some() - } -} - -/// Process env lookup that preserves every override except -/// `OPENHUMAN_WORKSPACE`. -struct ProcessEnvWithoutWorkspace; - -impl EnvLookup for ProcessEnvWithoutWorkspace { - fn get(&self, key: &str) -> Option { - if key == "OPENHUMAN_WORKSPACE" { - None - } else { - ProcessEnv.get(key) - } - } - - fn contains(&self, key: &str) -> bool { - if key == "OPENHUMAN_WORKSPACE" { - false - } else { - ProcessEnv.contains(key) - } - } -} - -fn default_config_and_workspace_dirs() -> Result<(PathBuf, PathBuf)> { - let config_dir = default_config_dir()?; - Ok((config_dir.clone(), config_dir.join("workspace"))) -} - -/// Parse a boolean env-var value. Accepts the usual truthy/falsy tokens -/// (`1/true/yes/on` and `0/false/no/off`, case-insensitive). Returns `None` -/// on unrecognised values and logs a warning so silent mis-spellings don't -/// invisibly leave the config unchanged. 
-fn parse_env_bool(name: &str, raw: &str) -> Option { - match raw.trim().to_ascii_lowercase().as_str() { - "1" | "true" | "yes" | "on" => Some(true), - "0" | "false" | "no" | "off" => Some(false), - _ => { - tracing::warn!( - env = %name, - value = %raw, - "invalid boolean env override ignored; expected 1/true/yes/on or 0/false/no/off" - ); - None - } - } -} - -const ACTIVE_WORKSPACE_STATE_FILE: &str = "active_workspace.toml"; -static WARNED_WORLD_READABLE_CONFIGS: OnceLock>> = OnceLock::new(); - -#[derive(Debug, Serialize, Deserialize)] -struct ActiveWorkspaceState { - config_dir: String, -} - -fn default_config_dir() -> Result { - default_root_openhuman_dir() -} - -fn default_root_dir_name() -> &'static str { - if crate::api::config::is_staging_app_env(crate::api::config::app_env_from_env().as_deref()) { - ".openhuman-staging" - } else { - ".openhuman" - } -} - -/// Returns the root openhuman directory (`~/.openhuman`), independent of any -/// per-user scoping. Used to locate `active_user.toml` and the shared -/// `users/` tree. -pub fn default_root_openhuman_dir() -> Result { - let home = UserDirs::new() - .map(|u| u.home_dir().to_path_buf()) - .context("Could not find home directory")?; - Ok(home.join(default_root_dir_name())) -} - -/// Environment override for the agent's default projects directory. -pub const PROJECTS_DIR_ENV_VAR: &str = "OPENHUMAN_PROJECTS_DIR"; - -/// Environment override for the agent action sandbox directory. -pub const ACTION_DIR_ENV_VAR: &str = "OPENHUMAN_ACTION_DIR"; - -/// Environment override for the global memory-sync cadence (seconds). -/// `0` means "Manual only". See issue #3302 and -/// [`Config::memory_sync_interval_secs`]. 
-pub const MEMORY_SYNC_INTERVAL_SECS_ENV_VAR: &str = "OPENHUMAN_MEMORY_SYNC_INTERVAL_SECS"; - -/// The agent's default **projects home** — a visible, read-write directory -/// (`~/OpenHuman/projects`) where the coding agent creates and saves projects, -/// kept distinct from the hidden internal state dir (`~/.openhuman/workspace`, -/// which also holds `memory_tree` etc.). Overridable via `OPENHUMAN_PROJECTS_DIR`; -/// falls back to `./OpenHuman/projects` only when the home dir can't be resolved. -pub fn default_projects_dir() -> PathBuf { - if let Ok(p) = std::env::var(PROJECTS_DIR_ENV_VAR) { - let trimmed = p.trim(); - if !trimmed.is_empty() { - return PathBuf::from(trimmed); - } - } - UserDirs::new() - .map(|u| u.home_dir().to_path_buf()) - .unwrap_or_else(|| PathBuf::from(".")) - .join("OpenHuman") - .join("projects") -} - -/// The agent's default **action sandbox** — the directory where shell, file, -/// and git tools run by default. Separate from the internal workspace state -/// dir. Defaults to `default_projects_dir()` (`~/OpenHuman/projects`); -/// overridable via `OPENHUMAN_ACTION_DIR`. -/// The `OPENHUMAN_ACTION_DIR` env override, when set to a non-empty value. -/// -/// Returns `None` when the variable is unset or blank (a common shape from -/// shells that pass through a declared-but-unset variable). The trim mirrors -/// [`default_action_dir`] so an empty env var never pins `action_dir`. -pub fn action_dir_env_override() -> Option { - let raw = std::env::var(ACTION_DIR_ENV_VAR).ok()?; - let trimmed = raw.trim(); - if trimmed.is_empty() { - None - } else { - Some(PathBuf::from(trimmed)) - } -} - -/// Resolve the effective `action_dir` from the precedence chain: -/// env `OPENHUMAN_ACTION_DIR` > persisted `action_dir_override` > default -/// projects dir. Keeping the env var first means existing env-driven -/// deployments are unaffected by a UI-set override. 
-pub fn resolve_action_dir(action_dir_override: &Option) -> PathBuf { - if let Some(env_dir) = action_dir_env_override() { - return env_dir; - } - if let Some(over) = action_dir_override { - if !over.as_os_str().is_empty() && over.is_absolute() { - return over.clone(); - } - tracing::warn!( - value = %over.display(), - "[config] ignoring invalid action_dir_override; expected non-empty absolute path" - ); - } - default_projects_dir() -} - -pub fn default_action_dir() -> PathBuf { - if let Ok(p) = std::env::var(ACTION_DIR_ENV_VAR) { - let trimmed = p.trim(); - if !trimmed.is_empty() { - return PathBuf::from(trimmed); - } - } - default_projects_dir() -} - -fn active_workspace_state_path(default_dir: &Path) -> PathBuf { - default_dir.join(ACTIVE_WORKSPACE_STATE_FILE) -} - -async fn load_persisted_workspace_dirs( - default_config_dir: &Path, -) -> Result> { - let state_path = active_workspace_state_path(default_config_dir); - if !state_path.exists() { - return Ok(None); - } - - let contents = match fs::read_to_string(&state_path).await { - Ok(contents) => contents, - Err(error) => { - tracing::warn!( - "Failed to read active workspace marker {}: {error}", - state_path.display() - ); - return Ok(None); - } - }; - - let state: ActiveWorkspaceState = match toml::from_str(&contents) { - Ok(state) => state, - Err(error) => { - tracing::warn!( - "Failed to parse active workspace marker {}: {error}", - state_path.display() - ); - return Ok(None); - } - }; - - let raw_config_dir = state.config_dir.trim(); - if raw_config_dir.is_empty() { - tracing::warn!( - "Ignoring active workspace marker {} because config_dir is empty", - state_path.display() - ); - return Ok(None); - } - - let parsed_dir = PathBuf::from(raw_config_dir); - let config_dir = if parsed_dir.is_absolute() { - parsed_dir - } else { - default_config_dir.join(parsed_dir) - }; - Ok(Some((config_dir.clone(), config_dir.join("workspace")))) -} - -pub(crate) async fn persist_active_workspace_config_dir(config_dir: 
&Path) -> Result<()> { - let default_config_dir = default_config_dir()?; - let state_path = active_workspace_state_path(&default_config_dir); - - if config_dir == default_config_dir { - if state_path.exists() { - fs::remove_file(&state_path).await.with_context(|| { - format!( - "Failed to clear active workspace marker: {}", - state_path.display() - ) - })?; - } - return Ok(()); - } - - fs::create_dir_all(&default_config_dir) - .await - .with_context(|| { - format!( - "Failed to create default config directory: {}", - default_config_dir.display() - ) - })?; - - let state = ActiveWorkspaceState { - config_dir: config_dir.to_string_lossy().into_owned(), - }; - let serialized = - toml::to_string_pretty(&state).context("Failed to serialize active workspace marker")?; - - let temp_path = default_config_dir.join(format!( - ".{ACTIVE_WORKSPACE_STATE_FILE}.tmp-{}", - uuid::Uuid::new_v4() - )); - fs::write(&temp_path, serialized).await.with_context(|| { - format!( - "Failed to write temporary active workspace marker: {}", - temp_path.display() - ) - })?; - - if let Err(error) = fs::rename(&temp_path, &state_path).await { - let _ = fs::remove_file(&temp_path).await; - anyhow::bail!( - "Failed to atomically persist active workspace marker {}: {error}", - state_path.display() - ); - } - - sync_directory(&default_config_dir).await?; - Ok(()) -} - -fn resolve_config_dir_for_workspace(workspace_dir: &Path) -> (PathBuf, PathBuf) { - let workspace_config_dir = workspace_dir.to_path_buf(); - if workspace_config_dir.join("config.toml").exists() { - return ( - workspace_config_dir.clone(), - workspace_config_dir.join("workspace"), - ); - } - - let legacy_config_dir = workspace_dir - .parent() - .map(|parent| parent.join(".openhuman")); - if let Some(legacy_dir) = legacy_config_dir { - if legacy_dir.join("config.toml").exists() { - return (legacy_dir, workspace_config_dir); - } - - if workspace_dir - .file_name() - .is_some_and(|name| name == std::ffi::OsStr::new("workspace")) - { - 
return (legacy_dir, workspace_config_dir); - } - } - - ( - workspace_config_dir.clone(), - workspace_config_dir.join("workspace"), - ) -} - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum ConfigResolutionSource { - EnvWorkspace, - ActiveWorkspaceMarker, - ActiveUser, - DefaultConfigDir, -} - -impl ConfigResolutionSource { - const fn as_str(self) -> &'static str { - match self { - Self::EnvWorkspace => "OPENHUMAN_WORKSPACE", - Self::ActiveWorkspaceMarker => "active_workspace.toml", - Self::ActiveUser => "active_user.toml", - Self::DefaultConfigDir => "default", - } - } -} - -async fn resolve_runtime_config_dirs( - default_openhuman_dir: &Path, - default_workspace_dir: &Path, -) -> Result<(PathBuf, PathBuf, ConfigResolutionSource)> { - resolve_runtime_config_dirs_with(default_openhuman_dir, default_workspace_dir, &ProcessEnv) - .await -} - -/// Env-injectable variant of [`resolve_runtime_config_dirs`]. Accepts any -/// [`EnvLookup`] so unit tests can exercise the `OPENHUMAN_WORKSPACE` -/// override path without mutating the process environment. -async fn resolve_runtime_config_dirs_with( - default_openhuman_dir: &Path, - default_workspace_dir: &Path, - env: &(dyn EnvLookup + Send + Sync), -) -> Result<(PathBuf, PathBuf, ConfigResolutionSource)> { - // 1. Explicit env override always wins. - if let Some(custom_workspace) = env.get("OPENHUMAN_WORKSPACE") { - if !custom_workspace.is_empty() { - let (openhuman_dir, workspace_dir) = - resolve_config_dir_for_workspace(&PathBuf::from(custom_workspace)); - return Ok(( - openhuman_dir, - workspace_dir, - ConfigResolutionSource::EnvWorkspace, - )); - } - } - - resolve_config_dirs_ignoring_env(default_openhuman_dir, default_workspace_dir).await -} - -/// Same as [`resolve_runtime_config_dirs`] but skips the -/// `OPENHUMAN_WORKSPACE` env var override. Used by -/// [`Config::load_from_default_paths`] so callers can reliably load -/// the real user config without mutating the process environment. 
-async fn resolve_config_dirs_ignoring_env( - default_openhuman_dir: &Path, - default_workspace_dir: &Path, -) -> Result<(PathBuf, PathBuf, ConfigResolutionSource)> { - // 2. Active user — scopes the entire openhuman dir to a per-user directory - // so that config, auth, encryption, and workspace are all user-isolated. - if let Some(user_id) = read_active_user_id(default_openhuman_dir) { - let user_dir = user_openhuman_dir(default_openhuman_dir, &user_id); - let user_workspace = user_dir.join("workspace"); - tracing::debug!( - user_id = %user_id, - user_dir = %user_dir.display(), - "Config dirs resolved via active_user.toml" - ); - return Ok((user_dir, user_workspace, ConfigResolutionSource::ActiveUser)); - } - - // 3. Active workspace marker (legacy / multi-workspace). - if let Some((openhuman_dir, workspace_dir)) = - load_persisted_workspace_dirs(default_openhuman_dir).await? - { - return Ok(( - openhuman_dir, - workspace_dir, - ConfigResolutionSource::ActiveWorkspaceMarker, - )); - } - - // 4. Default: no login yet. Encapsulate config/memory/state under the - // pre-login user directory so everything is user-scoped from the very - // first init. On first real login, this directory is migrated to the - // authenticated user id (see `credentials::ops::store_session`). 
- let user_dir = pre_login_user_dir(default_openhuman_dir); - let user_workspace = user_dir.join("workspace"); - tracing::debug!( - user_id = %PRE_LOGIN_USER_ID, - user_dir = %user_dir.display(), - default_workspace_dir = %default_workspace_dir.display(), - "Config dirs resolved to pre-login user directory (no active user, no workspace marker)" - ); - Ok(( - user_dir, - user_workspace, - ConfigResolutionSource::DefaultConfigDir, - )) -} - -fn decrypt_optional_secret( - store: &crate::openhuman::keyring::SecretStore, - value: &mut Option, - field_name: &str, -) -> Result<()> { - if let Some(raw) = value.clone() { - if crate::openhuman::keyring::SecretStore::is_encrypted(&raw) { - match store.decrypt(&raw) { - Ok(plaintext) => *value = Some(plaintext), - Err(e) => { - // Decryption key is inaccessible (e.g. rotated, keyring reset, or - // migrated across machines). Clear the field so config loads - // successfully — the affected integration will be disabled until - // the user re-enters the credential. A hard error here would block - // every config load and make the app unusable. - log::warn!( - "[config] Failed to decrypt {field_name} — field cleared (key inaccessible): {e}" - ); - crate::openhuman::keyring_consent::policy::notify_decrypt_failure( - field_name, - &e.to_string(), - ); - *value = None; - } - } - } - } - Ok(()) -} - -fn encrypt_optional_secret( - store: &crate::openhuman::keyring::SecretStore, - value: &mut Option, - field_name: &str, -) -> Result<()> { - if let Some(raw) = value.clone() { - if !crate::openhuman::keyring::SecretStore::is_encrypted(&raw) { - *value = Some( - store - .encrypt(&raw) - .with_context(|| format!("Failed to encrypt {field_name}"))?, - ); - } - } - Ok(()) -} - -/// Decrypt all secret fields in the configuration that are marked as encrypted. -/// -/// Called during config load when `secrets.encrypt` is true. Only decrypts -/// values that have the `enc:` or `enc2:` prefix; plaintext values are -/// returned as-is. 
This is a no-op when encryption is disabled. -fn decrypt_config_secrets(config: &mut Config, openhuman_dir: &Path) -> Result<()> { - if !config.secrets.encrypt { - return Ok(()); - } - let store = crate::openhuman::keyring::SecretStore::new(openhuman_dir, true); - - decrypt_optional_secret(&store, &mut config.api_key, "api_key")?; - - // Search engines: BYO API keys for direct providers. - decrypt_optional_secret( - &store, - &mut config.search.parallel.api_key, - "search.parallel.api_key", - )?; - decrypt_optional_secret( - &store, - &mut config.search.brave.api_key, - "search.brave.api_key", - )?; - decrypt_optional_secret( - &store, - &mut config.search.querit.api_key, - "search.querit.api_key", - )?; - - // Channels: decrypt every optional secret field. - // - // For required (non-Option) secret fields we wrap the value in a - // temporary Option, run `decrypt_optional_secret`, then write back via - // `unwrap_or_default`. This mirrors the encrypt path and — crucially — - // propagates real decryption errors via `?` instead of silently handing - // ciphertext back to channel code on a corrupted `enc2:` value. - // Plaintext values (no `enc:`/`enc2:` prefix) are passed through - // untouched by `SecretStore::decrypt`, so configs written by pre-#1900 - // builds continue to load correctly. 
- let ch = &mut config.channels_config; - if let Some(ref mut tg) = ch.telegram { - let mut tok = Some(tg.bot_token.clone()); - decrypt_optional_secret(&store, &mut tok, "telegram.bot_token")?; - tg.bot_token = tok.unwrap_or_default(); - } - if let Some(ref mut d) = ch.discord { - let mut tok = Some(d.bot_token.clone()); - decrypt_optional_secret(&store, &mut tok, "discord.bot_token")?; - d.bot_token = tok.unwrap_or_default(); - } - if let Some(ref mut s) = ch.slack { - let mut tok = Some(s.bot_token.clone()); - decrypt_optional_secret(&store, &mut tok, "slack.bot_token")?; - s.bot_token = tok.unwrap_or_default(); - decrypt_optional_secret(&store, &mut s.app_token, "slack.app_token")?; - } - if let Some(ref mut m) = ch.mattermost { - let mut tok = Some(m.bot_token.clone()); - decrypt_optional_secret(&store, &mut tok, "mattermost.bot_token")?; - m.bot_token = tok.unwrap_or_default(); - } - if let Some(ref mut w) = ch.webhook { - decrypt_optional_secret(&store, &mut w.secret, "webhook.secret")?; - } - if let Some(ref mut mx) = ch.matrix { - let mut tok = Some(mx.access_token.clone()); - decrypt_optional_secret(&store, &mut tok, "matrix.access_token")?; - mx.access_token = tok.unwrap_or_default(); - } - if let Some(ref mut wa) = ch.whatsapp { - decrypt_optional_secret(&store, &mut wa.access_token, "whatsapp.access_token")?; - decrypt_optional_secret(&store, &mut wa.verify_token, "whatsapp.verify_token")?; - decrypt_optional_secret(&store, &mut wa.app_secret, "whatsapp.app_secret")?; - } - if let Some(ref mut lq) = ch.linq { - let mut tok = Some(lq.api_token.clone()); - decrypt_optional_secret(&store, &mut tok, "linq.api_token")?; - lq.api_token = tok.unwrap_or_default(); - } - if let Some(ref mut irc) = ch.irc { - decrypt_optional_secret(&store, &mut irc.server_password, "irc.server_password")?; - decrypt_optional_secret(&store, &mut irc.nickserv_password, "irc.nickserv_password")?; - decrypt_optional_secret(&store, &mut irc.sasl_password, "irc.sasl_password")?; - } - 
if let Some(ref mut lk) = ch.lark { - let mut tok = Some(lk.app_secret.clone()); - decrypt_optional_secret(&store, &mut tok, "lark.app_secret")?; - lk.app_secret = tok.unwrap_or_default(); - decrypt_optional_secret(&store, &mut lk.encrypt_key, "lark.encrypt_key")?; - decrypt_optional_secret( - &store, - &mut lk.verification_token, - "lark.verification_token", - )?; - } - if let Some(ref mut dt) = ch.dingtalk { - let mut tok = Some(dt.client_secret.clone()); - decrypt_optional_secret(&store, &mut tok, "dingtalk.client_secret")?; - dt.client_secret = tok.unwrap_or_default(); - } - if let Some(ref mut qq) = ch.qq { - let mut tok = Some(qq.app_secret.clone()); - decrypt_optional_secret(&store, &mut tok, "qq.app_secret")?; - qq.app_secret = tok.unwrap_or_default(); - } - - Ok(()) -} - -/// Encrypt all secret fields in the configuration before writing to disk. -/// -/// Called during `Config::save()` when `secrets.encrypt` is true. Only -/// encrypts values that are NOT already encrypted. This is a no-op when -/// encryption is disabled. 
-fn encrypt_config_secrets(config: &mut Config) -> Result<()> { - if !config.secrets.encrypt { - return Ok(()); - } - let parent_dir = config - .config_path - .parent() - .context("Config path must have a parent directory")?; - let store = crate::openhuman::keyring::SecretStore::new(parent_dir, true); - - encrypt_optional_secret(&store, &mut config.api_key, "api_key")?; - - encrypt_optional_secret( - &store, - &mut config.search.parallel.api_key, - "search.parallel.api_key", - )?; - encrypt_optional_secret( - &store, - &mut config.search.brave.api_key, - "search.brave.api_key", - )?; - encrypt_optional_secret( - &store, - &mut config.search.querit.api_key, - "search.querit.api_key", - )?; - - let ch = &mut config.channels_config; - if let Some(ref mut tg) = ch.telegram { - let mut tok = Some(tg.bot_token.clone()); - encrypt_optional_secret(&store, &mut tok, "telegram.bot_token")?; - tg.bot_token = tok.unwrap_or_default(); - } - if let Some(ref mut d) = ch.discord { - let mut tok = Some(d.bot_token.clone()); - encrypt_optional_secret(&store, &mut tok, "discord.bot_token")?; - d.bot_token = tok.unwrap_or_default(); - } - if let Some(ref mut s) = ch.slack { - let mut tok = Some(s.bot_token.clone()); - encrypt_optional_secret(&store, &mut tok, "slack.bot_token")?; - s.bot_token = tok.unwrap_or_default(); - encrypt_optional_secret(&store, &mut s.app_token, "slack.app_token")?; - } - if let Some(ref mut m) = ch.mattermost { - let mut tok = Some(m.bot_token.clone()); - encrypt_optional_secret(&store, &mut tok, "mattermost.bot_token")?; - m.bot_token = tok.unwrap_or_default(); - } - if let Some(ref mut w) = ch.webhook { - encrypt_optional_secret(&store, &mut w.secret, "webhook.secret")?; - } - if let Some(ref mut mx) = ch.matrix { - let mut tok = Some(mx.access_token.clone()); - encrypt_optional_secret(&store, &mut tok, "matrix.access_token")?; - mx.access_token = tok.unwrap_or_default(); - } - if let Some(ref mut wa) = ch.whatsapp { - encrypt_optional_secret(&store, &mut 
wa.access_token, "whatsapp.access_token")?; - encrypt_optional_secret(&store, &mut wa.verify_token, "whatsapp.verify_token")?; - encrypt_optional_secret(&store, &mut wa.app_secret, "whatsapp.app_secret")?; - } - if let Some(ref mut lq) = ch.linq { - let mut tok = Some(lq.api_token.clone()); - encrypt_optional_secret(&store, &mut tok, "linq.api_token")?; - lq.api_token = tok.unwrap_or_default(); - } - if let Some(ref mut irc) = ch.irc { - encrypt_optional_secret(&store, &mut irc.server_password, "irc.server_password")?; - encrypt_optional_secret(&store, &mut irc.nickserv_password, "irc.nickserv_password")?; - encrypt_optional_secret(&store, &mut irc.sasl_password, "irc.sasl_password")?; - } - if let Some(ref mut lk) = ch.lark { - let mut tok = Some(lk.app_secret.clone()); - encrypt_optional_secret(&store, &mut tok, "lark.app_secret")?; - lk.app_secret = tok.unwrap_or_default(); - encrypt_optional_secret(&store, &mut lk.encrypt_key, "lark.encrypt_key")?; - encrypt_optional_secret( - &store, - &mut lk.verification_token, - "lark.verification_token", - )?; - } - if let Some(ref mut dt) = ch.dingtalk { - let mut tok = Some(dt.client_secret.clone()); - encrypt_optional_secret(&store, &mut tok, "dingtalk.client_secret")?; - dt.client_secret = tok.unwrap_or_default(); - } - if let Some(ref mut qq) = ch.qq { - let mut tok = Some(qq.app_secret.clone()); - encrypt_optional_secret(&store, &mut tok, "qq.app_secret")?; - qq.app_secret = tok.unwrap_or_default(); - } - - Ok(()) -} - -#[path = "load_user_state.rs"] -mod load_user_state; -#[cfg(test)] -pub(crate) use load_user_state::ACTIVE_USER_STATE_FILE; -pub use load_user_state::{ - clear_active_user, pre_login_user_dir, read_active_user_id, user_openhuman_dir, - write_active_user_id, PRE_LOGIN_USER_ID, -}; - -async fn parse_config_with_recovery(config_path: &Path, contents: &str) -> (Config, bool) { - let parse_err = match parse_toml_off_worker(contents.to_string()).await { - Ok(config) => { - tracing::debug!( - path = 
%config_path.display(), - "[config] Config parsed successfully" - ); - return (config, false); - } - Err(parse_err) => parse_err, - }; - - let backup_path = config_path.with_extension("toml.bak"); - if tokio::fs::try_exists(&backup_path).await.unwrap_or(false) { - tracing::warn!( - path = %config_path.display(), - backup = %backup_path.display(), - error = %parse_err, - "[config] Config file is corrupted — attempting recovery from backup" - ); - match fs::read_to_string(&backup_path).await { - Ok(bak_contents) => match parse_toml_off_worker(bak_contents).await { - Ok(bak_config) => { - tracing::info!( - path = %config_path.display(), - backup = %backup_path.display(), - "[config] Recovered config from backup" - ); - return (bak_config, true); - } - Err(bak_err) => { - tracing::warn!( - path = %config_path.display(), - backup = %backup_path.display(), - error = %bak_err, - "[config] Backup is also corrupted; resetting to defaults" - ); - } - }, - Err(read_err) => { - tracing::warn!( - path = %config_path.display(), - backup = %backup_path.display(), - error = %read_err, - "[config] Failed to read backup; resetting to defaults" - ); - } - } - } else { - tracing::warn!( - path = %config_path.display(), - error = %parse_err, - "[config] Config file is corrupted (no backup found); resetting to defaults" - ); - } - - (Config::default(), true) -} - -/// Run `toml::from_str::` on a blocking-pool thread so the -/// parser's stack consumption is independent of how deep the calling -/// async tower is. See [`parse_config_with_recovery`] for the rationale. -/// -/// Returns the parse error stringified (rather than `toml::de::Error`) -/// because the rare blocking-pool join failure has no corresponding -/// typed variant and is only ever surfaced as a log line / corruption -/// fallback. Callers only need the message. 
-async fn parse_toml_off_worker(contents: String) -> Result { - match tokio::task::spawn_blocking(move || toml::from_str::(&contents)).await { - Ok(Ok(config)) => Ok(config), - Ok(Err(parse_err)) => Err(parse_err.to_string()), - Err(join_err) => Err(format!("blocking-pool parse join failed: {join_err}")), - } -} - -/// Older builds (#1342) wrote the user's custom OpenAI-compatible URL into -/// `config.api_url`, double-purposing it as both the OpenHuman product -/// backend URL AND the inference URL. That broke auth/billing/voice as -/// soon as someone picked a non-OpenHuman provider. We now keep them in -/// separate fields; on load, detect that legacy shape (any `api_url` whose -/// path looks like a chat-completions endpoint) and move it. -fn migrate_legacy_inference_url(config: &mut Config) { - if config.inference_url.is_some() { - return; - } - let Some(url) = config.api_url.as_deref() else { - return; - }; - let trimmed = url.trim().trim_end_matches('/'); - if !trimmed.ends_with("/chat/completions") { - return; - } - // OpenHuman's hosted backend exposes inference at `/openai/v1/chat/completions`; - // when api_url points there, the derived inference URL is already correct — - // just clear api_url so it falls back to the default base. For everything - // else, move the legacy value into inference_url. - let is_openhuman_backend = trimmed.starts_with("https://api.tinyhumans.ai/") - || trimmed.starts_with("https://staging-api.tinyhumans.ai/"); - let moved = if is_openhuman_backend { - None - } else { - Some(trimmed.to_string()) - }; - // Log the URL with userinfo (basic-auth creds) and query string stripped - // so credentials embedded by callers — `https://user:token@host/v1/...` - // or `?api_key=...` — don't end up in log files / Sentry breadcrumbs. 
- let logged = match moved.as_deref() { - None => "".to_string(), - Some(u) => redact_url_for_log(u), - }; - tracing::info!( - "[config][migrate] splitting legacy api_url -> inference_url (api_url cleared, inference_url={})", - logged - ); - config.inference_url = moved; - config.api_url = None; -} - -/// Strip userinfo (basic-auth) and query string from a URL string for log -/// emission. Falls back to a coarse `/...` form when parsing fails so -/// we never leak the raw input. Public only so the migration's unit test -/// can assert the behaviour. -pub(super) fn redact_url_for_log(raw: &str) -> String { - if let Ok(mut url) = url::Url::parse(raw) { - let _ = url.set_username(""); - let _ = url.set_password(None); - url.set_query(None); - url.set_fragment(None); - return url.to_string(); - } - // Unparseable — keep the scheme+host hint, drop everything after the - // first `?` or `#`, and replace any `:port@host` userinfo with `***`. - let truncated = raw - .split(['?', '#']) - .next() - .unwrap_or(raw) - .trim_end_matches('/'); - if let Some((scheme, rest)) = truncated.split_once("://") { - if let Some((_, host_path)) = rest.split_once('@') { - return format!("{scheme}://***@{host_path}"); - } - return format!("{scheme}://{rest}"); - } - "".to_string() -} - -/// Migrate `cloud_providers` entries to the new slug-keyed shape and rewrite -/// any per-workload routing strings that still use the old bare-prefix grammar. -/// -/// This is idempotent: entries that already have a slug/label are left -/// untouched. Routing fields that already contain a `:` are assumed to be -/// in the new `:` form. -fn migrate_cloud_provider_slugs(config: &mut Config) { - use super::cloud_providers::{migrate_legacy_fields, AuthStyle}; - - // Step 1: migrate every cloud_providers entry in-place. - for entry in &mut config.cloud_providers { - migrate_legacy_fields(entry); - } - - // Step 2: rewrite per-workload routing strings from legacy bare grammar. 
- // Build a lookup: legacy type string → first entry with that slug. - // After migration, `entry.slug` is populated from `legacy_type` when it - // was empty, so we can look up by slug now. - let slug_to_id: std::collections::HashMap = config - .cloud_providers - .iter() - .map(|e| (e.slug.clone(), e.id.clone())) - .collect(); - - let legacy_custom_slug = config - .inference_url - .as_deref() - .map(str::trim) - .filter(|url| !url.is_empty() && !looks_like_openhuman_provider_endpoint(url)) - .and_then(|url| { - let normalized = normalize_provider_endpoint(url); - config - .cloud_providers - .iter() - .find(|entry| { - !is_openhuman_provider_entry(entry) - && normalize_provider_endpoint(&entry.endpoint) == normalized - }) - .map(|entry| entry.slug.clone()) - }); - - // Helper: rewrite a single routing field. - // Legacy bare strings are: "cloud", "openhuman", "openai", "anthropic", - // "openrouter", "custom" (no ':'). New strings contain ':'. - let rewrite = |field: &mut Option| { - let raw = match field.as_deref() { - Some(s) if !s.is_empty() => s.to_string(), - _ => return, - }; - // Already in new grammar (contains ':') or is the openhuman sentinel. - if raw.contains(':') || raw == "openhuman" { - return; - } - match raw.as_str() { - "cloud" => { - // "cloud" sentinel: look for the primary or first non-openhuman entry. - // If a legacy external inference_url exists and primary still points - // at OpenHuman, keep routing on that custom provider; that shape was - // written by older builds that preserved the endpoint but defaulted - // primary_cloud to OpenHuman. 
- let primary_slug = config.primary_cloud.as_deref().and_then(|pid| { - config - .cloud_providers - .iter() - .find(|e| e.id == pid) - .map(|e| e.slug.clone()) - }); - let slug = match primary_slug.as_deref() { - Some("openhuman") => legacy_custom_slug.clone().or(primary_slug), - Some(_) => primary_slug, - None => legacy_custom_slug.clone().or_else(|| { - config - .cloud_providers - .iter() - .find(|entry| !is_openhuman_provider_entry(entry)) - .map(|entry| entry.slug.clone()) - }), - }; - if let Some(s) = slug { - if s == "openhuman" { - tracing::debug!( - "[config][migrate] rewriting routing 'cloud' → 'openhuman'" - ); - *field = Some("openhuman".to_string()); - } else { - tracing::info!( - "[config][migrate] rewriting routing 'cloud' → '{s}:' (empty model)" - ); - *field = Some(format!("{s}:")); - } - } else { - tracing::debug!( - "[config][migrate] routing 'cloud' with no non-openhuman provider → 'openhuman'" - ); - *field = Some("openhuman".to_string()); - } - } - other => { - // Bare type string (e.g. "openai") — find entry by slug. 
- if slug_to_id.contains_key(other) { - tracing::info!( - "[config][migrate] rewriting bare routing '{}' → '{}:'", - other, - other - ); - *field = Some(format!("{other}:")); - } else if other != "openhuman" { - tracing::warn!( - "[config][migrate] bare routing '{}' has no matching provider entry, \ - falling back to 'openhuman'", - other - ); - *field = Some("openhuman".to_string()); - } - } - } - }; - - rewrite(&mut config.reasoning_provider); - rewrite(&mut config.agentic_provider); - rewrite(&mut config.coding_provider); - rewrite(&mut config.memory_provider); - rewrite(&mut config.embeddings_provider); - rewrite(&mut config.heartbeat_provider); - rewrite(&mut config.learning_provider); - rewrite(&mut config.subconscious_provider); - - fn normalize_provider_endpoint(url: &str) -> String { - url.trim().trim_end_matches('/').to_ascii_lowercase() - } - - fn looks_like_openhuman_provider_endpoint(url: &str) -> bool { - let lower = url.trim().to_ascii_lowercase(); - let without_scheme = lower.split("://").nth(1).unwrap_or(&lower); - let authority = without_scheme.split('/').next().unwrap_or(""); - let host = authority.split('@').next_back().unwrap_or(authority); - let host_no_port = host.split(':').next().unwrap_or(host); - matches!( - host_no_port, - "api.openhuman.ai" | "api.tinyhumans.ai" | "staging-api.tinyhumans.ai" | "openhuman" - ) || host_no_port.ends_with(".openhuman.ai") - || host_no_port.ends_with(".tinyhumans.ai") - } - - fn is_openhuman_provider_entry(entry: &super::cloud_providers::CloudProviderCreds) -> bool { - entry.slug == "openhuman" - || matches!(entry.auth_style, AuthStyle::OpenhumanJwt) - || looks_like_openhuman_provider_endpoint(&entry.endpoint) - } -} - -fn migrate_legacy_autocomplete_disabled_apps(config: &mut Config) { - // Legacy defaults blocked both terminal and code, which prevented Codex/CLI usage. - // Migrate only the exact legacy default so custom user preferences remain untouched. 
- let mut normalized: Vec = config - .autocomplete - .disabled_apps - .iter() - .map(|value| value.trim().to_ascii_lowercase()) - .filter(|value| !value.is_empty()) - .collect(); - normalized.sort(); - normalized.dedup(); - - if normalized == ["code".to_string(), "terminal".to_string()] { - config.autocomplete.disabled_apps = vec!["code".to_string()]; - } -} - -#[cfg(unix)] -async fn sync_directory(path: &Path) -> Result<()> { - let dir = File::open(path) - .await - .with_context(|| format!("Failed to open directory for fsync: {}", path.display()))?; - dir.sync_all() - .await - .with_context(|| format!("Failed to fsync directory metadata: {}", path.display()))?; - Ok(()) -} - -#[cfg(not(unix))] -async fn sync_directory(_path: &Path) -> Result<()> { - Ok(()) -} - -impl Config { - pub async fn load_or_init() -> Result { - let (default_openhuman_dir, default_workspace_dir) = default_config_and_workspace_dirs()?; - Self::load_or_init_with_env_lookup( - &default_openhuman_dir, - &default_workspace_dir, - &ProcessEnv, - ) - .await - } - - async fn load_or_init_with_env_lookup( - default_openhuman_dir: &Path, - default_workspace_dir: &Path, - env: &(dyn EnvLookup + Send + Sync), - ) -> Result { - let (openhuman_dir, workspace_dir, resolution_source) = - resolve_runtime_config_dirs_with(default_openhuman_dir, default_workspace_dir, env) - .await?; - - let config_path = openhuman_dir.join("config.toml"); - - // Pre-login path: no active user, no workspace marker, no env override, - // and no existing config.toml on disk. Return an in-memory default - // config without creating any directories or writing any files — disk - // state is deferred until the first successful login in - // `credentials::ops::store_session`, which writes `active_user.toml` - // and triggers a reload that materializes the user-scoped directory. 
- if resolution_source == ConfigResolutionSource::DefaultConfigDir && !config_path.exists() { - let mut config = Config { - config_path: config_path.clone(), - workspace_dir: workspace_dir.clone(), - action_dir: default_action_dir(), - ..Default::default() - }; - config.apply_env_overrides_from(env); - - tracing::debug!( - path = %config.config_path.display(), - workspace = %config.workspace_dir.display(), - source = resolution_source.as_str(), - initialized = false, - persisted = false, - "Config loaded (pre-login, in-memory only — no dirs or files written)" - ); - return Ok(config); - } - - fs::create_dir_all(&openhuman_dir) - .await - .context("Failed to create config directory")?; - fs::create_dir_all(&workspace_dir) - .await - .context("Failed to create workspace directory")?; - - if config_path.exists() { - #[cfg(unix)] - { - use std::{fs::Permissions, os::unix::fs::PermissionsExt}; - if let Ok(meta) = fs::metadata(&config_path).await { - if meta.permissions().mode() & 0o004 != 0 { - let warned = WARNED_WORLD_READABLE_CONFIGS - .get_or_init(|| Mutex::new(HashSet::new())); - // Only attempt to fix paths not yet successfully chmod'd. - // Cache is advanced only on success so a persistent - // failure re-warns and re-attempts on every load. 
- let already_fixed = warned - .lock() - .unwrap_or_else(|e| e.into_inner()) - .contains(&config_path); - if !already_fixed { - tracing::warn!( - "[config] Config file {:?} is world-readable (mode {:o}); \ - auto-fixing to 600", - config_path, - meta.permissions().mode() & 0o777, - ); - match fs::set_permissions(&config_path, Permissions::from_mode(0o600)) - .await - { - Ok(()) => { - warned - .lock() - .unwrap_or_else(|e| e.into_inner()) - .insert(config_path.clone()); - } - Err(e) => { - tracing::warn!( - path = %config_path.display(), - error = %e, - "[config] failed to auto-fix config file permissions to 600", - ); - } - } - } - } - } - } - - // Sentry OPENHUMAN-TAURI-9R (~8k events, Windows): this read can - // race the atomic-replace in `Config::save` (temp file → - // `fs::rename` over `config_path`). On Windows the in-flight - // rename / a transient AV or indexer handle makes the read fail - // with ERROR_SHARING_VIOLATION (32) / ERROR_ACCESS_DENIED (5) / - // ERROR_DELETE_PENDING (303) even though `config_path.exists()` - // just returned true. `inference_status` polls `load_config` - // frequently, so each coincidence with a save produced one - // "Failed to read config file" event. Retry on the transient - // Windows locking codes (the same class `retry_with_backoff_async` - // already handles for the auth-profile + team_get_usage paths) so - // the read succeeds once the writer releases its handle. - // `is_transient_fs_error` is `false` for every non-Windows error - // (and for NotFound on Windows), so this is a no-op on - // macOS/Linux and never masks a genuinely-unreadable config. 
- let contents = crate::openhuman::util::retry_with_backoff_async( - "read config file", - 5, - 20, - || async { - fs::read_to_string(&config_path).await.with_context(|| { - format!("Failed to read config file: {}", config_path.display()) - }) - }, - ) - .await?; - let (mut config, config_was_corrupted) = - parse_config_with_recovery(&config_path, &contents).await; - config.config_path = config_path.clone(); - config.workspace_dir = workspace_dir; - // Resolve from the precedence chain (env > persisted override > - // default) now that the override field is loaded from disk. - config.action_dir = resolve_action_dir(&config.action_dir_override); - migrate_legacy_autocomplete_disabled_apps(&mut config); - migrate_legacy_inference_url(&mut config); - migrate_cloud_provider_slugs(&mut config); - config.apply_env_overrides_from(env); - - if config_was_corrupted { - // Rename the corrupted primary away *before* calling save(). - // save() copies config_path → config_path.bak before the - // atomic replace, so if the corrupted file is still at - // config_path it would overwrite the good .bak that we just - // used for recovery. Only call save() when the rename - // succeeds; on failure log and leave recovery for next boot - // rather than destroying the good backup. 
- let corrupted_path = config_path.with_extension("toml.corrupted"); - match fs::rename(&config_path, &corrupted_path).await { - Ok(()) => { - tracing::debug!( - src = %config_path.display(), - dst = %corrupted_path.display(), - "[config] Renamed corrupted config; persisting recovered config" - ); - if let Err(e) = config.save().await { - tracing::warn!( - path = %config.config_path.display(), - error = %e, - "[config] Failed to persist recovered config to disk" - ); - } - } - Err(e) => { - tracing::warn!( - src = %config_path.display(), - dst = %corrupted_path.display(), - error = %e, - "[config] Failed to rename corrupted config; skipping save to \ - protect the .bak — will retry recovery on next startup" - ); - } - } - } - - tracing::debug!( - path = %config.config_path.display(), - workspace = %config.workspace_dir.display(), - source = resolution_source.as_str(), - initialized = false, - recovered = config_was_corrupted, - "Config loaded" - ); - crate::openhuman::migrations::run_pending(&mut config).await; - decrypt_config_secrets(&mut config, &openhuman_dir)?; - Ok(config) - } else { - // Fresh install: there is no legacy on-disk state, so stamp - // the workspace at the current schema version up front. This - // makes `run_pending` a fast no-op on the first launch - // (nothing to migrate) and keeps the "first launch on this - // workspace" semantics aligned with "current binary built - // this workspace". 
- let mut config = Config { - config_path: config_path.clone(), - workspace_dir, - action_dir: default_action_dir(), - schema_version: crate::openhuman::migrations::CURRENT_SCHEMA_VERSION, - ..Default::default() - }; - config.save().await?; - - #[cfg(unix)] - { - use std::{fs::Permissions, os::unix::fs::PermissionsExt}; - let _ = fs::set_permissions(&config_path, Permissions::from_mode(0o600)).await; - } - - config.apply_env_overrides_from(env); - - tracing::debug!( - path = %config.config_path.display(), - workspace = %config.workspace_dir.display(), - source = resolution_source.as_str(), - initialized = true, - "Config loaded" - ); - // Defensive: still call run_pending. It will see - // `schema_version == CURRENT` and return immediately, but - // the call site stays symmetric with the existing-config - // branch so a future migration that needs to fire on fresh - // installs (vanishingly unlikely, but possible) doesn't - // require touching this path. - crate::openhuman::migrations::run_pending(&mut config).await; - Ok(config) - } - } - - /// Load config from the default user paths, bypassing the - /// `OPENHUMAN_WORKSPACE` environment variable. - /// - /// This is used by the debug dump to load the real user config - /// for auth token resolution when the dump script overrides - /// `OPENHUMAN_WORKSPACE` to a throwaway temp directory. 
- pub async fn load_from_default_paths() -> Result { - let (default_openhuman_dir, default_workspace_dir) = default_config_and_workspace_dirs()?; - let (openhuman_dir, workspace_dir, _source) = - resolve_config_dirs_ignoring_env(&default_openhuman_dir, &default_workspace_dir) - .await?; - let config_path = openhuman_dir.join("config.toml"); - - if !config_path.exists() { - let mut config = Config { - config_path, - workspace_dir, - action_dir: default_action_dir(), - ..Default::default() - }; - config.apply_env_overrides(); - return Ok(config); - } - - // NOTE: no backup recovery here by design — this is the debug-dump path only; - // `load_or_init()` is the authoritative startup path that handles corruption. - let raw = fs::read_to_string(&config_path) - .await - .context("reading config.toml from default paths")?; - let (mut config, _was_corrupted) = parse_config_with_recovery(&config_path, &raw).await; - config.config_path = config_path; - config.workspace_dir = workspace_dir; - config.action_dir = resolve_action_dir(&config.action_dir_override); - config.apply_env_overrides(); - decrypt_config_secrets(&mut config, &openhuman_dir)?; - Ok(config) - } - - /// Reload a config from an already-resolved `config.toml` path. - /// - /// This is for long-lived runtime objects that hold a `Config` - /// snapshot and need to observe updates written back to the same - /// file. It deliberately bypasses only `OPENHUMAN_WORKSPACE` - /// resolution: the caller has already been scoped to a user/workspace, - /// and following the process-global workspace env var again can cross - /// streams with unrelated tests or runtime tasks that temporarily - /// repoint it. Other process env overrides still apply. 
- pub async fn load_from_config_path(config_path: &Path, workspace_dir: &Path) -> Result { - let config_path = config_path.to_path_buf(); - let workspace_dir = workspace_dir.to_path_buf(); - - if !config_path.exists() { - let mut config = Config { - config_path, - workspace_dir, - action_dir: default_action_dir(), - ..Default::default() - }; - config.apply_env_overrides_from(&ProcessEnvWithoutWorkspace); - return Ok(config); - } - - let raw = fs::read_to_string(&config_path) - .await - .with_context(|| format!("reading config.toml from {}", config_path.display()))?; - let (mut config, config_was_corrupted) = - parse_config_with_recovery(&config_path, &raw).await; - config.config_path = config_path; - config.workspace_dir = workspace_dir; - config.action_dir = resolve_action_dir(&config.action_dir_override); - migrate_legacy_autocomplete_disabled_apps(&mut config); - migrate_legacy_inference_url(&mut config); - migrate_cloud_provider_slugs(&mut config); - config.apply_env_overrides_from(&ProcessEnvWithoutWorkspace); - - if config_was_corrupted { - tracing::warn!( - path = %config.config_path.display(), - "[config] Snapshot reload recovered a corrupted config; skipping persistence" - ); - } - - crate::openhuman::migrations::run_pending(&mut config).await; - Ok(config) - } - - pub fn apply_env_overrides(&mut self) { - self.apply_env_overrides_from(&ProcessEnv); - } - - fn apply_env_overrides_from(&mut self, env: &(dyn EnvLookup + Send + Sync)) { - self.apply_env_overlay_with(env); - - // The pure overlay above never mutates process-level state. The - // two side effects below remain here so tests driving - // `apply_env_overlay_with` directly don't clobber the shared - // runtime proxy client cache or mutate `HTTP_PROXY` / etc. on - // the running process. 
- if self.proxy.enabled && self.proxy.scope == ProxyScope::Environment { - self.proxy.apply_to_process_env(); - } - - set_runtime_proxy_config(self.proxy.clone()); - - // Push the embedding request budget into its process-global limiter so - // every cloud embed (via the shared `OpenAiEmbedding` chokepoint) is - // throttled to the configured rate. Kept here, with the proxy commit, - // so the pure overlay stays side-effect-free for tests. - crate::openhuman::embeddings::rate_limit::set_embedding_rate_limit( - self.memory.embedding_rate_limit_per_min, - ); - } - - /// Pure-ish env overlay: applies overrides read from `env` to `self`. - /// - /// "Pure-ish" because it still emits `tracing` logs and calls - /// `self.proxy.validate()` (which only reads). Crucially, it does - /// **not** write to the process environment nor the - /// `set_runtime_proxy_config` global — those stay in the public - /// [`Self::apply_env_overrides`] wrapper so unit tests can call this - /// with a [`HashMapEnv`] (see tests) without requiring the - /// `TEST_ENV_LOCK` or tainting sibling tests. - pub(crate) fn apply_env_overlay_with(&mut self, env: &E) { - // Only the namespaced `OPENHUMAN_MODEL` is honoured. The bare `MODEL` - // env var used to be accepted as an alias but collides with vendor - // asset-tag env vars (e.g. Dell OptiPlex sets `MODEL=7080`), which - // silently clobbered the LLM model and 400'd every backend call - // (Sentry OPENHUMAN-TAURI-J8). - if let Some(model) = env.get("OPENHUMAN_MODEL") { - // Trim before checking so `OPENHUMAN_MODEL=" "` (a common - // shape from shells that pass through an unset-but-declared - // variable) doesn't clobber the configured default with a - // non-usable value. 
- let trimmed = model.trim(); - if !trimmed.is_empty() { - self.default_model = Some(trimmed.to_string()); - } - } - - if let Some(workspace) = env.get("OPENHUMAN_WORKSPACE") { - if !workspace.is_empty() { - let (_, workspace_dir) = - resolve_config_dir_for_workspace(&PathBuf::from(workspace)); - self.workspace_dir = workspace_dir; - } - } - - if let Some(v) = env.get("OPENHUMAN_ACTION_DIR") { - let trimmed = v.trim(); - if !trimmed.is_empty() { - self.action_dir = PathBuf::from(trimmed); - } - } - - if let Some(temp_str) = env.get("OPENHUMAN_TEMPERATURE") { - if let Ok(temp) = temp_str.parse::() { - if (0.0..=2.0).contains(&temp) { - self.default_temperature = temp; - } - } - } - - if let Some(raw) = env.get("OPENHUMAN_MAX_ACTIONS_PER_HOUR") { - let trimmed = raw.trim(); - if !trimmed.is_empty() { - match trimmed.parse::() { - Ok(limit) => self.autonomy.max_actions_per_hour = limit, - Err(_) => tracing::warn!( - value = %raw, - "invalid OPENHUMAN_MAX_ACTIONS_PER_HOUR ignored; expected an unsigned integer" - ), - } - } - } - - // Global memory-sync cadence override (#3302). `0` is honoured here - // (unlike the per-provider `OPENHUMAN_COMPOSIO_*_SYNC_INTERVAL_SECS`) - // because it carries the "Manual only" meaning. A non-numeric value - // is warned-and-ignored, leaving the persisted/None value intact. 
- if let Some(raw) = env.get(MEMORY_SYNC_INTERVAL_SECS_ENV_VAR) { - let trimmed = raw.trim(); - if !trimmed.is_empty() { - match trimmed.parse::() { - Ok(secs) => self.memory_sync_interval_secs = Some(secs), - Err(_) => tracing::warn!( - env = %MEMORY_SYNC_INTERVAL_SECS_ENV_VAR, - value = %raw, - "invalid memory-sync interval ignored; expected an unsigned integer (0 = manual)" - ), - } - } - } - - if let Some(language) = env.get("OPENHUMAN_OUTPUT_LANGUAGE") { - let language = language.trim(); - if !language.is_empty() { - self.output_language = Some(language.to_string()); - } - } - - if let Some(flag) = env.get_any(&["OPENHUMAN_REASONING_ENABLED", "REASONING_ENABLED"]) { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.runtime.reasoning_enabled = Some(true), - "0" | "false" | "no" | "off" => self.runtime.reasoning_enabled = Some(false), - _ => {} - } - } - - // Seltz direct-API search. - if let Some(key) = env.get_any(&["OPENHUMAN_SELTZ_API_KEY", "SELTZ_API_KEY"]) { - if !key.is_empty() { - self.seltz.api_key = Some(key); - // Auto-enable when the key is set via env. - self.seltz.enabled = true; - } - } - if let Some(url) = env.get_any(&["OPENHUMAN_SELTZ_API_URL", "SELTZ_API_URL"]) { - if !url.is_empty() { - self.seltz.api_url = Some(url); - } - } - if let Some(max) = env.get_any(&["OPENHUMAN_SELTZ_MAX_RESULTS", "SELTZ_MAX_RESULTS"]) { - if let Ok(n) = max.parse::() { - if (1..=20).contains(&n) { - self.seltz.max_results = n; - } - } - } - - // SearXNG self-hosted search. Unlike Seltz, this needs no API key; - // keep it opt-in because it reaches a user-controlled HTTP endpoint. 
- if let Some(flag) = env.get_any(&["OPENHUMAN_SEARXNG_ENABLED", "SEARXNG_ENABLED"]) { - if let Some(enabled) = parse_env_bool("OPENHUMAN_SEARXNG_ENABLED", &flag) { - self.searxng.enabled = enabled; - } - } - if let Some(url) = env.get_any(&["OPENHUMAN_SEARXNG_BASE_URL", "SEARXNG_BASE_URL"]) { - let url = url.trim(); - if !url.is_empty() { - self.searxng.base_url = url.to_string(); - } - } - if let Some(max) = env.get_any(&["OPENHUMAN_SEARXNG_MAX_RESULTS", "SEARXNG_MAX_RESULTS"]) { - if let Ok(n) = max.parse::() { - if (1..=50).contains(&n) { - self.searxng.max_results = n; - } - } - } - if let Some(language) = env.get_any(&[ - "OPENHUMAN_SEARXNG_DEFAULT_LANGUAGE", - "SEARXNG_DEFAULT_LANGUAGE", - ]) { - let language = language.trim(); - if !language.is_empty() { - self.searxng.default_language = language.to_string(); - } - } - if let Some(timeout_secs) = env.get_any(&[ - "OPENHUMAN_SEARXNG_TIMEOUT_SECS", - "OPENHUMAN_SEARXNG_TIMEOUT_SECONDS", - "SEARXNG_TIMEOUT_SECS", - "SEARXNG_TIMEOUT_SECONDS", - ]) { - if let Ok(timeout_secs) = timeout_secs.parse::() { - if timeout_secs > 0 { - self.searxng.timeout_secs = timeout_secs; - } - } - } - - // Unified search engine selector. `OPENHUMAN_SEARCH_ENGINE` picks the - // active engine (`disabled` suppresses search tools); per-engine API - // keys auto-route to BYO once set. 
- if let Some(engine) = env.get_any(&["OPENHUMAN_SEARCH_ENGINE", "SEARCH_ENGINE"]) { - let engine = engine.trim().to_ascii_lowercase(); - if !engine.is_empty() { - self.search.engine = engine; - } - } - if let Some(key) = env.get_any(&["OPENHUMAN_PARALLEL_API_KEY", "PARALLEL_API_KEY"]) { - if !key.trim().is_empty() { - self.search.parallel.api_key = Some(key); - } - } - if let Some(key) = env.get_any(&["OPENHUMAN_BRAVE_API_KEY", "BRAVE_API_KEY"]) { - if !key.trim().is_empty() { - self.search.brave.api_key = Some(key); - } - } - if let Some(key) = env.get_any(&["OPENHUMAN_QUERIT_API_KEY", "QUERIT_API_KEY"]) { - if !key.trim().is_empty() { - self.search.querit.api_key = Some(key); - } - } - if let Some(max) = env.get_any(&["OPENHUMAN_SEARCH_MAX_RESULTS", "SEARCH_MAX_RESULTS"]) { - if let Ok(n) = max.parse::() { - if (1..=20).contains(&n) { - self.search.max_results = n; - } - } - } - if let Some(t) = env.get_any(&["OPENHUMAN_SEARCH_TIMEOUT_SECS", "SEARCH_TIMEOUT_SECS"]) { - if let Ok(n) = t.parse::() { - if n > 0 { - self.search.timeout_secs = n; - } - } - } - - // `OPENHUMAN_WEB_SEARCH_ENABLED` is intentionally ignored — - // web search is unconditionally registered in the tool set. - // Only the result/timeout budget knobs remain environment-configurable. - if env.contains("OPENHUMAN_WEB_SEARCH_ENABLED") { - log::warn!( - "[config] OPENHUMAN_WEB_SEARCH_ENABLED is deprecated and ignored — \ - web search is always registered; provider/API-key overrides were removed." 
- ); - } - - if let Some(max_results) = - env.get_any(&["OPENHUMAN_WEB_SEARCH_MAX_RESULTS", "WEB_SEARCH_MAX_RESULTS"]) - { - if let Ok(max_results) = max_results.parse::() { - if (1..=10).contains(&max_results) { - self.web_search.max_results = max_results; - } - } - } - - if let Some(timeout_secs) = env.get_any(&[ - "OPENHUMAN_WEB_SEARCH_TIMEOUT_SECS", - "WEB_SEARCH_TIMEOUT_SECS", - ]) { - if let Ok(timeout_secs) = timeout_secs.parse::() { - if timeout_secs > 0 { - self.web_search.timeout_secs = timeout_secs; - } - } - } - - let explicit_proxy_enabled = env - .get("OPENHUMAN_PROXY_ENABLED") - .as_deref() - .and_then(parse_proxy_enabled); - if let Some(enabled) = explicit_proxy_enabled { - self.proxy.enabled = enabled; - } - - let mut proxy_url_overridden = false; - if let Some(proxy_url) = env.get_any(&["OPENHUMAN_HTTP_PROXY", "HTTP_PROXY"]) { - self.proxy.http_proxy = normalize_proxy_url_option(Some(&proxy_url)); - proxy_url_overridden = true; - } - if let Some(proxy_url) = env.get_any(&["OPENHUMAN_HTTPS_PROXY", "HTTPS_PROXY"]) { - self.proxy.https_proxy = normalize_proxy_url_option(Some(&proxy_url)); - proxy_url_overridden = true; - } - if let Some(proxy_url) = env.get_any(&["OPENHUMAN_ALL_PROXY", "ALL_PROXY"]) { - self.proxy.all_proxy = normalize_proxy_url_option(Some(&proxy_url)); - proxy_url_overridden = true; - } - if let Some(no_proxy) = env.get_any(&["OPENHUMAN_NO_PROXY", "NO_PROXY"]) { - self.proxy.no_proxy = normalize_no_proxy_list(vec![no_proxy]); - } - - if explicit_proxy_enabled.is_none() - && proxy_url_overridden - && self.proxy.has_any_proxy_url() - { - self.proxy.enabled = true; - } - - if let Some(scope_raw) = env.get("OPENHUMAN_PROXY_SCOPE") { - let trimmed = scope_raw.trim(); - if !trimmed.is_empty() { - match parse_proxy_scope(trimmed) { - Some(scope) => self.proxy.scope = scope, - None => { - tracing::warn!("Invalid OPENHUMAN_PROXY_SCOPE value {:?} ignored", trimmed); - } - } - } - } - - if let Some(services_raw) = 
env.get("OPENHUMAN_PROXY_SERVICES") { - self.proxy.services = normalize_service_list(vec![services_raw]); - } - - if let Err(error) = self.proxy.validate() { - tracing::warn!("Invalid proxy configuration ignored: {error}"); - self.proxy.enabled = false; - } - - if let Some(tier_str) = env.get("OPENHUMAN_LOCAL_AI_TIER") { - let tier_str = tier_str.trim().to_ascii_lowercase(); - if !tier_str.is_empty() { - if let Some(tier) = - crate::openhuman::inference::presets::ModelTier::from_str_opt(&tier_str) - { - if tier == crate::openhuman::inference::presets::ModelTier::Custom { - tracing::warn!( - tier = %tier_str, - "ignoring custom OPENHUMAN_LOCAL_AI_TIER; only built-in presets are supported" - ); - } else if !tier.is_mvp_allowed() { - tracing::warn!( - tier = %tier_str, - "ignoring OPENHUMAN_LOCAL_AI_TIER outside the 1B local-model allowlist" - ); - } else { - crate::openhuman::inference::presets::apply_preset_to_config( - &mut self.local_ai, - tier, - ); - tracing::debug!(tier = %tier_str, "applied local AI tier from OPENHUMAN_LOCAL_AI_TIER"); - } - } else { - tracing::warn!( - tier = %tier_str, - "ignoring invalid OPENHUMAN_LOCAL_AI_TIER (valid: ram_2_4gb)" - ); - } - } - } - - // Node runtime overrides - if let Some(flag) = env.get("OPENHUMAN_NODE_ENABLED") { - if let Some(enabled) = parse_env_bool("OPENHUMAN_NODE_ENABLED", &flag) { - self.node.enabled = enabled; - } - } - if let Some(version) = env.get("OPENHUMAN_NODE_VERSION") { - let trimmed = version.trim(); - if !trimmed.is_empty() { - self.node.version = trimmed.to_string(); - } - } - if let Some(dir) = env.get("OPENHUMAN_NODE_CACHE_DIR") { - let trimmed = dir.trim(); - if !trimmed.is_empty() { - self.node.cache_dir = trimmed.to_string(); - } - } - if let Some(flag) = env.get("OPENHUMAN_NODE_PREFER_SYSTEM") { - if let Some(prefer_system) = parse_env_bool("OPENHUMAN_NODE_PREFER_SYSTEM", &flag) { - self.node.prefer_system = prefer_system; - } - } - - // Python runtime overrides - if let Some(flag) = 
env.get("OPENHUMAN_RUNTIME_PYTHON_ENABLED") { - if let Some(enabled) = parse_env_bool("OPENHUMAN_RUNTIME_PYTHON_ENABLED", &flag) { - self.runtime_python.enabled = enabled; - } - } - if let Some(version) = env.get("OPENHUMAN_RUNTIME_PYTHON_MINIMUM_VERSION") { - let trimmed = version.trim(); - if !trimmed.is_empty() { - self.runtime_python.minimum_version = trimmed.to_string(); - } - } - if let Some(dir) = env.get("OPENHUMAN_RUNTIME_PYTHON_CACHE_DIR") { - self.runtime_python.cache_dir = dir.trim().to_string(); - } - if let Some(tag) = env.get("OPENHUMAN_RUNTIME_PYTHON_MANAGED_RELEASE_TAG") { - self.runtime_python.managed_release_tag = tag.trim().to_string(); - } - if let Some(flag) = env.get("OPENHUMAN_RUNTIME_PYTHON_PREFER_SYSTEM") { - if let Some(prefer_system) = - parse_env_bool("OPENHUMAN_RUNTIME_PYTHON_PREFER_SYSTEM", &flag) - { - self.runtime_python.prefer_system = prefer_system; - } - } - if let Some(command) = env.get("OPENHUMAN_RUNTIME_PYTHON_PREFERRED_COMMAND") { - self.runtime_python.preferred_command = command.trim().to_string(); - } - - // Prefer the namespaced name. `OPENHUMAN_SENTRY_DSN` is the legacy - // unprefixed name kept as a fallback so existing CI vars and local - // `.env` files keep working until the GH org-level variable can be - // renamed in lock-step. 
- let dsn_value = env - .get("OPENHUMAN_CORE_SENTRY_DSN") - .or_else(|| env.get("OPENHUMAN_SENTRY_DSN")) - .or_else(|| option_env!("OPENHUMAN_CORE_SENTRY_DSN").map(|s| s.to_string())) - .or_else(|| option_env!("OPENHUMAN_SENTRY_DSN").map(|s| s.to_string())); - if let Some(dsn) = dsn_value { - let dsn = dsn.trim(); - if !dsn.is_empty() { - self.observability.sentry_dsn = Some(dsn.to_string()); - } - } - - if let Some(flag) = env.get("OPENHUMAN_ANALYTICS_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.observability.analytics_enabled = true, - "0" | "false" | "no" | "off" => self.observability.analytics_enabled = false, - _ => {} - } - } - - // Learning subsystem overrides - if let Some(flag) = env.get("OPENHUMAN_LEARNING_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.learning.enabled = true, - "0" | "false" | "no" | "off" => self.learning.enabled = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_REFLECTION_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.learning.reflection_enabled = true, - "0" | "false" | "no" | "off" => self.learning.reflection_enabled = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_USER_PROFILE_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.learning.user_profile_enabled = true, - "0" | "false" | "no" | "off" => self.learning.user_profile_enabled = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_TOOL_TRACKING_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.learning.tool_tracking_enabled = true, - "0" | "false" | "no" | "off" => 
self.learning.tool_tracking_enabled = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_TOOL_MEMORY_CAPTURE_ENABLED") { - if let Some(enabled) = parse_env_bool( - "OPENHUMAN_LEARNING_TOOL_MEMORY_CAPTURE_ENABLED", - flag.as_str(), - ) { - self.learning.tool_memory_capture_enabled = enabled; - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_EXPLICIT_PREFERENCES_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.learning.explicit_preferences_enabled = true, - "0" | "false" | "no" | "off" => self.learning.explicit_preferences_enabled = false, - _ => {} - } - } - if let Some(source) = env.get("OPENHUMAN_LEARNING_REFLECTION_SOURCE") { - let normalized = source.trim().to_ascii_lowercase(); - match normalized.as_str() { - "local" => { - self.learning.reflection_source = - crate::openhuman::config::ReflectionSource::Local - } - "cloud" => { - self.learning.reflection_source = - crate::openhuman::config::ReflectionSource::Cloud - } - _ => { - tracing::warn!( - source = %source, - "ignoring invalid OPENHUMAN_LEARNING_REFLECTION_SOURCE (valid: local, cloud)" - ); - } - } - } - if let Some(val) = env.get("OPENHUMAN_LEARNING_MAX_REFLECTIONS_PER_SESSION") { - if let Ok(max) = val.trim().parse::() { - self.learning.max_reflections_per_session = max; - } - } - if let Some(val) = env.get("OPENHUMAN_LEARNING_MIN_TURN_COMPLEXITY") { - if let Ok(min) = val.trim().parse::() { - self.learning.min_turn_complexity = min; - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_EPISODIC_CAPTURE_ENABLED") { - if let Some(enabled) = - parse_env_bool("OPENHUMAN_LEARNING_EPISODIC_CAPTURE_ENABLED", flag.as_str()) - { - self.learning.episodic_capture_enabled = enabled; - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_STM_RECALL_ENABLED") { - if let Some(enabled) = - parse_env_bool("OPENHUMAN_LEARNING_STM_RECALL_ENABLED", flag.as_str()) - { - self.learning.stm_recall_enabled 
= enabled; - } - } - if let Some(flag) = env.get("OPENHUMAN_LEARNING_UNIFIED_COMPACTION_ENABLED") { - if let Some(enabled) = parse_env_bool( - "OPENHUMAN_LEARNING_UNIFIED_COMPACTION_ENABLED", - flag.as_str(), - ) { - self.learning.unified_compaction_enabled = enabled; - } - } - - // Phase 4 memory-tree embedding overrides (#710). Setting the env - // var to an empty string explicitly clears the default — useful - // for CI and other environments that want to opt into the - // InertEmbedder fallback without editing config.toml. - if let Ok(endpoint) = std::env::var("OPENHUMAN_MEMORY_EMBED_ENDPOINT") { - let trimmed = endpoint.trim(); - self.memory_tree.embedding_endpoint = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Ok(model) = std::env::var("OPENHUMAN_MEMORY_EMBED_MODEL") { - let trimmed = model.trim(); - self.memory_tree.embedding_model = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Ok(val) = std::env::var("OPENHUMAN_MEMORY_EMBED_TIMEOUT_MS") { - if let Ok(timeout_ms) = val.trim().parse::() { - if timeout_ms > 0 { - self.memory_tree.embedding_timeout_ms = Some(timeout_ms); - } - } - } - if let Ok(flag) = std::env::var("OPENHUMAN_MEMORY_EMBED_STRICT") { - if let Some(strict) = parse_env_bool("OPENHUMAN_MEMORY_EMBED_STRICT", &flag) { - self.memory_tree.embedding_strict = strict; - } - } - // Cloud embedding request budget (requests/min) on `memory.*`. `0` - // disables throttling. A blank or non-numeric value leaves the - // configured/default budget untouched. Committed to the process-global - // limiter in `apply_env_overrides`. - if let Some(val) = env.get("OPENHUMAN_MEMORY_EMBED_RATE_LIMIT") { - if let Ok(per_min) = val.trim().parse::() { - self.memory.embedding_rate_limit_per_min = per_min; - } - } - - // LLM entity extractor overrides — set endpoint + model to route - // ingest scoring through Ollama NER (Phase 2 follow-up). Empty - // string explicitly clears (opts out). 
- if let Ok(endpoint) = std::env::var("OPENHUMAN_MEMORY_EXTRACT_ENDPOINT") { - let trimmed = endpoint.trim(); - self.memory_tree.llm_extractor_endpoint = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Ok(model) = std::env::var("OPENHUMAN_MEMORY_EXTRACT_MODEL") { - let trimmed = model.trim(); - self.memory_tree.llm_extractor_model = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Ok(val) = std::env::var("OPENHUMAN_MEMORY_EXTRACT_TIMEOUT_MS") { - if let Ok(ms) = val.trim().parse::() { - if ms > 0 { - self.memory_tree.llm_extractor_timeout_ms = Some(ms); - } - } - } - - // LLM summariser overrides — set endpoint + model to route - // bucket-seal summaries through Ollama instead of InertSummariser - // (Phase 3a real-summariser hook). - if let Ok(endpoint) = std::env::var("OPENHUMAN_MEMORY_SUMMARISE_ENDPOINT") { - let trimmed = endpoint.trim(); - self.memory_tree.llm_summariser_endpoint = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Ok(model) = std::env::var("OPENHUMAN_MEMORY_SUMMARISE_MODEL") { - let trimmed = model.trim(); - self.memory_tree.llm_summariser_model = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - if let Ok(val) = std::env::var("OPENHUMAN_MEMORY_SUMMARISE_TIMEOUT_MS") { - if let Ok(ms) = val.trim().parse::() { - if ms > 0 { - self.memory_tree.llm_summariser_timeout_ms = Some(ms); - } - } - } - - // Phase MD-content: chunk body directory override. Empty string means - // "fall back to default", consistent with other memory_tree env vars. - // Routed through `env.get` so `HashMapEnv`-style test callers see the - // override too — same seam as every other branch in this function. 
- if let Some(dir) = env.get("OPENHUMAN_MEMORY_TREE_CONTENT_DIR") { - let trimmed = dir.trim(); - self.memory_tree.content_dir = if trimmed.is_empty() { - None - } else { - Some(std::path::PathBuf::from(trimmed)) - }; - } - - // Memory-tree LLM backend selector: "cloud" (default) routes through - // the OpenHuman backend's summarizer model; "local" keeps the legacy - // Ollama-direct path. Empty / unset / unknown leaves the existing - // value untouched (and we warn on unknown). The embedder is unaffected. - if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_LLM_BACKEND") { - let trimmed = raw.trim(); - if !trimmed.is_empty() { - match crate::openhuman::config::LlmBackend::parse(trimmed) { - Ok(b) => { - log::debug!( - "[memory_tree] OPENHUMAN_MEMORY_TREE_LLM_BACKEND override applied: {}", - b.as_str() - ); - self.memory_tree.llm_backend = b; - } - Err(e) => { - tracing::warn!( - value = trimmed, - error = %e, - "ignoring invalid OPENHUMAN_MEMORY_TREE_LLM_BACKEND (valid: cloud, local)" - ); - } - } - } - } - // Cloud LLM model override (only meaningful when llm_backend = cloud). - // Empty string explicitly clears the default — useful for tests that - // want to assert the absence of a configured cloud model. Non-empty - // strings are stored verbatim. 
- if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_CLOUD_LLM_MODEL") { - let trimmed = raw.trim(); - self.memory_tree.cloud_llm_model = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - - if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_SMART_WALK_MODEL") { - let trimmed = raw.trim(); - self.memory_tree.smart_walk_model = if trimmed.is_empty() { - None - } else { - Some(trimmed.to_string()) - }; - } - - if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION") { - if let Some(val) = parse_env_bool("OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION", &raw) { - self.memory_tree.cloud_summarization_opt_in = val; - } - } - - // Auto-update overrides - if let Some(flag) = env.get("OPENHUMAN_AUTO_UPDATE_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.update.enabled = true, - "0" | "false" | "no" | "off" => self.update.enabled = false, - _ => {} - } - } - if let Some(val) = env.get("OPENHUMAN_AUTO_UPDATE_INTERVAL_MINUTES") { - if let Ok(minutes) = val.trim().parse::() { - self.update.interval_minutes = minutes; - } - } - if let Some(raw) = env.get("OPENHUMAN_AUTO_UPDATE_RESTART_STRATEGY") { - match raw.trim().to_ascii_lowercase().as_str() { - "self_replace" | "self-replace" | "self" => { - self.update.restart_strategy = UpdateRestartStrategy::SelfReplace; - } - "supervisor" | "stage_only" | "stage-only" => { - self.update.restart_strategy = UpdateRestartStrategy::Supervisor; - } - other => { - tracing::warn!( - value = other, - "ignoring invalid OPENHUMAN_AUTO_UPDATE_RESTART_STRATEGY \ - (valid: self_replace, supervisor)" - ); - } - } - } - if let Some(flag) = env.get("OPENHUMAN_AUTO_UPDATE_RPC_MUTATIONS_ENABLED") { - if let Some(enabled) = - parse_env_bool("OPENHUMAN_AUTO_UPDATE_RPC_MUTATIONS_ENABLED", &flag) - { - self.update.rpc_mutations_enabled = enabled; - } - } - - // Dictation overrides - if let Some(flag) = 
env.get("OPENHUMAN_DICTATION_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.dictation.enabled = true, - "0" | "false" | "no" | "off" => self.dictation.enabled = false, - _ => {} - } - } - if let Some(hotkey) = env.get("OPENHUMAN_DICTATION_HOTKEY") { - let hotkey = hotkey.trim(); - if !hotkey.is_empty() { - self.dictation.hotkey = hotkey.to_string(); - } - } - if let Some(mode) = env.get("OPENHUMAN_DICTATION_ACTIVATION_MODE") { - let normalized = mode.trim().to_ascii_lowercase(); - match normalized.as_str() { - "toggle" => { - self.dictation.activation_mode = - crate::openhuman::config::DictationActivationMode::Toggle - } - "push" => { - self.dictation.activation_mode = - crate::openhuman::config::DictationActivationMode::Push - } - _ => { - tracing::warn!( - mode = %mode, - "ignoring invalid OPENHUMAN_DICTATION_ACTIVATION_MODE (valid: toggle, push)" - ); - } - } - } - if let Some(flag) = env.get("OPENHUMAN_DICTATION_LLM_REFINEMENT") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.dictation.llm_refinement = true, - "0" | "false" | "no" | "off" => self.dictation.llm_refinement = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_DICTATION_STREAMING") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.dictation.streaming = true, - "0" | "false" | "no" | "off" => self.dictation.streaming = false, - _ => {} - } - } - if let Some(val) = env.get("OPENHUMAN_DICTATION_STREAMING_INTERVAL_MS") { - if let Ok(ms) = val.trim().parse::() { - self.dictation.streaming_interval_ms = ms; - } - } - - // ── Context management overrides ─────────────────────────────── - if let Some(flag) = env.get("OPENHUMAN_CONTEXT_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => 
self.context.enabled = true, - "0" | "false" | "no" | "off" => self.context.enabled = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_CONTEXT_MICROCOMPACT_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.context.microcompact_enabled = true, - "0" | "false" | "no" | "off" => self.context.microcompact_enabled = false, - _ => {} - } - } - if let Some(flag) = env.get("OPENHUMAN_CONTEXT_AUTOCOMPACT_ENABLED") { - let normalized = flag.trim().to_ascii_lowercase(); - match normalized.as_str() { - "1" | "true" | "yes" | "on" => self.context.autocompact_enabled = true, - "0" | "false" | "no" | "off" => self.context.autocompact_enabled = false, - _ => {} - } - } - if let Some(val) = env.get("OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES") { - if let Ok(n) = val.trim().parse::() { - self.context.tool_result_budget_bytes = n; - } - } - if let Some(model) = env.get("OPENHUMAN_CONTEXT_SUMMARIZER_MODEL") { - let model = model.trim(); - if !model.is_empty() { - self.context.summarizer_model = Some(model.to_string()); - } - } - - // Migration: `agent.tool_result_budget_bytes` used to own this - // knob before it moved to `context.tool_result_budget_bytes`. If - // an existing config.toml sets the old field to a non-default - // value and the new field is still at its default AND the env - // var is not present, copy the old value forward and emit a - // deprecation warning so the user knows to move it. The env var - // check is important: without it a user who explicitly sets - // `OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES` to the default - // value would have their env override silently clobbered by the - // agent-field migration. 
- let context_default = crate::openhuman::context::DEFAULT_TOOL_RESULT_BUDGET_BYTES; - let context_env_set = env.contains("OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES"); - if !context_env_set - && self.context.tool_result_budget_bytes == context_default - && self.agent.tool_result_budget_bytes != context_default - { - tracing::warn!( - old = self.agent.tool_result_budget_bytes, - "[context:config] `agent.tool_result_budget_bytes` is \ - deprecated — please move it to \ - `context.tool_result_budget_bytes` in your config.toml" - ); - self.context.tool_result_budget_bytes = self.agent.tool_result_budget_bytes; - } - } - - pub async fn save(&self) -> Result<()> { - let mut config_to_save = self.clone(); - encrypt_config_secrets(&mut config_to_save)?; - - let toml_str = - toml::to_string_pretty(&config_to_save).context("Failed to serialize config")?; - - let parent_dir = self - .config_path - .parent() - .context("Config path must have a parent directory")?; - - fs::create_dir_all(parent_dir).await.with_context(|| { - format!( - "Failed to create config directory: {}", - parent_dir.display() - ) - })?; - - let file_name = self - .config_path - .file_name() - .and_then(|v| v.to_str()) - .unwrap_or("config.toml"); - let temp_path = parent_dir.join(format!(".{file_name}.tmp-{}", uuid::Uuid::new_v4())); - let backup_path = parent_dir.join(format!("{file_name}.bak")); - - let mut temp_file = OpenOptions::new() - .create_new(true) - .write(true) - .open(&temp_path) - .await - .with_context(|| { - format!( - "Failed to create temporary config file: {}", - temp_path.display() - ) - })?; - temp_file - .write_all(toml_str.as_bytes()) - .await - .context("Failed to write temporary config contents")?; - temp_file - .sync_all() - .await - .context("Failed to fsync temporary config file")?; - drop(temp_file); - - let had_existing_config = tokio::fs::try_exists(&self.config_path) - .await - .unwrap_or(false); - if had_existing_config { - // Copy the encrypted temp file as the backup, 
NOT the old on-disk - // config. The old config may still contain plaintext secrets from - // before encryption was wired in (#1900). Using the encrypted - // bytes ensures the .bak never leaks plaintext credentials. - fs::copy(&temp_path, &backup_path).await.with_context(|| { - format!( - "Failed to create config backup before atomic replace: {}", - backup_path.display() - ) - })?; - } - - if let Err(e) = fs::rename(&temp_path, &self.config_path).await { - let _ = fs::remove_file(&temp_path).await; - if had_existing_config && backup_path.exists() { - fs::copy(&backup_path, &self.config_path) - .await - .context("Failed to restore config backup")?; - } - anyhow::bail!("Failed to atomically replace config file: {e}"); - } - - sync_directory(parent_dir).await?; - - // Note: we intentionally keep the .bak file after a successful save so - // that `parse_config_with_recovery` can use it if the primary is later - // corrupted. The .bak is updated on every successful save, so it always - // holds the last-known-good config. 
- - Ok(()) - } -} - -#[cfg(test)] -#[path = "load_tests.rs"] -mod tests; diff --git a/src/openhuman/config/schema/load/dirs.rs b/src/openhuman/config/schema/load/dirs.rs new file mode 100644 index 0000000000..d15f2fd9d4 --- /dev/null +++ b/src/openhuman/config/schema/load/dirs.rs @@ -0,0 +1,361 @@ +use super::env::{EnvLookup, ProcessEnv}; +use anyhow::{Context, Result}; +use directories::UserDirs; +use serde::{Deserialize, Serialize}; +use std::path::{Path, PathBuf}; +use tokio::fs; + +pub use load_user_state::{ + clear_active_user, pre_login_user_dir, read_active_user_id, user_openhuman_dir, + write_active_user_id, PRE_LOGIN_USER_ID, +}; + +#[path = "../load_user_state.rs"] +mod load_user_state; +#[cfg(test)] +pub(crate) use load_user_state::ACTIVE_USER_STATE_FILE; + +const ACTIVE_WORKSPACE_STATE_FILE: &str = "active_workspace.toml"; + +#[derive(Debug, Serialize, Deserialize)] +struct ActiveWorkspaceState { + config_dir: String, +} + +/// Environment override for the agent's default projects directory. +pub const PROJECTS_DIR_ENV_VAR: &str = "OPENHUMAN_PROJECTS_DIR"; + +/// Environment override for the agent action sandbox directory. +pub const ACTION_DIR_ENV_VAR: &str = "OPENHUMAN_ACTION_DIR"; + +/// Environment override for the global memory-sync cadence (seconds). +/// `0` means "Manual only". See issue #3302 and +/// [`Config::memory_sync_interval_secs`]. +pub const MEMORY_SYNC_INTERVAL_SECS_ENV_VAR: &str = "OPENHUMAN_MEMORY_SYNC_INTERVAL_SECS"; + +fn default_root_dir_name() -> &'static str { + if crate::api::config::is_staging_app_env(crate::api::config::app_env_from_env().as_deref()) { + ".openhuman-staging" + } else { + ".openhuman" + } +} + +#[cfg(test)] +pub(crate) fn default_root_dir_name_pub() -> &'static str { + default_root_dir_name() +} + +/// Returns the root openhuman directory (`~/.openhuman`), independent of any +/// per-user scoping. Used to locate `active_user.toml` and the shared +/// `users/` tree. 
+pub fn default_root_openhuman_dir() -> Result { + let home = UserDirs::new() + .map(|u| u.home_dir().to_path_buf()) + .context("Could not find home directory")?; + Ok(home.join(default_root_dir_name())) +} + +pub(super) fn default_config_dir() -> Result { + default_root_openhuman_dir() +} + +pub(super) fn default_config_and_workspace_dirs() -> Result<(PathBuf, PathBuf)> { + let config_dir = default_config_dir()?; + Ok((config_dir.clone(), config_dir.join("workspace"))) +} + +/// The agent's default **projects home** — a visible, read-write directory +/// (`~/OpenHuman/projects`) where the coding agent creates and saves projects, +/// kept distinct from the hidden internal state dir (`~/.openhuman/workspace`, +/// which also holds `memory_tree` etc.). Overridable via `OPENHUMAN_PROJECTS_DIR`; +/// falls back to `./OpenHuman/projects` only when the home dir can't be resolved. +pub fn default_projects_dir() -> PathBuf { + if let Ok(p) = std::env::var(PROJECTS_DIR_ENV_VAR) { + let trimmed = p.trim(); + if !trimmed.is_empty() { + return PathBuf::from(trimmed); + } + } + UserDirs::new() + .map(|u| u.home_dir().to_path_buf()) + .unwrap_or_else(|| PathBuf::from(".")) + .join("OpenHuman") + .join("projects") +} + +/// The `OPENHUMAN_ACTION_DIR` env override, when set to a non-empty value. +/// +/// Returns `None` when the variable is unset or blank (a common shape from +/// shells that pass through a declared-but-unset variable). The trim mirrors +/// [`default_action_dir`] so an empty env var never pins `action_dir`. +pub fn action_dir_env_override() -> Option { + let raw = std::env::var(ACTION_DIR_ENV_VAR).ok()?; + let trimmed = raw.trim(); + if trimmed.is_empty() { + None + } else { + Some(PathBuf::from(trimmed)) + } +} + +/// Resolve the effective `action_dir` from the precedence chain: +/// env `OPENHUMAN_ACTION_DIR` > persisted `action_dir_override` > default +/// projects dir. 
Keeping the env var first means existing env-driven +/// deployments are unaffected by a UI-set override. +pub fn resolve_action_dir(action_dir_override: &Option) -> PathBuf { + if let Some(env_dir) = action_dir_env_override() { + return env_dir; + } + if let Some(over) = action_dir_override { + if !over.as_os_str().is_empty() && over.is_absolute() { + return over.clone(); + } + tracing::warn!( + value = %over.display(), + "[config] ignoring invalid action_dir_override; expected non-empty absolute path" + ); + } + default_projects_dir() +} + +pub fn default_action_dir() -> PathBuf { + if let Ok(p) = std::env::var(ACTION_DIR_ENV_VAR) { + let trimmed = p.trim(); + if !trimmed.is_empty() { + return PathBuf::from(trimmed); + } + } + default_projects_dir() +} + +fn active_workspace_state_path(default_dir: &Path) -> PathBuf { + default_dir.join(ACTIVE_WORKSPACE_STATE_FILE) +} + +async fn load_persisted_workspace_dirs( + default_config_dir: &Path, +) -> Result> { + let state_path = active_workspace_state_path(default_config_dir); + if !state_path.exists() { + return Ok(None); + } + + let contents = match fs::read_to_string(&state_path).await { + Ok(contents) => contents, + Err(error) => { + tracing::warn!( + "Failed to read active workspace marker {}: {error}", + state_path.display() + ); + return Ok(None); + } + }; + + let state: ActiveWorkspaceState = match toml::from_str(&contents) { + Ok(state) => state, + Err(error) => { + tracing::warn!( + "Failed to parse active workspace marker {}: {error}", + state_path.display() + ); + return Ok(None); + } + }; + + let raw_config_dir = state.config_dir.trim(); + if raw_config_dir.is_empty() { + tracing::warn!( + "Ignoring active workspace marker {} because config_dir is empty", + state_path.display() + ); + return Ok(None); + } + + let parsed_dir = PathBuf::from(raw_config_dir); + let config_dir = if parsed_dir.is_absolute() { + parsed_dir + } else { + default_config_dir.join(parsed_dir) + }; + Ok(Some((config_dir.clone(), 
config_dir.join("workspace")))) +} + +pub(crate) async fn persist_active_workspace_config_dir(config_dir: &Path) -> Result<()> { + let default_config_dir = default_config_dir()?; + let state_path = active_workspace_state_path(&default_config_dir); + + if config_dir == default_config_dir { + if state_path.exists() { + fs::remove_file(&state_path).await.with_context(|| { + format!( + "Failed to clear active workspace marker: {}", + state_path.display() + ) + })?; + } + return Ok(()); + } + + fs::create_dir_all(&default_config_dir) + .await + .with_context(|| { + format!( + "Failed to create default config directory: {}", + default_config_dir.display() + ) + })?; + + let state = ActiveWorkspaceState { + config_dir: config_dir.to_string_lossy().into_owned(), + }; + let serialized = + toml::to_string_pretty(&state).context("Failed to serialize active workspace marker")?; + + let temp_path = default_config_dir.join(format!( + ".{ACTIVE_WORKSPACE_STATE_FILE}.tmp-{}", + uuid::Uuid::new_v4() + )); + fs::write(&temp_path, serialized).await.with_context(|| { + format!( + "Failed to write temporary active workspace marker: {}", + temp_path.display() + ) + })?; + + if let Err(error) = fs::rename(&temp_path, &state_path).await { + let _ = fs::remove_file(&temp_path).await; + anyhow::bail!( + "Failed to atomically persist active workspace marker {}: {error}", + state_path.display() + ); + } + + super::sync_directory(&default_config_dir).await?; + Ok(()) +} + +pub(crate) fn resolve_config_dir_for_workspace(workspace_dir: &Path) -> (PathBuf, PathBuf) { + let workspace_config_dir = workspace_dir.to_path_buf(); + if workspace_config_dir.join("config.toml").exists() { + return ( + workspace_config_dir.clone(), + workspace_config_dir.join("workspace"), + ); + } + + let legacy_config_dir = workspace_dir + .parent() + .map(|parent| parent.join(".openhuman")); + if let Some(legacy_dir) = legacy_config_dir { + if legacy_dir.join("config.toml").exists() { + return (legacy_dir, 
workspace_config_dir); + } + + if workspace_dir + .file_name() + .is_some_and(|name| name == std::ffi::OsStr::new("workspace")) + { + return (legacy_dir, workspace_config_dir); + } + } + + ( + workspace_config_dir.clone(), + workspace_config_dir.join("workspace"), + ) +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub(crate) enum ConfigResolutionSource { + EnvWorkspace, + ActiveWorkspaceMarker, + ActiveUser, + DefaultConfigDir, +} + +impl ConfigResolutionSource { + pub(crate) const fn as_str(self) -> &'static str { + match self { + Self::EnvWorkspace => "OPENHUMAN_WORKSPACE", + Self::ActiveWorkspaceMarker => "active_workspace.toml", + Self::ActiveUser => "active_user.toml", + Self::DefaultConfigDir => "default", + } + } +} + +pub(crate) async fn resolve_runtime_config_dirs( + default_openhuman_dir: &Path, + default_workspace_dir: &Path, +) -> Result<(PathBuf, PathBuf, ConfigResolutionSource)> { + resolve_runtime_config_dirs_with(default_openhuman_dir, default_workspace_dir, &ProcessEnv) + .await +} + +/// Env-injectable variant of [`resolve_runtime_config_dirs`]. Accepts any +/// [`EnvLookup`] so unit tests can exercise the `OPENHUMAN_WORKSPACE` +/// override path without mutating the process environment. +pub(crate) async fn resolve_runtime_config_dirs_with( + default_openhuman_dir: &Path, + default_workspace_dir: &Path, + env: &(dyn EnvLookup + Send + Sync), +) -> Result<(PathBuf, PathBuf, ConfigResolutionSource)> { + if let Some(custom_workspace) = env.get("OPENHUMAN_WORKSPACE") { + if !custom_workspace.is_empty() { + let (openhuman_dir, workspace_dir) = + resolve_config_dir_for_workspace(&PathBuf::from(custom_workspace)); + return Ok(( + openhuman_dir, + workspace_dir, + ConfigResolutionSource::EnvWorkspace, + )); + } + } + + resolve_config_dirs_ignoring_env(default_openhuman_dir, default_workspace_dir).await +} + +/// Same as [`resolve_runtime_config_dirs`] but skips the +/// `OPENHUMAN_WORKSPACE` env var override. 
Used by +/// [`Config::load_from_default_paths`] so callers can reliably load +/// the real user config without mutating the process environment. +pub(super) async fn resolve_config_dirs_ignoring_env( + default_openhuman_dir: &Path, + default_workspace_dir: &Path, +) -> Result<(PathBuf, PathBuf, ConfigResolutionSource)> { + if let Some(user_id) = read_active_user_id(default_openhuman_dir) { + let user_dir = user_openhuman_dir(default_openhuman_dir, &user_id); + let user_workspace = user_dir.join("workspace"); + tracing::debug!( + user_id = %user_id, + user_dir = %user_dir.display(), + "Config dirs resolved via active_user.toml" + ); + return Ok((user_dir, user_workspace, ConfigResolutionSource::ActiveUser)); + } + + if let Some((openhuman_dir, workspace_dir)) = + load_persisted_workspace_dirs(default_openhuman_dir).await? + { + return Ok(( + openhuman_dir, + workspace_dir, + ConfigResolutionSource::ActiveWorkspaceMarker, + )); + } + + let user_dir = pre_login_user_dir(default_openhuman_dir); + let user_workspace = user_dir.join("workspace"); + tracing::debug!( + user_id = %PRE_LOGIN_USER_ID, + user_dir = %user_dir.display(), + default_workspace_dir = %default_workspace_dir.display(), + "Config dirs resolved to pre-login user directory (no active user, no workspace marker)" + ); + Ok(( + user_dir, + user_workspace, + ConfigResolutionSource::DefaultConfigDir, + )) +} diff --git a/src/openhuman/config/schema/load/env.rs b/src/openhuman/config/schema/load/env.rs new file mode 100644 index 0000000000..8aa0411382 --- /dev/null +++ b/src/openhuman/config/schema/load/env.rs @@ -0,0 +1,78 @@ +/// Read-only environment lookup used by [`crate::openhuman::config::schema::Config::apply_env_overrides`]. +/// The seam lets unit tests exercise the overlay without mutating the process +/// environment (which is racy under parallel tests and requires a shared +/// `TEST_ENV_LOCK`). +/// +/// Production code uses [`ProcessEnv`], which delegates to `std::env`. 
+pub(crate) trait EnvLookup { + /// Equivalent to `std::env::var(key).ok()`. + fn get(&self, key: &str) -> Option; + + /// Equivalent to `std::env::var_os(key).is_some()`. Used to distinguish + /// "variable not present" from "variable set to empty" where it matters + /// (see `OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES` below). + fn contains(&self, key: &str) -> bool { + self.get(key).is_some() + } + + /// Looks up the first non-`None` value across `keys`, preserving the + /// precedence used by the manual `or_else` chains throughout this + /// module (e.g. `OPENHUMAN_FOO` wins over the bare `FOO` alias). + fn get_any(&self, keys: &[&str]) -> Option { + keys.iter().find_map(|k| self.get(k)) + } +} + +/// Default [`EnvLookup`] implementation backed by `std::env`. +pub(crate) struct ProcessEnv; + +impl EnvLookup for ProcessEnv { + fn get(&self, key: &str) -> Option { + std::env::var(key).ok() + } + + fn contains(&self, key: &str) -> bool { + std::env::var_os(key).is_some() + } +} + +/// Process env lookup that preserves every override except +/// `OPENHUMAN_WORKSPACE`. +pub(crate) struct ProcessEnvWithoutWorkspace; + +impl EnvLookup for ProcessEnvWithoutWorkspace { + fn get(&self, key: &str) -> Option { + if key == "OPENHUMAN_WORKSPACE" { + None + } else { + ProcessEnv.get(key) + } + } + + fn contains(&self, key: &str) -> bool { + if key == "OPENHUMAN_WORKSPACE" { + false + } else { + ProcessEnv.contains(key) + } + } +} + +/// Parse a boolean env-var value. Accepts the usual truthy/falsy tokens +/// (`1/true/yes/on` and `0/false/no/off`, case-insensitive). Returns `None` +/// on unrecognised values and logs a warning so silent mis-spellings don't +/// invisibly leave the config unchanged. 
+pub(super) fn parse_env_bool(name: &str, raw: &str) -> Option { + match raw.trim().to_ascii_lowercase().as_str() { + "1" | "true" | "yes" | "on" => Some(true), + "0" | "false" | "no" | "off" => Some(false), + _ => { + tracing::warn!( + env = %name, + value = %raw, + "invalid boolean env override ignored; expected 1/true/yes/on or 0/false/no/off" + ); + None + } + } +} diff --git a/src/openhuman/config/schema/load/env_overlay.rs b/src/openhuman/config/schema/load/env_overlay.rs new file mode 100644 index 0000000000..cc2a6ee4d6 --- /dev/null +++ b/src/openhuman/config/schema/load/env_overlay.rs @@ -0,0 +1,812 @@ +use super::super::proxy::{ + normalize_no_proxy_list, normalize_proxy_url_option, normalize_service_list, + parse_proxy_enabled, parse_proxy_scope, set_runtime_proxy_config, ProxyScope, +}; +use super::super::{Config, UpdateRestartStrategy}; +use super::dirs::MEMORY_SYNC_INTERVAL_SECS_ENV_VAR; +use super::env::parse_env_bool; +use std::path::PathBuf; + +impl Config { + pub fn apply_env_overrides(&mut self) { + use super::env::ProcessEnv; + self.apply_env_overrides_from(&ProcessEnv); + } + + pub(super) fn apply_env_overrides_from( + &mut self, + env: &(dyn super::env::EnvLookup + Send + Sync), + ) { + self.apply_env_overlay_with(env); + + if self.proxy.enabled && self.proxy.scope == ProxyScope::Environment { + self.proxy.apply_to_process_env(); + } + + set_runtime_proxy_config(self.proxy.clone()); + + crate::openhuman::embeddings::rate_limit::set_embedding_rate_limit( + self.memory.embedding_rate_limit_per_min, + ); + } + + /// Pure-ish env overlay: applies overrides read from `env` to `self`. + /// + /// "Pure-ish" because it still emits `tracing` logs and calls + /// `self.proxy.validate()` (which only reads). 
Crucially, it does + /// **not** write to the process environment nor the + /// `set_runtime_proxy_config` global — those stay in the public + /// [`Self::apply_env_overrides`] wrapper so unit tests can call this + /// with a [`HashMapEnv`] (see tests) without requiring the + /// `TEST_ENV_LOCK` or tainting sibling tests. + pub(crate) fn apply_env_overlay_with(&mut self, env: &E) { + // Only the namespaced `OPENHUMAN_MODEL` is honoured. The bare `MODEL` + // env var used to be accepted as an alias but collides with vendor + // asset-tag env vars (e.g. Dell OptiPlex sets `MODEL=7080`), which + // silently clobbered the LLM model and 400'd every backend call + // (Sentry OPENHUMAN-TAURI-J8). + if let Some(model) = env.get("OPENHUMAN_MODEL") { + let trimmed = model.trim(); + if !trimmed.is_empty() { + self.default_model = Some(trimmed.to_string()); + } + } + + if let Some(workspace) = env.get("OPENHUMAN_WORKSPACE") { + if !workspace.is_empty() { + let (_, workspace_dir) = + super::dirs::resolve_config_dir_for_workspace(&PathBuf::from(workspace)); + self.workspace_dir = workspace_dir; + } + } + + if let Some(v) = env.get("OPENHUMAN_ACTION_DIR") { + let trimmed = v.trim(); + if !trimmed.is_empty() { + self.action_dir = PathBuf::from(trimmed); + } + } + + if let Some(temp_str) = env.get("OPENHUMAN_TEMPERATURE") { + if let Ok(temp) = temp_str.parse::() { + if (0.0..=2.0).contains(&temp) { + self.default_temperature = temp; + } + } + } + + if let Some(raw) = env.get("OPENHUMAN_MAX_ACTIONS_PER_HOUR") { + let trimmed = raw.trim(); + if !trimmed.is_empty() { + match trimmed.parse::() { + Ok(limit) => self.autonomy.max_actions_per_hour = limit, + Err(_) => tracing::warn!( + value = %raw, + "invalid OPENHUMAN_MAX_ACTIONS_PER_HOUR ignored; expected an unsigned integer" + ), + } + } + } + + if let Some(raw) = env.get(MEMORY_SYNC_INTERVAL_SECS_ENV_VAR) { + let trimmed = raw.trim(); + if !trimmed.is_empty() { + match trimmed.parse::() { + Ok(secs) => self.memory_sync_interval_secs 
= Some(secs), + Err(_) => tracing::warn!( + env = %MEMORY_SYNC_INTERVAL_SECS_ENV_VAR, + value = %raw, + "invalid memory-sync interval ignored; expected an unsigned integer (0 = manual)" + ), + } + } + } + + if let Some(language) = env.get("OPENHUMAN_OUTPUT_LANGUAGE") { + let language = language.trim(); + if !language.is_empty() { + self.output_language = Some(language.to_string()); + } + } + + if let Some(flag) = env.get_any(&["OPENHUMAN_REASONING_ENABLED", "REASONING_ENABLED"]) { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.runtime.reasoning_enabled = Some(true), + "0" | "false" | "no" | "off" => self.runtime.reasoning_enabled = Some(false), + _ => {} + } + } + + self.apply_search_env(env); + self.apply_proxy_env(env); + self.apply_runtime_env(env); + self.apply_observability_env(env); + self.apply_learning_env(env); + self.apply_memory_tree_env(env); + self.apply_update_env(env); + self.apply_dictation_env(env); + self.apply_context_env(env); + } + + fn apply_search_env(&mut self, env: &E) { + if let Some(key) = env.get_any(&["OPENHUMAN_SELTZ_API_KEY", "SELTZ_API_KEY"]) { + if !key.is_empty() { + self.seltz.api_key = Some(key); + self.seltz.enabled = true; + } + } + if let Some(url) = env.get_any(&["OPENHUMAN_SELTZ_API_URL", "SELTZ_API_URL"]) { + if !url.is_empty() { + self.seltz.api_url = Some(url); + } + } + if let Some(max) = env.get_any(&["OPENHUMAN_SELTZ_MAX_RESULTS", "SELTZ_MAX_RESULTS"]) { + if let Ok(n) = max.parse::() { + if (1..=20).contains(&n) { + self.seltz.max_results = n; + } + } + } + + if let Some(flag) = env.get_any(&["OPENHUMAN_SEARXNG_ENABLED", "SEARXNG_ENABLED"]) { + if let Some(enabled) = parse_env_bool("OPENHUMAN_SEARXNG_ENABLED", &flag) { + self.searxng.enabled = enabled; + } + } + if let Some(url) = env.get_any(&["OPENHUMAN_SEARXNG_BASE_URL", "SEARXNG_BASE_URL"]) { + let url = url.trim(); + if !url.is_empty() { + self.searxng.base_url = url.to_string(); + } + } + 
if let Some(max) = env.get_any(&["OPENHUMAN_SEARXNG_MAX_RESULTS", "SEARXNG_MAX_RESULTS"]) { + if let Ok(n) = max.parse::() { + if (1..=50).contains(&n) { + self.searxng.max_results = n; + } + } + } + if let Some(language) = env.get_any(&[ + "OPENHUMAN_SEARXNG_DEFAULT_LANGUAGE", + "SEARXNG_DEFAULT_LANGUAGE", + ]) { + let language = language.trim(); + if !language.is_empty() { + self.searxng.default_language = language.to_string(); + } + } + if let Some(timeout_secs) = env.get_any(&[ + "OPENHUMAN_SEARXNG_TIMEOUT_SECS", + "OPENHUMAN_SEARXNG_TIMEOUT_SECONDS", + "SEARXNG_TIMEOUT_SECS", + "SEARXNG_TIMEOUT_SECONDS", + ]) { + if let Ok(timeout_secs) = timeout_secs.parse::() { + if timeout_secs > 0 { + self.searxng.timeout_secs = timeout_secs; + } + } + } + + if let Some(engine) = env.get_any(&["OPENHUMAN_SEARCH_ENGINE", "SEARCH_ENGINE"]) { + let engine = engine.trim().to_ascii_lowercase(); + if !engine.is_empty() { + self.search.engine = engine; + } + } + if let Some(key) = env.get_any(&["OPENHUMAN_PARALLEL_API_KEY", "PARALLEL_API_KEY"]) { + if !key.trim().is_empty() { + self.search.parallel.api_key = Some(key); + } + } + if let Some(key) = env.get_any(&["OPENHUMAN_BRAVE_API_KEY", "BRAVE_API_KEY"]) { + if !key.trim().is_empty() { + self.search.brave.api_key = Some(key); + } + } + if let Some(key) = env.get_any(&["OPENHUMAN_QUERIT_API_KEY", "QUERIT_API_KEY"]) { + if !key.trim().is_empty() { + self.search.querit.api_key = Some(key); + } + } + if let Some(max) = env.get_any(&["OPENHUMAN_SEARCH_MAX_RESULTS", "SEARCH_MAX_RESULTS"]) { + if let Ok(n) = max.parse::() { + if (1..=20).contains(&n) { + self.search.max_results = n; + } + } + } + if let Some(t) = env.get_any(&["OPENHUMAN_SEARCH_TIMEOUT_SECS", "SEARCH_TIMEOUT_SECS"]) { + if let Ok(n) = t.parse::() { + if n > 0 { + self.search.timeout_secs = n; + } + } + } + + if env.contains("OPENHUMAN_WEB_SEARCH_ENABLED") { + log::warn!( + "[config] OPENHUMAN_WEB_SEARCH_ENABLED is deprecated and ignored — \ + web search is always 
registered; provider/API-key overrides were removed." + ); + } + + if let Some(max_results) = + env.get_any(&["OPENHUMAN_WEB_SEARCH_MAX_RESULTS", "WEB_SEARCH_MAX_RESULTS"]) + { + if let Ok(max_results) = max_results.parse::() { + if (1..=10).contains(&max_results) { + self.web_search.max_results = max_results; + } + } + } + + if let Some(timeout_secs) = env.get_any(&[ + "OPENHUMAN_WEB_SEARCH_TIMEOUT_SECS", + "WEB_SEARCH_TIMEOUT_SECS", + ]) { + if let Ok(timeout_secs) = timeout_secs.parse::() { + if timeout_secs > 0 { + self.web_search.timeout_secs = timeout_secs; + } + } + } + } + + fn apply_proxy_env(&mut self, env: &E) { + let explicit_proxy_enabled = env + .get("OPENHUMAN_PROXY_ENABLED") + .as_deref() + .and_then(parse_proxy_enabled); + if let Some(enabled) = explicit_proxy_enabled { + self.proxy.enabled = enabled; + } + + let mut proxy_url_overridden = false; + if let Some(proxy_url) = env.get_any(&["OPENHUMAN_HTTP_PROXY", "HTTP_PROXY"]) { + self.proxy.http_proxy = normalize_proxy_url_option(Some(&proxy_url)); + proxy_url_overridden = true; + } + if let Some(proxy_url) = env.get_any(&["OPENHUMAN_HTTPS_PROXY", "HTTPS_PROXY"]) { + self.proxy.https_proxy = normalize_proxy_url_option(Some(&proxy_url)); + proxy_url_overridden = true; + } + if let Some(proxy_url) = env.get_any(&["OPENHUMAN_ALL_PROXY", "ALL_PROXY"]) { + self.proxy.all_proxy = normalize_proxy_url_option(Some(&proxy_url)); + proxy_url_overridden = true; + } + if let Some(no_proxy) = env.get_any(&["OPENHUMAN_NO_PROXY", "NO_PROXY"]) { + self.proxy.no_proxy = normalize_no_proxy_list(vec![no_proxy]); + } + + if explicit_proxy_enabled.is_none() + && proxy_url_overridden + && self.proxy.has_any_proxy_url() + { + self.proxy.enabled = true; + } + + if let Some(scope_raw) = env.get("OPENHUMAN_PROXY_SCOPE") { + let trimmed = scope_raw.trim(); + if !trimmed.is_empty() { + match parse_proxy_scope(trimmed) { + Some(scope) => self.proxy.scope = scope, + None => { + tracing::warn!("Invalid OPENHUMAN_PROXY_SCOPE value 
{:?} ignored", trimmed); + } + } + } + } + + if let Some(services_raw) = env.get("OPENHUMAN_PROXY_SERVICES") { + self.proxy.services = normalize_service_list(vec![services_raw]); + } + + if let Err(error) = self.proxy.validate() { + tracing::warn!("Invalid proxy configuration ignored: {error}"); + self.proxy.enabled = false; + } + } + + fn apply_runtime_env(&mut self, env: &E) { + if let Some(tier_str) = env.get("OPENHUMAN_LOCAL_AI_TIER") { + let tier_str = tier_str.trim().to_ascii_lowercase(); + if !tier_str.is_empty() { + if let Some(tier) = + crate::openhuman::inference::presets::ModelTier::from_str_opt(&tier_str) + { + if tier == crate::openhuman::inference::presets::ModelTier::Custom { + tracing::warn!( + tier = %tier_str, + "ignoring custom OPENHUMAN_LOCAL_AI_TIER; only built-in presets are supported" + ); + } else if !tier.is_mvp_allowed() { + tracing::warn!( + tier = %tier_str, + "ignoring OPENHUMAN_LOCAL_AI_TIER outside the 1B local-model allowlist" + ); + } else { + crate::openhuman::inference::presets::apply_preset_to_config( + &mut self.local_ai, + tier, + ); + tracing::debug!( + tier = %tier_str, + "applied local AI tier from OPENHUMAN_LOCAL_AI_TIER" + ); + } + } else { + tracing::warn!( + tier = %tier_str, + "ignoring invalid OPENHUMAN_LOCAL_AI_TIER (valid: ram_2_4gb)" + ); + } + } + } + + if let Some(flag) = env.get("OPENHUMAN_NODE_ENABLED") { + if let Some(enabled) = parse_env_bool("OPENHUMAN_NODE_ENABLED", &flag) { + self.node.enabled = enabled; + } + } + if let Some(version) = env.get("OPENHUMAN_NODE_VERSION") { + let trimmed = version.trim(); + if !trimmed.is_empty() { + self.node.version = trimmed.to_string(); + } + } + if let Some(dir) = env.get("OPENHUMAN_NODE_CACHE_DIR") { + let trimmed = dir.trim(); + if !trimmed.is_empty() { + self.node.cache_dir = trimmed.to_string(); + } + } + if let Some(flag) = env.get("OPENHUMAN_NODE_PREFER_SYSTEM") { + if let Some(prefer_system) = parse_env_bool("OPENHUMAN_NODE_PREFER_SYSTEM", &flag) { + 
self.node.prefer_system = prefer_system; + } + } + + if let Some(flag) = env.get("OPENHUMAN_RUNTIME_PYTHON_ENABLED") { + if let Some(enabled) = parse_env_bool("OPENHUMAN_RUNTIME_PYTHON_ENABLED", &flag) { + self.runtime_python.enabled = enabled; + } + } + if let Some(version) = env.get("OPENHUMAN_RUNTIME_PYTHON_MINIMUM_VERSION") { + let trimmed = version.trim(); + if !trimmed.is_empty() { + self.runtime_python.minimum_version = trimmed.to_string(); + } + } + if let Some(dir) = env.get("OPENHUMAN_RUNTIME_PYTHON_CACHE_DIR") { + self.runtime_python.cache_dir = dir.trim().to_string(); + } + if let Some(tag) = env.get("OPENHUMAN_RUNTIME_PYTHON_MANAGED_RELEASE_TAG") { + self.runtime_python.managed_release_tag = tag.trim().to_string(); + } + if let Some(flag) = env.get("OPENHUMAN_RUNTIME_PYTHON_PREFER_SYSTEM") { + if let Some(prefer_system) = + parse_env_bool("OPENHUMAN_RUNTIME_PYTHON_PREFER_SYSTEM", &flag) + { + self.runtime_python.prefer_system = prefer_system; + } + } + if let Some(command) = env.get("OPENHUMAN_RUNTIME_PYTHON_PREFERRED_COMMAND") { + self.runtime_python.preferred_command = command.trim().to_string(); + } + } + + fn apply_observability_env(&mut self, env: &E) { + let dsn_value = env + .get("OPENHUMAN_CORE_SENTRY_DSN") + .or_else(|| env.get("OPENHUMAN_SENTRY_DSN")) + .or_else(|| option_env!("OPENHUMAN_CORE_SENTRY_DSN").map(|s| s.to_string())) + .or_else(|| option_env!("OPENHUMAN_SENTRY_DSN").map(|s| s.to_string())); + if let Some(dsn) = dsn_value { + let dsn = dsn.trim(); + if !dsn.is_empty() { + self.observability.sentry_dsn = Some(dsn.to_string()); + } + } + + if let Some(flag) = env.get("OPENHUMAN_ANALYTICS_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.observability.analytics_enabled = true, + "0" | "false" | "no" | "off" => self.observability.analytics_enabled = false, + _ => {} + } + } + } + + fn apply_learning_env(&mut self, env: &E) { + if let Some(flag) = 
env.get("OPENHUMAN_LEARNING_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.learning.enabled = true, + "0" | "false" | "no" | "off" => self.learning.enabled = false, + _ => {} + } + } + if let Some(flag) = env.get("OPENHUMAN_LEARNING_REFLECTION_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.learning.reflection_enabled = true, + "0" | "false" | "no" | "off" => self.learning.reflection_enabled = false, + _ => {} + } + } + if let Some(flag) = env.get("OPENHUMAN_LEARNING_USER_PROFILE_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.learning.user_profile_enabled = true, + "0" | "false" | "no" | "off" => self.learning.user_profile_enabled = false, + _ => {} + } + } + if let Some(flag) = env.get("OPENHUMAN_LEARNING_TOOL_TRACKING_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.learning.tool_tracking_enabled = true, + "0" | "false" | "no" | "off" => self.learning.tool_tracking_enabled = false, + _ => {} + } + } + if let Some(flag) = env.get("OPENHUMAN_LEARNING_TOOL_MEMORY_CAPTURE_ENABLED") { + if let Some(enabled) = parse_env_bool( + "OPENHUMAN_LEARNING_TOOL_MEMORY_CAPTURE_ENABLED", + flag.as_str(), + ) { + self.learning.tool_memory_capture_enabled = enabled; + } + } + if let Some(flag) = env.get("OPENHUMAN_LEARNING_EXPLICIT_PREFERENCES_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.learning.explicit_preferences_enabled = true, + "0" | "false" | "no" | "off" => self.learning.explicit_preferences_enabled = false, + _ => {} + } + } + if let Some(source) = env.get("OPENHUMAN_LEARNING_REFLECTION_SOURCE") { + let normalized = source.trim().to_ascii_lowercase(); 
match normalized.as_str() {
                "local" => {
                    self.learning.reflection_source =
                        crate::openhuman::config::ReflectionSource::Local
                }
                "cloud" => {
                    self.learning.reflection_source =
                        crate::openhuman::config::ReflectionSource::Cloud
                }
                _ => {
                    tracing::warn!(
                        source = %source,
                        "ignoring invalid OPENHUMAN_LEARNING_REFLECTION_SOURCE (valid: local, cloud)"
                    );
                }
            }
        }
        if let Some(val) = env.get("OPENHUMAN_LEARNING_MAX_REFLECTIONS_PER_SESSION") {
            // The numeric target type is inferred from the field being assigned.
            if let Ok(max) = val.trim().parse() {
                self.learning.max_reflections_per_session = max;
            }
        }
        if let Some(val) = env.get("OPENHUMAN_LEARNING_MIN_TURN_COMPLEXITY") {
            if let Ok(min) = val.trim().parse() {
                self.learning.min_turn_complexity = min;
            }
        }
        if let Some(flag) = env.get("OPENHUMAN_LEARNING_EPISODIC_CAPTURE_ENABLED") {
            if let Some(enabled) =
                parse_env_bool("OPENHUMAN_LEARNING_EPISODIC_CAPTURE_ENABLED", flag.as_str())
            {
                self.learning.episodic_capture_enabled = enabled;
            }
        }
        if let Some(flag) = env.get("OPENHUMAN_LEARNING_STM_RECALL_ENABLED") {
            if let Some(enabled) =
                parse_env_bool("OPENHUMAN_LEARNING_STM_RECALL_ENABLED", flag.as_str())
            {
                self.learning.stm_recall_enabled = enabled;
            }
        }
        if let Some(flag) = env.get("OPENHUMAN_LEARNING_UNIFIED_COMPACTION_ENABLED") {
            if let Some(enabled) = parse_env_bool(
                "OPENHUMAN_LEARNING_UNIFIED_COMPACTION_ENABLED",
                flag.as_str(),
            ) {
                self.learning.unified_compaction_enabled = enabled;
            }
        }
    }

    /// Applies the `OPENHUMAN_MEMORY_*` overrides (embedder, extractor,
    /// summariser and memory-tree settings) read from `env`.
    ///
    /// Every variable is read through `env` rather than `std::env`, so the
    /// overlay can be exercised with an injected lookup without touching the
    /// process environment.
    ///
    /// Conventions used below:
    /// - endpoint / model / directory variables: a set-but-blank value
    ///   *clears* the field (`None`); an unset variable leaves it untouched;
    /// - `*_TIMEOUT_MS` variables: applied only when they parse to a value
    ///   greater than zero; anything else is ignored silently;
    /// - boolean variables go through `parse_env_bool`.
    fn apply_memory_tree_env<E: super::env::EnvLookup + ?Sized>(&mut self, env: &E) {
        /// Trimmed copy of `raw`, or `None` when nothing but whitespace is left.
        fn trimmed_or_none(raw: &str) -> Option<String> {
            let trimmed = raw.trim();
            (!trimmed.is_empty()).then(|| trimmed.to_string())
        }

        // --- Embedder -------------------------------------------------------
        if let Some(endpoint) = env.get("OPENHUMAN_MEMORY_EMBED_ENDPOINT") {
            self.memory_tree.embedding_endpoint = trimmed_or_none(&endpoint);
        }
        if let Some(model) = env.get("OPENHUMAN_MEMORY_EMBED_MODEL") {
            self.memory_tree.embedding_model = trimmed_or_none(&model);
        }
        if let Some(val) = env.get("OPENHUMAN_MEMORY_EMBED_TIMEOUT_MS") {
            if let Ok(timeout_ms) = val.trim().parse() {
                if timeout_ms > 0 {
                    self.memory_tree.embedding_timeout_ms = Some(timeout_ms);
                }
            }
        }
        if let Some(flag) = env.get("OPENHUMAN_MEMORY_EMBED_STRICT") {
            if let Some(strict) = parse_env_bool("OPENHUMAN_MEMORY_EMBED_STRICT", &flag) {
                self.memory_tree.embedding_strict = strict;
            }
        }
        if let Some(val) = env.get("OPENHUMAN_MEMORY_EMBED_RATE_LIMIT") {
            if let Ok(per_min) = val.trim().parse() {
                self.memory.embedding_rate_limit_per_min = per_min;
            }
        }

        // --- LLM extractor --------------------------------------------------
        if let Some(endpoint) = env.get("OPENHUMAN_MEMORY_EXTRACT_ENDPOINT") {
            self.memory_tree.llm_extractor_endpoint = trimmed_or_none(&endpoint);
        }
        if let Some(model) = env.get("OPENHUMAN_MEMORY_EXTRACT_MODEL") {
            self.memory_tree.llm_extractor_model = trimmed_or_none(&model);
        }
        if let Some(val) = env.get("OPENHUMAN_MEMORY_EXTRACT_TIMEOUT_MS") {
            if let Ok(ms) = val.trim().parse() {
                if ms > 0 {
                    self.memory_tree.llm_extractor_timeout_ms = Some(ms);
                }
            }
        }

        // --- LLM summariser -------------------------------------------------
        if let Some(endpoint) = env.get("OPENHUMAN_MEMORY_SUMMARISE_ENDPOINT") {
            self.memory_tree.llm_summariser_endpoint = trimmed_or_none(&endpoint);
        }
        if let Some(model) = env.get("OPENHUMAN_MEMORY_SUMMARISE_MODEL") {
            self.memory_tree.llm_summariser_model = trimmed_or_none(&model);
        }
        if let Some(val) = env.get("OPENHUMAN_MEMORY_SUMMARISE_TIMEOUT_MS") {
            if let Ok(ms) = val.trim().parse() {
                if ms > 0 {
                    self.memory_tree.llm_summariser_timeout_ms = Some(ms);
                }
            }
        }

        // --- Memory tree ----------------------------------------------------
        if let Some(dir) = env.get("OPENHUMAN_MEMORY_TREE_CONTENT_DIR") {
            self.memory_tree.content_dir = trimmed_or_none(&dir).map(std::path::PathBuf::from);
        }

        if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_LLM_BACKEND") {
            let trimmed = raw.trim();
            // Unlike the string fields above, a blank backend is ignored
            // rather than treated as "clear".
            if !trimmed.is_empty() {
                match crate::openhuman::config::LlmBackend::parse(trimmed) {
                    Ok(b) => {
                        log::debug!(
                            "[memory_tree] OPENHUMAN_MEMORY_TREE_LLM_BACKEND override applied: {}",
                            b.as_str()
                        );
                        self.memory_tree.llm_backend = b;
                    }
                    Err(e) => {
                        tracing::warn!(
                            value = trimmed,
                            error = %e,
                            "ignoring invalid OPENHUMAN_MEMORY_TREE_LLM_BACKEND (valid: cloud, local)"
                        );
                    }
                }
            }
        }
        if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_CLOUD_LLM_MODEL") {
            self.memory_tree.cloud_llm_model = trimmed_or_none(&raw);
        }

        if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_SMART_WALK_MODEL") {
            self.memory_tree.smart_walk_model = trimmed_or_none(&raw);
        }

        if let Some(raw) = env.get("OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION") {
            if let Some(val) = parse_env_bool("OPENHUMAN_MEMORY_TREE_CLOUD_SUMMARIZATION", &raw) {
                self.memory_tree.cloud_summarization_opt_in = val;
            }
        }
    }

    fn apply_update_env<E: super::env::EnvLookup + ?Sized>(&mut self, env: &E) {
        if let Some(flag) = env.get("OPENHUMAN_AUTO_UPDATE_ENABLED") {
            let normalized = flag.trim().to_ascii_lowercase();
            match normalized.as_str() {
                "1" | "true" | "yes" | "on" => self.update.enabled = true,
                "0" | "false" | "no" | "off" => self.update.enabled = false,
                _ => {}
            }
        }
        if let Some(val) = env.get("OPENHUMAN_AUTO_UPDATE_INTERVAL_MINUTES") {
            if let Ok(minutes) = val.trim().parse() {
                self.update.interval_minutes = minutes;
            }
        }
        if let Some(raw) = env.get("OPENHUMAN_AUTO_UPDATE_RESTART_STRATEGY") {
            match raw.trim().to_ascii_lowercase().as_str() {
                "self_replace" | "self-replace" | "self" => {
self.update.restart_strategy = UpdateRestartStrategy::SelfReplace; + } + "supervisor" | "stage_only" | "stage-only" => { + self.update.restart_strategy = UpdateRestartStrategy::Supervisor; + } + other => { + tracing::warn!( + value = other, + "ignoring invalid OPENHUMAN_AUTO_UPDATE_RESTART_STRATEGY \ + (valid: self_replace, supervisor)" + ); + } + } + } + if let Some(flag) = env.get("OPENHUMAN_AUTO_UPDATE_RPC_MUTATIONS_ENABLED") { + if let Some(enabled) = + parse_env_bool("OPENHUMAN_AUTO_UPDATE_RPC_MUTATIONS_ENABLED", &flag) + { + self.update.rpc_mutations_enabled = enabled; + } + } + } + + fn apply_dictation_env(&mut self, env: &E) { + if let Some(flag) = env.get("OPENHUMAN_DICTATION_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.dictation.enabled = true, + "0" | "false" | "no" | "off" => self.dictation.enabled = false, + _ => {} + } + } + if let Some(hotkey) = env.get("OPENHUMAN_DICTATION_HOTKEY") { + let hotkey = hotkey.trim(); + if !hotkey.is_empty() { + self.dictation.hotkey = hotkey.to_string(); + } + } + if let Some(mode) = env.get("OPENHUMAN_DICTATION_ACTIVATION_MODE") { + let normalized = mode.trim().to_ascii_lowercase(); + match normalized.as_str() { + "toggle" => { + self.dictation.activation_mode = + crate::openhuman::config::DictationActivationMode::Toggle + } + "push" => { + self.dictation.activation_mode = + crate::openhuman::config::DictationActivationMode::Push + } + _ => { + tracing::warn!( + mode = %mode, + "ignoring invalid OPENHUMAN_DICTATION_ACTIVATION_MODE (valid: toggle, push)" + ); + } + } + } + if let Some(flag) = env.get("OPENHUMAN_DICTATION_LLM_REFINEMENT") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.dictation.llm_refinement = true, + "0" | "false" | "no" | "off" => self.dictation.llm_refinement = false, + _ => {} + } + } + if let Some(flag) = 
env.get("OPENHUMAN_DICTATION_STREAMING") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.dictation.streaming = true, + "0" | "false" | "no" | "off" => self.dictation.streaming = false, + _ => {} + } + } + if let Some(val) = env.get("OPENHUMAN_DICTATION_STREAMING_INTERVAL_MS") { + if let Ok(ms) = val.trim().parse::() { + self.dictation.streaming_interval_ms = ms; + } + } + } + + fn apply_context_env(&mut self, env: &E) { + if let Some(flag) = env.get("OPENHUMAN_CONTEXT_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.context.enabled = true, + "0" | "false" | "no" | "off" => self.context.enabled = false, + _ => {} + } + } + if let Some(flag) = env.get("OPENHUMAN_CONTEXT_MICROCOMPACT_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.context.microcompact_enabled = true, + "0" | "false" | "no" | "off" => self.context.microcompact_enabled = false, + _ => {} + } + } + if let Some(flag) = env.get("OPENHUMAN_CONTEXT_AUTOCOMPACT_ENABLED") { + let normalized = flag.trim().to_ascii_lowercase(); + match normalized.as_str() { + "1" | "true" | "yes" | "on" => self.context.autocompact_enabled = true, + "0" | "false" | "no" | "off" => self.context.autocompact_enabled = false, + _ => {} + } + } + if let Some(val) = env.get("OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES") { + if let Ok(n) = val.trim().parse::() { + self.context.tool_result_budget_bytes = n; + } + } + if let Some(model) = env.get("OPENHUMAN_CONTEXT_SUMMARIZER_MODEL") { + let model = model.trim(); + if !model.is_empty() { + self.context.summarizer_model = Some(model.to_string()); + } + } + + let context_default = crate::openhuman::context::DEFAULT_TOOL_RESULT_BUDGET_BYTES; + let context_env_set = env.contains("OPENHUMAN_CONTEXT_TOOL_RESULT_BUDGET_BYTES"); + if !context_env_set + && 
self.context.tool_result_budget_bytes == context_default + && self.agent.tool_result_budget_bytes != context_default + { + tracing::warn!( + old = self.agent.tool_result_budget_bytes, + "[context:config] `agent.tool_result_budget_bytes` is \ + deprecated — please move it to \ + `context.tool_result_budget_bytes` in your config.toml" + ); + self.context.tool_result_budget_bytes = self.agent.tool_result_budget_bytes; + } + } +} diff --git a/src/openhuman/config/schema/load/impl_load.rs b/src/openhuman/config/schema/load/impl_load.rs new file mode 100644 index 0000000000..60b067c46d --- /dev/null +++ b/src/openhuman/config/schema/load/impl_load.rs @@ -0,0 +1,433 @@ +use super::super::Config; +use super::dirs::{ + default_action_dir, default_config_and_workspace_dirs, resolve_action_dir, + resolve_config_dirs_ignoring_env, resolve_runtime_config_dirs_with, ConfigResolutionSource, +}; +use super::env::{EnvLookup, ProcessEnv, ProcessEnvWithoutWorkspace}; +use super::migrate::{ + migrate_cloud_provider_slugs, migrate_legacy_autocomplete_disabled_apps, + migrate_legacy_inference_url, +}; +use super::secrets::{decrypt_config_secrets, encrypt_config_secrets}; +use anyhow::{Context, Result}; +use std::collections::HashSet; +use std::path::Path; +use std::sync::{Mutex, OnceLock}; +use tokio::fs::{self, File, OpenOptions}; +use tokio::io::AsyncWriteExt; + +static WARNED_WORLD_READABLE_CONFIGS: OnceLock>> = + OnceLock::new(); + +pub(crate) async fn parse_config_with_recovery( + config_path: &Path, + contents: &str, +) -> (Config, bool) { + let parse_err = match parse_toml_off_worker(contents.to_string()).await { + Ok(config) => { + tracing::debug!( + path = %config_path.display(), + "[config] Config parsed successfully" + ); + return (config, false); + } + Err(parse_err) => parse_err, + }; + + let backup_path = config_path.with_extension("toml.bak"); + if tokio::fs::try_exists(&backup_path).await.unwrap_or(false) { + tracing::warn!( + path = %config_path.display(), + backup = 
%backup_path.display(), + error = %parse_err, + "[config] Config file is corrupted — attempting recovery from backup" + ); + match fs::read_to_string(&backup_path).await { + Ok(bak_contents) => match parse_toml_off_worker(bak_contents).await { + Ok(bak_config) => { + tracing::info!( + path = %config_path.display(), + backup = %backup_path.display(), + "[config] Recovered config from backup" + ); + return (bak_config, true); + } + Err(bak_err) => { + tracing::warn!( + path = %config_path.display(), + backup = %backup_path.display(), + error = %bak_err, + "[config] Backup is also corrupted; resetting to defaults" + ); + } + }, + Err(read_err) => { + tracing::warn!( + path = %config_path.display(), + backup = %backup_path.display(), + error = %read_err, + "[config] Failed to read backup; resetting to defaults" + ); + } + } + } else { + tracing::warn!( + path = %config_path.display(), + error = %parse_err, + "[config] Config file is corrupted (no backup found); resetting to defaults" + ); + } + + (Config::default(), true) +} + +async fn parse_toml_off_worker(contents: String) -> Result { + match tokio::task::spawn_blocking(move || toml::from_str::(&contents)).await { + Ok(Ok(config)) => Ok(config), + Ok(Err(parse_err)) => Err(parse_err.to_string()), + Err(join_err) => Err(format!("blocking-pool parse join failed: {join_err}")), + } +} + +impl Config { + pub async fn load_or_init() -> Result { + let (default_openhuman_dir, default_workspace_dir) = default_config_and_workspace_dirs()?; + Self::load_or_init_with_env_lookup( + &default_openhuman_dir, + &default_workspace_dir, + &ProcessEnv, + ) + .await + } + + pub(crate) async fn load_or_init_with_env_lookup( + default_openhuman_dir: &Path, + default_workspace_dir: &Path, + env: &(dyn EnvLookup + Send + Sync), + ) -> Result { + let (openhuman_dir, workspace_dir, resolution_source) = + resolve_runtime_config_dirs_with(default_openhuman_dir, default_workspace_dir, env) + .await?; + + let config_path = 
openhuman_dir.join("config.toml"); + + if resolution_source == ConfigResolutionSource::DefaultConfigDir && !config_path.exists() { + let mut config = Config { + config_path: config_path.clone(), + workspace_dir: workspace_dir.clone(), + action_dir: default_action_dir(), + ..Default::default() + }; + config.apply_env_overrides_from(env); + + tracing::debug!( + path = %config.config_path.display(), + workspace = %config.workspace_dir.display(), + source = resolution_source.as_str(), + initialized = false, + persisted = false, + "Config loaded (pre-login, in-memory only — no dirs or files written)" + ); + return Ok(config); + } + + fs::create_dir_all(&openhuman_dir) + .await + .context("Failed to create config directory")?; + fs::create_dir_all(&workspace_dir) + .await + .context("Failed to create workspace directory")?; + + if config_path.exists() { + #[cfg(unix)] + { + use std::{fs::Permissions, os::unix::fs::PermissionsExt}; + if let Ok(meta) = fs::metadata(&config_path).await { + if meta.permissions().mode() & 0o004 != 0 { + let warned = WARNED_WORLD_READABLE_CONFIGS + .get_or_init(|| Mutex::new(HashSet::new())); + let already_fixed = warned + .lock() + .unwrap_or_else(|e| e.into_inner()) + .contains(&config_path); + if !already_fixed { + tracing::warn!( + "[config] Config file {:?} is world-readable (mode {:o}); \ + auto-fixing to 600", + config_path, + meta.permissions().mode() & 0o777, + ); + match fs::set_permissions(&config_path, Permissions::from_mode(0o600)) + .await + { + Ok(()) => { + warned + .lock() + .unwrap_or_else(|e| e.into_inner()) + .insert(config_path.clone()); + } + Err(e) => { + tracing::warn!( + path = %config_path.display(), + error = %e, + "[config] failed to auto-fix config file permissions to 600", + ); + } + } + } + } + } + } + + let contents = crate::openhuman::util::retry_with_backoff_async( + "read config file", + 5, + 20, + || async { + fs::read_to_string(&config_path).await.with_context(|| { + format!("Failed to read config file: 
{}", config_path.display()) + }) + }, + ) + .await?; + let (mut config, config_was_corrupted) = + parse_config_with_recovery(&config_path, &contents).await; + config.config_path = config_path.clone(); + config.workspace_dir = workspace_dir; + config.action_dir = resolve_action_dir(&config.action_dir_override); + migrate_legacy_autocomplete_disabled_apps(&mut config); + migrate_legacy_inference_url(&mut config); + migrate_cloud_provider_slugs(&mut config); + config.apply_env_overrides_from(env); + + if config_was_corrupted { + let corrupted_path = config_path.with_extension("toml.corrupted"); + match fs::rename(&config_path, &corrupted_path).await { + Ok(()) => { + tracing::debug!( + src = %config_path.display(), + dst = %corrupted_path.display(), + "[config] Renamed corrupted config; persisting recovered config" + ); + if let Err(e) = config.save().await { + tracing::warn!( + path = %config.config_path.display(), + error = %e, + "[config] Failed to persist recovered config to disk" + ); + } + } + Err(e) => { + tracing::warn!( + src = %config_path.display(), + dst = %corrupted_path.display(), + error = %e, + "[config] Failed to rename corrupted config; skipping save to \ + protect the .bak — will retry recovery on next startup" + ); + } + } + } + + tracing::debug!( + path = %config.config_path.display(), + workspace = %config.workspace_dir.display(), + source = resolution_source.as_str(), + initialized = false, + recovered = config_was_corrupted, + "Config loaded" + ); + crate::openhuman::migrations::run_pending(&mut config).await; + decrypt_config_secrets(&mut config, &openhuman_dir)?; + Ok(config) + } else { + let mut config = Config { + config_path: config_path.clone(), + workspace_dir, + action_dir: default_action_dir(), + schema_version: crate::openhuman::migrations::CURRENT_SCHEMA_VERSION, + ..Default::default() + }; + config.save().await?; + + #[cfg(unix)] + { + use std::{fs::Permissions, os::unix::fs::PermissionsExt}; + let _ = 
fs::set_permissions(&config_path, Permissions::from_mode(0o600)).await; + } + + config.apply_env_overrides_from(env); + + tracing::debug!( + path = %config.config_path.display(), + workspace = %config.workspace_dir.display(), + source = resolution_source.as_str(), + initialized = true, + "Config loaded" + ); + crate::openhuman::migrations::run_pending(&mut config).await; + Ok(config) + } + } + + /// Load config from the default user paths, bypassing the + /// `OPENHUMAN_WORKSPACE` environment variable. + /// + /// This is used by the debug dump to load the real user config + /// for auth token resolution when the dump script overrides + /// `OPENHUMAN_WORKSPACE` to a throwaway temp directory. + pub async fn load_from_default_paths() -> Result { + let (default_openhuman_dir, default_workspace_dir) = default_config_and_workspace_dirs()?; + let (openhuman_dir, workspace_dir, _source) = + resolve_config_dirs_ignoring_env(&default_openhuman_dir, &default_workspace_dir) + .await?; + let config_path = openhuman_dir.join("config.toml"); + + if !config_path.exists() { + let mut config = Config { + config_path, + workspace_dir, + action_dir: default_action_dir(), + ..Default::default() + }; + config.apply_env_overrides(); + return Ok(config); + } + + // NOTE: no backup recovery here by design — this is the debug-dump path only; + // `load_or_init()` is the authoritative startup path that handles corruption. + let raw = fs::read_to_string(&config_path) + .await + .context("reading config.toml from default paths")?; + let (mut config, _was_corrupted) = parse_config_with_recovery(&config_path, &raw).await; + config.config_path = config_path; + config.workspace_dir = workspace_dir; + config.action_dir = resolve_action_dir(&config.action_dir_override); + config.apply_env_overrides(); + decrypt_config_secrets(&mut config, &openhuman_dir)?; + Ok(config) + } + + /// Reload a config from an already-resolved `config.toml` path. 
+ /// + /// This is for long-lived runtime objects that hold a `Config` + /// snapshot and need to observe updates written back to the same + /// file. It deliberately bypasses only `OPENHUMAN_WORKSPACE` + /// resolution: the caller has already been scoped to a user/workspace, + /// and following the process-global workspace env var again can cross + /// streams with unrelated tests or runtime tasks that temporarily + /// repoint it. Other process env overrides still apply. + pub async fn load_from_config_path(config_path: &Path, workspace_dir: &Path) -> Result { + let config_path = config_path.to_path_buf(); + let workspace_dir = workspace_dir.to_path_buf(); + + if !config_path.exists() { + let mut config = Config { + config_path, + workspace_dir, + action_dir: default_action_dir(), + ..Default::default() + }; + config.apply_env_overrides_from(&ProcessEnvWithoutWorkspace); + return Ok(config); + } + + let raw = fs::read_to_string(&config_path) + .await + .with_context(|| format!("reading config.toml from {}", config_path.display()))?; + let (mut config, config_was_corrupted) = + parse_config_with_recovery(&config_path, &raw).await; + config.config_path = config_path; + config.workspace_dir = workspace_dir; + config.action_dir = resolve_action_dir(&config.action_dir_override); + migrate_legacy_autocomplete_disabled_apps(&mut config); + migrate_legacy_inference_url(&mut config); + migrate_cloud_provider_slugs(&mut config); + config.apply_env_overrides_from(&ProcessEnvWithoutWorkspace); + + if config_was_corrupted { + tracing::warn!( + path = %config.config_path.display(), + "[config] Snapshot reload recovered a corrupted config; skipping persistence" + ); + } + + crate::openhuman::migrations::run_pending(&mut config).await; + Ok(config) + } + + pub async fn save(&self) -> Result<()> { + let mut config_to_save = self.clone(); + encrypt_config_secrets(&mut config_to_save)?; + + let toml_str = + toml::to_string_pretty(&config_to_save).context("Failed to serialize 
config")?; + + let parent_dir = self + .config_path + .parent() + .context("Config path must have a parent directory")?; + + fs::create_dir_all(parent_dir).await.with_context(|| { + format!( + "Failed to create config directory: {}", + parent_dir.display() + ) + })?; + + let file_name = self + .config_path + .file_name() + .and_then(|v| v.to_str()) + .unwrap_or("config.toml"); + let temp_path = parent_dir.join(format!(".{file_name}.tmp-{}", uuid::Uuid::new_v4())); + let backup_path = parent_dir.join(format!("{file_name}.bak")); + + let mut temp_file = OpenOptions::new() + .create_new(true) + .write(true) + .open(&temp_path) + .await + .with_context(|| { + format!( + "Failed to create temporary config file: {}", + temp_path.display() + ) + })?; + temp_file + .write_all(toml_str.as_bytes()) + .await + .context("Failed to write temporary config contents")?; + temp_file + .sync_all() + .await + .context("Failed to fsync temporary config file")?; + drop(temp_file); + + let had_existing_config = tokio::fs::try_exists(&self.config_path) + .await + .unwrap_or(false); + if had_existing_config { + fs::copy(&temp_path, &backup_path).await.with_context(|| { + format!( + "Failed to create config backup before atomic replace: {}", + backup_path.display() + ) + })?; + } + + if let Err(e) = fs::rename(&temp_path, &self.config_path).await { + let _ = fs::remove_file(&temp_path).await; + if had_existing_config && backup_path.exists() { + fs::copy(&backup_path, &self.config_path) + .await + .context("Failed to restore config backup")?; + } + anyhow::bail!("Failed to atomically replace config file: {e}"); + } + + super::sync_directory(parent_dir).await?; + + Ok(()) + } +} diff --git a/src/openhuman/config/schema/load/migrate.rs b/src/openhuman/config/schema/load/migrate.rs new file mode 100644 index 0000000000..3ab377a5ba --- /dev/null +++ b/src/openhuman/config/schema/load/migrate.rs @@ -0,0 +1,211 @@ +use super::super::Config; + +pub(crate) fn migrate_legacy_inference_url(config: 
&mut Config) { + if config.inference_url.is_some() { + return; + } + let Some(url) = config.api_url.as_deref() else { + return; + }; + let trimmed = url.trim().trim_end_matches('/'); + if !trimmed.ends_with("/chat/completions") { + return; + } + let is_openhuman_backend = trimmed.starts_with("https://api.tinyhumans.ai/") + || trimmed.starts_with("https://staging-api.tinyhumans.ai/"); + let moved = if is_openhuman_backend { + None + } else { + Some(trimmed.to_string()) + }; + let logged = match moved.as_deref() { + None => "".to_string(), + Some(u) => super::redact_url_for_log(u), + }; + tracing::info!( + "[config][migrate] splitting legacy api_url -> inference_url (api_url cleared, inference_url={})", + logged + ); + config.inference_url = moved; + config.api_url = None; +} + +/// Strip userinfo (basic-auth) and query string from a URL string for log +/// emission. Falls back to a coarse `/...` form when parsing fails so +/// we never leak the raw input. Public only so the migration's unit test +/// can assert the behaviour. +pub fn redact_url_for_log(raw: &str) -> String { + if let Ok(mut url) = url::Url::parse(raw) { + let _ = url.set_username(""); + let _ = url.set_password(None); + url.set_query(None); + url.set_fragment(None); + return url.to_string(); + } + let truncated = raw + .split(['?', '#']) + .next() + .unwrap_or(raw) + .trim_end_matches('/'); + if let Some((scheme, rest)) = truncated.split_once("://") { + if let Some((_, host_path)) = rest.split_once('@') { + return format!("{scheme}://***@{host_path}"); + } + return format!("{scheme}://{rest}"); + } + "".to_string() +} + +/// Migrate `cloud_providers` entries to the new slug-keyed shape and rewrite +/// any per-workload routing strings that still use the old bare-prefix grammar. +/// +/// This is idempotent: entries that already have a slug/label are left +/// untouched. Routing fields that already contain a `:` are assumed to be +/// in the new `:` form. 
+pub(crate) fn migrate_cloud_provider_slugs(config: &mut Config) { + use super::super::cloud_providers::{migrate_legacy_fields, AuthStyle}; + + for entry in &mut config.cloud_providers { + migrate_legacy_fields(entry); + } + + let slug_to_id: std::collections::HashMap = config + .cloud_providers + .iter() + .map(|e| (e.slug.clone(), e.id.clone())) + .collect(); + + let legacy_custom_slug = config + .inference_url + .as_deref() + .map(str::trim) + .filter(|url| !url.is_empty() && !looks_like_openhuman_provider_endpoint(url)) + .and_then(|url| { + let normalized = normalize_provider_endpoint(url); + config + .cloud_providers + .iter() + .find(|entry| { + !is_openhuman_provider_entry(entry) + && normalize_provider_endpoint(&entry.endpoint) == normalized + }) + .map(|entry| entry.slug.clone()) + }); + + let rewrite = |field: &mut Option| { + let raw = match field.as_deref() { + Some(s) if !s.is_empty() => s.to_string(), + _ => return, + }; + if raw.contains(':') || raw == "openhuman" { + return; + } + match raw.as_str() { + "cloud" => { + let primary_slug = config.primary_cloud.as_deref().and_then(|pid| { + config + .cloud_providers + .iter() + .find(|e| e.id == pid) + .map(|e| e.slug.clone()) + }); + let slug = match primary_slug.as_deref() { + Some("openhuman") => legacy_custom_slug.clone().or(primary_slug), + Some(_) => primary_slug, + None => legacy_custom_slug.clone().or_else(|| { + config + .cloud_providers + .iter() + .find(|entry| !is_openhuman_provider_entry(entry)) + .map(|entry| entry.slug.clone()) + }), + }; + if let Some(s) = slug { + if s == "openhuman" { + tracing::debug!( + "[config][migrate] rewriting routing 'cloud' → 'openhuman'" + ); + *field = Some("openhuman".to_string()); + } else { + tracing::info!( + "[config][migrate] rewriting routing 'cloud' → '{s}:' (empty model)" + ); + *field = Some(format!("{s}:")); + } + } else { + tracing::debug!( + "[config][migrate] routing 'cloud' with no non-openhuman provider → 'openhuman'" + ); + *field = 
Some("openhuman".to_string()); + } + } + other => { + if slug_to_id.contains_key(other) { + tracing::info!( + "[config][migrate] rewriting bare routing '{}' → '{}:'", + other, + other + ); + *field = Some(format!("{other}:")); + } else if other != "openhuman" { + tracing::warn!( + "[config][migrate] bare routing '{}' has no matching provider entry, \ + falling back to 'openhuman'", + other + ); + *field = Some("openhuman".to_string()); + } + } + } + }; + + rewrite(&mut config.reasoning_provider); + rewrite(&mut config.agentic_provider); + rewrite(&mut config.coding_provider); + rewrite(&mut config.memory_provider); + rewrite(&mut config.embeddings_provider); + rewrite(&mut config.heartbeat_provider); + rewrite(&mut config.learning_provider); + rewrite(&mut config.subconscious_provider); + + fn normalize_provider_endpoint(url: &str) -> String { + url.trim().trim_end_matches('/').to_ascii_lowercase() + } + + fn looks_like_openhuman_provider_endpoint(url: &str) -> bool { + let lower = url.trim().to_ascii_lowercase(); + let without_scheme = lower.split("://").nth(1).unwrap_or(&lower); + let authority = without_scheme.split('/').next().unwrap_or(""); + let host = authority.split('@').next_back().unwrap_or(authority); + let host_no_port = host.split(':').next().unwrap_or(host); + matches!( + host_no_port, + "api.openhuman.ai" | "api.tinyhumans.ai" | "staging-api.tinyhumans.ai" | "openhuman" + ) || host_no_port.ends_with(".openhuman.ai") + || host_no_port.ends_with(".tinyhumans.ai") + } + + fn is_openhuman_provider_entry( + entry: &super::super::cloud_providers::CloudProviderCreds, + ) -> bool { + entry.slug == "openhuman" + || matches!(entry.auth_style, AuthStyle::OpenhumanJwt) + || looks_like_openhuman_provider_endpoint(&entry.endpoint) + } +} + +pub(super) fn migrate_legacy_autocomplete_disabled_apps(config: &mut Config) { + let mut normalized: Vec = config + .autocomplete + .disabled_apps + .iter() + .map(|value| value.trim().to_ascii_lowercase()) + .filter(|value| 
!value.is_empty()) + .collect(); + normalized.sort(); + normalized.dedup(); + + if normalized == ["code".to_string(), "terminal".to_string()] { + config.autocomplete.disabled_apps = vec!["code".to_string()]; + } +} diff --git a/src/openhuman/config/schema/load/mod.rs b/src/openhuman/config/schema/load/mod.rs new file mode 100644 index 0000000000..1b6137ced6 --- /dev/null +++ b/src/openhuman/config/schema/load/mod.rs @@ -0,0 +1,69 @@ +//! Config load/save and environment variable overrides. + +mod dirs; +mod env; +mod env_overlay; +mod impl_load; +mod migrate; +mod secrets; + +pub(crate) use env::EnvLookup; +pub(crate) use env::ProcessEnv; + +pub use dirs::{ + action_dir_env_override, clear_active_user, default_action_dir, default_projects_dir, + default_root_openhuman_dir, pre_login_user_dir, read_active_user_id, resolve_action_dir, + user_openhuman_dir, write_active_user_id, ACTION_DIR_ENV_VAR, + MEMORY_SYNC_INTERVAL_SECS_ENV_VAR, PRE_LOGIN_USER_ID, PROJECTS_DIR_ENV_VAR, +}; + +pub(crate) use dirs::persist_active_workspace_config_dir; + +// redact_url_for_log is pub(super) for the schema module; tests inside load +// can access it because they are a submodule and use `use super::*`. +pub(super) use migrate::redact_url_for_log; + +// Items needed by load_tests.rs (loaded as `mod tests` below). +// Tests are a submodule of `load`, so `super::*` == this module's namespace. +#[cfg(test)] +pub(crate) use dirs::default_root_dir_name_pub as default_root_dir_name; +#[cfg(test)] +pub(crate) use dirs::{ + resolve_config_dir_for_workspace, resolve_runtime_config_dirs, + resolve_runtime_config_dirs_with, ConfigResolutionSource, +}; +// PathBuf and Config were in scope via `use super::*` in the original load.rs. 
+#[cfg(test)] +pub(crate) use super::Config; +#[cfg(test)] +pub(crate) use dirs::ACTIVE_USER_STATE_FILE; +#[cfg(test)] +pub(crate) use env::ProcessEnvWithoutWorkspace; +#[cfg(test)] +pub(crate) use impl_load::parse_config_with_recovery; +#[cfg(test)] +pub(crate) use migrate::{migrate_cloud_provider_slugs, migrate_legacy_inference_url}; +#[cfg(test)] +pub(crate) use std::path::PathBuf; + +#[cfg(unix)] +pub(super) async fn sync_directory(path: &std::path::Path) -> anyhow::Result<()> { + use anyhow::Context; + use tokio::fs::File; + let dir = File::open(path) + .await + .with_context(|| format!("Failed to open directory for fsync: {}", path.display()))?; + dir.sync_all() + .await + .with_context(|| format!("Failed to fsync directory metadata: {}", path.display()))?; + Ok(()) +} + +#[cfg(not(unix))] +pub(super) async fn sync_directory(_path: &std::path::Path) -> anyhow::Result<()> { + Ok(()) +} + +#[cfg(test)] +#[path = "../load_tests.rs"] +mod tests; diff --git a/src/openhuman/config/schema/load/secrets.rs b/src/openhuman/config/schema/load/secrets.rs new file mode 100644 index 0000000000..27008e54c1 --- /dev/null +++ b/src/openhuman/config/schema/load/secrets.rs @@ -0,0 +1,252 @@ +use super::super::Config; +use anyhow::{Context, Result}; +use std::path::Path; + +fn decrypt_optional_secret( + store: &crate::openhuman::keyring::SecretStore, + value: &mut Option, + field_name: &str, +) -> Result<()> { + if let Some(raw) = value.clone() { + if crate::openhuman::keyring::SecretStore::is_encrypted(&raw) { + match store.decrypt(&raw) { + Ok(plaintext) => *value = Some(plaintext), + Err(e) => { + // Decryption key is inaccessible (e.g. rotated, keyring reset, or + // migrated across machines). Clear the field so config loads + // successfully — the affected integration will be disabled until + // the user re-enters the credential. A hard error here would block + // every config load and make the app unusable. 
+ log::warn!( + "[config] Failed to decrypt {field_name} — field cleared (key inaccessible): {e}" + ); + crate::openhuman::keyring_consent::policy::notify_decrypt_failure( + field_name, + &e.to_string(), + ); + *value = None; + } + } + } + } + Ok(()) +} + +fn encrypt_optional_secret( + store: &crate::openhuman::keyring::SecretStore, + value: &mut Option, + field_name: &str, +) -> Result<()> { + if let Some(raw) = value.clone() { + if !crate::openhuman::keyring::SecretStore::is_encrypted(&raw) { + *value = Some( + store + .encrypt(&raw) + .with_context(|| format!("Failed to encrypt {field_name}"))?, + ); + } + } + Ok(()) +} + +/// Decrypt all secret fields in the configuration that are marked as encrypted. +/// +/// Called during config load when `secrets.encrypt` is true. Only decrypts +/// values that have the `enc:` or `enc2:` prefix; plaintext values are +/// returned as-is. This is a no-op when encryption is disabled. +pub(super) fn decrypt_config_secrets(config: &mut Config, openhuman_dir: &Path) -> Result<()> { + if !config.secrets.encrypt { + return Ok(()); + } + let store = crate::openhuman::keyring::SecretStore::new(openhuman_dir, true); + + decrypt_optional_secret(&store, &mut config.api_key, "api_key")?; + + decrypt_optional_secret( + &store, + &mut config.search.parallel.api_key, + "search.parallel.api_key", + )?; + decrypt_optional_secret( + &store, + &mut config.search.brave.api_key, + "search.brave.api_key", + )?; + decrypt_optional_secret( + &store, + &mut config.search.querit.api_key, + "search.querit.api_key", + )?; + + let ch = &mut config.channels_config; + if let Some(ref mut tg) = ch.telegram { + let mut tok = Some(tg.bot_token.clone()); + decrypt_optional_secret(&store, &mut tok, "telegram.bot_token")?; + tg.bot_token = tok.unwrap_or_default(); + } + if let Some(ref mut d) = ch.discord { + let mut tok = Some(d.bot_token.clone()); + decrypt_optional_secret(&store, &mut tok, "discord.bot_token")?; + d.bot_token = tok.unwrap_or_default(); + } + 
if let Some(ref mut s) = ch.slack { + let mut tok = Some(s.bot_token.clone()); + decrypt_optional_secret(&store, &mut tok, "slack.bot_token")?; + s.bot_token = tok.unwrap_or_default(); + decrypt_optional_secret(&store, &mut s.app_token, "slack.app_token")?; + } + if let Some(ref mut m) = ch.mattermost { + let mut tok = Some(m.bot_token.clone()); + decrypt_optional_secret(&store, &mut tok, "mattermost.bot_token")?; + m.bot_token = tok.unwrap_or_default(); + } + if let Some(ref mut w) = ch.webhook { + decrypt_optional_secret(&store, &mut w.secret, "webhook.secret")?; + } + if let Some(ref mut mx) = ch.matrix { + let mut tok = Some(mx.access_token.clone()); + decrypt_optional_secret(&store, &mut tok, "matrix.access_token")?; + mx.access_token = tok.unwrap_or_default(); + } + if let Some(ref mut wa) = ch.whatsapp { + decrypt_optional_secret(&store, &mut wa.access_token, "whatsapp.access_token")?; + decrypt_optional_secret(&store, &mut wa.verify_token, "whatsapp.verify_token")?; + decrypt_optional_secret(&store, &mut wa.app_secret, "whatsapp.app_secret")?; + } + if let Some(ref mut lq) = ch.linq { + let mut tok = Some(lq.api_token.clone()); + decrypt_optional_secret(&store, &mut tok, "linq.api_token")?; + lq.api_token = tok.unwrap_or_default(); + } + if let Some(ref mut irc) = ch.irc { + decrypt_optional_secret(&store, &mut irc.server_password, "irc.server_password")?; + decrypt_optional_secret(&store, &mut irc.nickserv_password, "irc.nickserv_password")?; + decrypt_optional_secret(&store, &mut irc.sasl_password, "irc.sasl_password")?; + } + if let Some(ref mut lk) = ch.lark { + let mut tok = Some(lk.app_secret.clone()); + decrypt_optional_secret(&store, &mut tok, "lark.app_secret")?; + lk.app_secret = tok.unwrap_or_default(); + decrypt_optional_secret(&store, &mut lk.encrypt_key, "lark.encrypt_key")?; + decrypt_optional_secret( + &store, + &mut lk.verification_token, + "lark.verification_token", + )?; + } + if let Some(ref mut dt) = ch.dingtalk { + let mut tok = 
Some(dt.client_secret.clone()); + decrypt_optional_secret(&store, &mut tok, "dingtalk.client_secret")?; + dt.client_secret = tok.unwrap_or_default(); + } + if let Some(ref mut qq) = ch.qq { + let mut tok = Some(qq.app_secret.clone()); + decrypt_optional_secret(&store, &mut tok, "qq.app_secret")?; + qq.app_secret = tok.unwrap_or_default(); + } + + Ok(()) +} + +/// Encrypt all secret fields in the configuration before writing to disk. +/// +/// Called during `Config::save()` when `secrets.encrypt` is true. Only +/// encrypts values that are NOT already encrypted. This is a no-op when +/// encryption is disabled. +pub(super) fn encrypt_config_secrets(config: &mut Config) -> Result<()> { + if !config.secrets.encrypt { + return Ok(()); + } + let parent_dir = config + .config_path + .parent() + .context("Config path must have a parent directory")?; + let store = crate::openhuman::keyring::SecretStore::new(parent_dir, true); + + encrypt_optional_secret(&store, &mut config.api_key, "api_key")?; + + encrypt_optional_secret( + &store, + &mut config.search.parallel.api_key, + "search.parallel.api_key", + )?; + encrypt_optional_secret( + &store, + &mut config.search.brave.api_key, + "search.brave.api_key", + )?; + encrypt_optional_secret( + &store, + &mut config.search.querit.api_key, + "search.querit.api_key", + )?; + + let ch = &mut config.channels_config; + if let Some(ref mut tg) = ch.telegram { + let mut tok = Some(tg.bot_token.clone()); + encrypt_optional_secret(&store, &mut tok, "telegram.bot_token")?; + tg.bot_token = tok.unwrap_or_default(); + } + if let Some(ref mut d) = ch.discord { + let mut tok = Some(d.bot_token.clone()); + encrypt_optional_secret(&store, &mut tok, "discord.bot_token")?; + d.bot_token = tok.unwrap_or_default(); + } + if let Some(ref mut s) = ch.slack { + let mut tok = Some(s.bot_token.clone()); + encrypt_optional_secret(&store, &mut tok, "slack.bot_token")?; + s.bot_token = tok.unwrap_or_default(); + encrypt_optional_secret(&store, &mut 
s.app_token, "slack.app_token")?; + } + if let Some(ref mut m) = ch.mattermost { + let mut tok = Some(m.bot_token.clone()); + encrypt_optional_secret(&store, &mut tok, "mattermost.bot_token")?; + m.bot_token = tok.unwrap_or_default(); + } + if let Some(ref mut w) = ch.webhook { + encrypt_optional_secret(&store, &mut w.secret, "webhook.secret")?; + } + if let Some(ref mut mx) = ch.matrix { + let mut tok = Some(mx.access_token.clone()); + encrypt_optional_secret(&store, &mut tok, "matrix.access_token")?; + mx.access_token = tok.unwrap_or_default(); + } + if let Some(ref mut wa) = ch.whatsapp { + encrypt_optional_secret(&store, &mut wa.access_token, "whatsapp.access_token")?; + encrypt_optional_secret(&store, &mut wa.verify_token, "whatsapp.verify_token")?; + encrypt_optional_secret(&store, &mut wa.app_secret, "whatsapp.app_secret")?; + } + if let Some(ref mut lq) = ch.linq { + let mut tok = Some(lq.api_token.clone()); + encrypt_optional_secret(&store, &mut tok, "linq.api_token")?; + lq.api_token = tok.unwrap_or_default(); + } + if let Some(ref mut irc) = ch.irc { + encrypt_optional_secret(&store, &mut irc.server_password, "irc.server_password")?; + encrypt_optional_secret(&store, &mut irc.nickserv_password, "irc.nickserv_password")?; + encrypt_optional_secret(&store, &mut irc.sasl_password, "irc.sasl_password")?; + } + if let Some(ref mut lk) = ch.lark { + let mut tok = Some(lk.app_secret.clone()); + encrypt_optional_secret(&store, &mut tok, "lark.app_secret")?; + lk.app_secret = tok.unwrap_or_default(); + encrypt_optional_secret(&store, &mut lk.encrypt_key, "lark.encrypt_key")?; + encrypt_optional_secret( + &store, + &mut lk.verification_token, + "lark.verification_token", + )?; + } + if let Some(ref mut dt) = ch.dingtalk { + let mut tok = Some(dt.client_secret.clone()); + encrypt_optional_secret(&store, &mut tok, "dingtalk.client_secret")?; + dt.client_secret = tok.unwrap_or_default(); + } + if let Some(ref mut qq) = ch.qq { + let mut tok = 
Some(qq.app_secret.clone()); + encrypt_optional_secret(&store, &mut tok, "qq.app_secret")?; + qq.app_secret = tok.unwrap_or_default(); + } + + Ok(()) +} diff --git a/src/openhuman/config/schema/tools.rs b/src/openhuman/config/schema/tools.rs deleted file mode 100644 index 76370a0323..0000000000 --- a/src/openhuman/config/schema/tools.rs +++ /dev/null @@ -1,1269 +0,0 @@ -//! Tool-related config: browser, HTTP, web search, composio, secrets, multimodal. - -use super::defaults; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; -use std::collections::HashMap; - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct MultimodalConfig { - #[serde(default = "default_multimodal_max_images")] - pub max_images: usize, - #[serde(default = "default_multimodal_max_image_size_mb")] - pub max_image_size_mb: usize, - #[serde(default)] - pub allow_remote_fetch: bool, -} - -fn default_multimodal_max_images() -> usize { - 4 -} - -fn default_multimodal_max_image_size_mb() -> usize { - 8 -} - -impl MultimodalConfig { - /// Clamp configured values to safe runtime bounds. - pub fn effective_limits(&self) -> (usize, usize) { - let max_images = self.max_images.clamp(1, 16); - let max_image_size_mb = self.max_image_size_mb.clamp(1, 20); - (max_images, max_image_size_mb) - } - - /// Clamp image count to the configured maximum. - pub fn clamp_image_count(&self, count: usize) -> usize { - count.min(self.max_images) - } -} - -impl Default for MultimodalConfig { - fn default() -> Self { - Self { - max_images: default_multimodal_max_images(), - max_image_size_mb: default_multimodal_max_image_size_mb(), - allow_remote_fetch: false, - } - } -} - -/// File-attachment counterpart to [`MultimodalConfig`]. Governs how -/// `[FILE:…]` markers in user messages are resolved, validated, and -/// inlined as text context for the agent. 
-/// -/// Defaults err on the side of "useful for prose docs without blowing -/// the context window": 4 files per turn, 16 MB per file, 50 000 chars -/// of extracted text per file. Remote fetch is opt-in. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct MultimodalFileConfig { - #[serde(default = "default_multimodal_max_files")] - pub max_files: usize, - #[serde(default = "default_multimodal_max_file_size_mb")] - pub max_file_size_mb: usize, - #[serde(default = "default_multimodal_max_extracted_text_chars")] - pub max_extracted_text_chars: usize, - #[serde(default)] - pub allow_remote_fetch: bool, - #[serde(default = "default_multimodal_allowed_file_mime_types")] - pub allowed_mime_types: Vec, -} - -fn default_multimodal_max_files() -> usize { - 4 -} - -fn default_multimodal_max_file_size_mb() -> usize { - 16 -} - -fn default_multimodal_max_extracted_text_chars() -> usize { - 50_000 -} - -fn default_multimodal_allowed_file_mime_types() -> Vec { - vec![ - // Extractable text formats. - "application/pdf".to_string(), - "text/plain".to_string(), - "text/csv".to_string(), - "text/markdown".to_string(), - // Binary-only formats surfaced as metadata-only references. - "application/zip".to_string(), - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet".to_string(), - "application/vnd.openxmlformats-officedocument.wordprocessingml.document".to_string(), - "application/vnd.openxmlformats-officedocument.presentationml.presentation".to_string(), - "application/octet-stream".to_string(), - ] -} - -impl MultimodalFileConfig { - /// Clamp configured values to safe runtime bounds. 
- pub fn effective_limits(&self) -> (usize, usize, usize) { - let max_files = self.max_files.clamp(1, 16); - let max_file_size_mb = self.max_file_size_mb.clamp(1, 50); - let max_extracted_text_chars = self.max_extracted_text_chars.clamp(1_000, 200_000); - (max_files, max_file_size_mb, max_extracted_text_chars) - } - - /// True iff `mime` is on the configured allowlist (case-insensitive). - pub fn is_mime_allowed(&self, mime: &str) -> bool { - let needle = mime.to_ascii_lowercase(); - self.allowed_mime_types - .iter() - .any(|allowed| allowed.eq_ignore_ascii_case(&needle)) - } - - /// Hardened config for turns whose user text originates from an - /// untrusted third-party channel (Slack / Discord / Telegram / - /// WhatsApp / etc.). Disables `[FILE:…]` marker resolution outright - /// so a remote sender cannot smuggle `[FILE:/etc/passwd]`, - /// `[FILE:.env]`, or any other local-path marker into an inbound - /// message and have the agent exfiltrate the file's contents into - /// an LLM call. Also forbids remote fetch. - /// - /// `max_files: 0` is a sentinel: `prepare_messages_for_provider` - /// short-circuits at the first `[FILE:…]` marker with - /// `TooManyFiles` before any disk or network read happens. This - /// holds regardless of the per-operator - /// `[tools.multimodal_files]` block in `config.toml`. - /// - /// Mirrors the triage-arm hardening in - /// `openhuman::agent::triage::evaluator`. Apply at the per-turn - /// application site (the channel-runtime dispatcher) — the - /// operator-supplied `config.multimodal_files` stays the source of - /// truth for the desktop / web-chat path where the user owns the - /// local filesystem. 
- pub fn for_untrusted_channel_input() -> Self { - Self { - max_files: 0, - allow_remote_fetch: false, - ..Default::default() - } - } -} - -impl Default for MultimodalFileConfig { - fn default() -> Self { - Self { - max_files: default_multimodal_max_files(), - max_file_size_mb: default_multimodal_max_file_size_mb(), - max_extracted_text_chars: default_multimodal_max_extracted_text_chars(), - allow_remote_fetch: false, - allowed_mime_types: default_multimodal_allowed_file_mime_types(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct BrowserComputerUseConfig { - #[serde(default = "default_browser_computer_use_endpoint")] - pub endpoint: String, - #[serde(default = "default_browser_computer_use_timeout_ms")] - pub timeout_ms: u64, - #[serde(default)] - pub allow_remote_endpoint: bool, - #[serde(default)] - pub window_allowlist: Vec, - #[serde(default)] - pub max_coordinate_x: Option, - #[serde(default)] - pub max_coordinate_y: Option, -} - -fn default_browser_computer_use_endpoint() -> String { - "http://127.0.0.1:8787/v1/actions".into() -} - -fn default_browser_computer_use_timeout_ms() -> u64 { - 15_000 -} - -impl Default for BrowserComputerUseConfig { - fn default() -> Self { - Self { - endpoint: default_browser_computer_use_endpoint(), - timeout_ms: default_browser_computer_use_timeout_ms(), - allow_remote_endpoint: false, - window_allowlist: Vec::new(), - max_coordinate_x: None, - max_coordinate_y: None, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct BrowserConfig { - #[serde(default)] - pub enabled: bool, - /// DEPRECATED: the browser tool now shares the unified web-access host list - /// in `[http_request].allowed_domains` (see `tools::ops::all_tools_with_runtime`). - /// Still parsed for backward compatibility but no longer gates browser - /// navigation. 
Manage allowed hosts via Settings → Search → Allowed websites; - /// browser allow-all remains gated by `OPENHUMAN_BROWSER_ALLOW_ALL`. - #[serde(default)] - pub allowed_domains: Vec, - #[serde(default)] - pub session_name: Option, - #[serde(default = "default_browser_backend")] - pub backend: String, - #[serde(default = "default_true")] - pub native_headless: bool, - #[serde(default = "default_browser_webdriver_url")] - pub native_webdriver_url: String, - #[serde(default)] - pub native_chrome_path: Option, - #[serde(default)] - pub computer_use: BrowserComputerUseConfig, -} - -fn default_true() -> bool { - defaults::default_true() -} - -fn default_browser_backend() -> String { - "agent_browser".into() -} - -fn default_browser_webdriver_url() -> String { - "http://127.0.0.1:9515".into() -} - -impl Default for BrowserConfig { - fn default() -> Self { - Self { - enabled: false, - allowed_domains: Vec::new(), - session_name: None, - backend: default_browser_backend(), - native_headless: default_true(), - native_webdriver_url: default_browser_webdriver_url(), - native_chrome_path: None, - computer_use: BrowserComputerUseConfig::default(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct HttpRequestConfig { - /// Hosts the assistant may open/read via `web_fetch` / `curl`. An exact - /// host also matches its subdomains; `"*"` allows all public sites; an - /// empty list blocks all web access. Defaults to `["*"]` so web research - /// works out of the box — the SSRF guard still blocks local/private hosts - /// regardless. Narrow this via Settings → Search → Allowed websites. 
- #[serde(default = "default_http_allowed_domains")] - pub allowed_domains: Vec, - #[serde(default = "default_http_max_response_size")] - pub max_response_size: usize, - #[serde(default = "default_http_timeout_secs")] - pub timeout_secs: u64, -} - -impl Default for HttpRequestConfig { - fn default() -> Self { - Self { - allowed_domains: default_http_allowed_domains(), - max_response_size: default_http_max_response_size(), - timeout_secs: default_http_timeout_secs(), - } - } -} - -fn default_http_allowed_domains() -> Vec { - vec!["*".to_string()] -} - -fn default_http_max_response_size() -> usize { - 1_000_000 -} - -fn default_http_timeout_secs() -> u64 { - 30 -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct CurlConfig { - /// Subdirectory under `workspace_dir` where downloads land. Inputs - /// are resolved relative to this root; absolute paths and `..` - /// segments are rejected. - #[serde(default = "default_curl_dest_subdir")] - pub dest_subdir: String, - /// Hard byte ceiling per download. Streaming aborts and the - /// partial file is removed if exceeded. - #[serde(default = "default_curl_max_download_bytes")] - pub max_download_bytes: u64, - /// Per-request timeout in seconds. - #[serde(default = "default_curl_timeout_secs")] - pub timeout_secs: u64, -} - -fn default_curl_dest_subdir() -> String { - "downloads".into() -} - -fn default_curl_max_download_bytes() -> u64 { - 50 * 1024 * 1024 -} - -fn default_curl_timeout_secs() -> u64 { - 120 -} - -impl Default for CurlConfig { - fn default() -> Self { - Self { - dest_subdir: default_curl_dest_subdir(), - max_download_bytes: default_curl_max_download_bytes(), - timeout_secs: default_curl_timeout_secs(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct GitbooksConfig { - /// When `true`, register `gitbooks_search` and `gitbooks_get_page`. 
- #[serde(default = "defaults::default_true")] - pub enabled: bool, - /// MCP endpoint URL for the OpenHuman GitBook docs. - #[serde(default = "default_gitbooks_endpoint")] - pub endpoint: String, - /// Per-request timeout in seconds. - #[serde(default = "default_gitbooks_timeout_secs")] - pub timeout_secs: u64, -} - -fn default_gitbooks_endpoint() -> String { - "https://tinyhumans.gitbook.io/openhuman/~gitbook/mcp".into() -} - -fn default_gitbooks_timeout_secs() -> u64 { - 30 -} - -impl Default for GitbooksConfig { - fn default() -> Self { - Self { - enabled: defaults::default_true(), - endpoint: default_gitbooks_endpoint(), - timeout_secs: default_gitbooks_timeout_secs(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct McpServerConfig { - /// Stable server slug used by the agent-facing bridge tools. - #[serde(default)] - pub name: String, - /// MCP endpoint URL. Current implementation supports stateless - /// Streamable HTTP / JSON responses. - #[serde(default)] - pub endpoint: String, - /// Optional stdio command for local MCP servers. When set, the - /// client launches this command as a subprocess and speaks newline- - /// delimited JSON-RPC over stdin/stdout per the MCP stdio transport. - #[serde(default)] - pub command: String, - /// Command-line arguments for stdio MCP servers. - #[serde(default)] - pub args: Vec, - /// Extra environment variables for stdio MCP servers. MCP stdio auth - /// is typically passed this way. - #[serde(default)] - pub env: HashMap, - /// Optional working directory for stdio MCP servers. - #[serde(default)] - pub cwd: Option, - /// Optional human-readable description shown in bridge tool output. - #[serde(default)] - pub description: Option, - /// Whether this server should be exposed to the MCP bridge tools. - #[serde(default = "defaults::default_true")] - pub enabled: bool, - /// Exact remote tool names this server may expose through the generic - /// MCP bridge. 
Empty means all remote tools are allowed unless they - /// appear in `disallowed_tools`. - #[serde(default)] - pub allowed_tools: Vec, - /// Exact remote tool names that should always be hidden and blocked. - /// This denylist takes precedence over `allowed_tools`. - #[serde(default)] - pub disallowed_tools: Vec, - /// Per-request timeout in seconds. - #[serde(default = "default_mcp_timeout_secs")] - pub timeout_secs: u64, - /// Optional auth strategy applied to outbound requests for this - /// server. Useful for API-key and pre-provisioned bearer-token - /// flows; interactive OAuth discovery is handled by the client - /// transport separately when a server returns an auth challenge. - #[serde(default)] - pub auth: McpAuthConfig, -} - -fn default_mcp_timeout_secs() -> u64 { - 30 -} - -impl Default for McpServerConfig { - fn default() -> Self { - Self { - name: String::new(), - endpoint: String::new(), - command: String::new(), - args: Vec::new(), - env: HashMap::new(), - cwd: None, - description: None, - enabled: defaults::default_true(), - allowed_tools: Vec::new(), - disallowed_tools: Vec::new(), - timeout_secs: default_mcp_timeout_secs(), - auth: McpAuthConfig::None, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub enum McpAuthConfig { - None, - BearerToken { token: String }, - Basic { username: String, password: String }, - Header { name: String, value: String }, - QueryParam { name: String, value: String }, -} - -impl Default for McpAuthConfig { - fn default() -> Self { - Self::None - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct McpClientIdentityConfig { - /// Client name sent during `initialize.clientInfo.name`. - #[serde(default = "default_mcp_client_name")] - pub name: String, - /// Client title sent during `initialize.clientInfo.title`. 
- #[serde(default = "default_mcp_client_title")] - pub title: String, - /// Client version sent during `initialize.clientInfo.version`. - #[serde(default = "default_mcp_client_version")] - pub version: String, -} - -fn default_mcp_client_name() -> String { - "openhuman-core".into() -} - -fn default_mcp_client_title() -> String { - "OpenHuman Core MCP Client".into() -} - -fn default_mcp_client_version() -> String { - env!("CARGO_PKG_VERSION").into() -} - -impl Default for McpClientIdentityConfig { - fn default() -> Self { - Self { - name: default_mcp_client_name(), - title: default_mcp_client_title(), - version: default_mcp_client_version(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct McpClientConfig { - /// When `true`, register the generic MCP bridge tools and expose - /// configured remote MCP servers to the agent runtime. - #[serde(default = "defaults::default_true")] - pub enabled: bool, - /// Named remote MCP servers accessible via `mcp_list_*` / - /// `mcp_call_tool`. - #[serde(default)] - pub servers: Vec, - /// Identity block sent during initialize. - #[serde(default)] - pub client_identity: McpClientIdentityConfig, - /// Optional auth/overrides for the MCP *registry* browse APIs (Smithery + - /// the official modelcontextprotocol/registry). Each value falls back to - /// the corresponding env var when unset (issue #3039 gap A6). - #[serde(default)] - pub registry_auth: McpRegistryAuthConfig, -} - -impl Default for McpClientConfig { - fn default() -> Self { - Self { - enabled: defaults::default_true(), - servers: Vec::new(), - client_identity: McpClientIdentityConfig::default(), - registry_auth: McpRegistryAuthConfig::default(), - } - } -} - -/// Registry-browse auth + endpoint overrides. Lets a user who hits Smithery -/// rate limits (or needs an authenticated official-registry endpoint) supply -/// credentials from the desktop app instead of editing env vars. 
Each field is -/// config-first with an env-var fallback so existing CI/Docker deployments that -/// only set env vars keep working unchanged. -/// -/// Secrets are write-only over RPC: the getter reports whether each secret is -/// *set* (a boolean) and never echoes the value back. -#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct McpRegistryAuthConfig { - /// Smithery API key. Falls back to `SMITHERY_API_KEY`. - #[serde(default)] - pub smithery_api_key: Option, - /// Base URL override for the official registry. Falls back to - /// `MCP_OFFICIAL_REGISTRY_BASE` (non-secret). - #[serde(default)] - pub mcp_official_base: Option, - /// Bearer token for the official registry. Falls back to - /// `MCP_OFFICIAL_REGISTRY_TOKEN`. - #[serde(default)] - pub mcp_official_token: Option, -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct SeltzConfig { - /// When `true`, register `seltz_search` as an agent tool. - #[serde(default)] - pub enabled: bool, - /// Seltz API key. Can also be set via `SELTZ_API_KEY` or - /// `OPENHUMAN_SELTZ_API_KEY` env var. - #[serde(default)] - pub api_key: Option, - /// Override the Seltz API base URL (default: `https://api.seltz.ai/v1`). - #[serde(default)] - pub api_url: Option, - /// Max results per query (1–20, default 10). - #[serde(default = "default_seltz_max_results")] - pub max_results: usize, - /// Per-request timeout in seconds (default 15). 
- #[serde(default = "default_seltz_timeout_secs")] - pub timeout_secs: u64, -} - -fn default_seltz_max_results() -> usize { - 10 -} - -fn default_seltz_timeout_secs() -> u64 { - 15 -} - -impl Default for SeltzConfig { - fn default() -> Self { - Self { - enabled: false, - api_key: None, - api_url: None, - max_results: default_seltz_max_results(), - timeout_secs: default_seltz_timeout_secs(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct SearxngConfig { - /// When `true`, register `searxng_search` as an agent and MCP tool. - #[serde(default)] - pub enabled: bool, - /// Base URL for the user's SearXNG instance. - #[serde(default = "default_searxng_base_url")] - pub base_url: String, - /// Max results per query (1-50, default 10). - #[serde(default = "default_searxng_max_results")] - pub max_results: usize, - /// Language code passed to SearXNG when a call omits `language`. - #[serde(default = "default_searxng_language")] - pub default_language: String, - /// Per-request timeout in seconds (default 10). 
- #[serde(default = "default_searxng_timeout_secs", alias = "timeout_seconds")] - pub timeout_secs: u64, -} - -fn default_searxng_base_url() -> String { - "http://localhost:8080".into() -} - -fn default_searxng_max_results() -> usize { - 10 -} - -fn default_searxng_language() -> String { - "en".into() -} - -fn default_searxng_timeout_secs() -> u64 { - 10 -} - -impl Default for SearxngConfig { - fn default() -> Self { - Self { - enabled: false, - base_url: default_searxng_base_url(), - max_results: default_searxng_max_results(), - default_language: default_searxng_language(), - timeout_secs: default_searxng_timeout_secs(), - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct WebSearchConfig { - #[serde(default = "default_web_search_max_results")] - pub max_results: usize, - #[serde(default = "default_web_search_timeout_secs")] - pub timeout_secs: u64, -} - -fn default_web_search_max_results() -> usize { - 5 -} - -fn default_web_search_timeout_secs() -> u64 { - 15 -} - -impl Default for WebSearchConfig { - fn default() -> Self { - Self { - max_results: default_web_search_max_results(), - timeout_secs: default_web_search_timeout_secs(), - } - } -} - -// ── Search engines ────────────────────────────────────────────────── -// -// Unified search-engine selector. Only one engine is active at a time -// (mirrors the LLM-provider API-key flow). The active engine governs -// which tools are registered: `disabled` → no search tools; `managed` → -// backend-proxied `web_search`; `parallel` → direct Parallel API tools -// (search/extract/chat/research/enrich/dataset); `brave` → direct Brave Search -// tools (web/news/images/videos); `querit` → direct Querit web search. 
- -pub const SEARCH_ENGINE_DISABLED: &str = "disabled"; -pub const SEARCH_ENGINE_MANAGED: &str = "managed"; -pub const SEARCH_ENGINE_PARALLEL: &str = "parallel"; -pub const SEARCH_ENGINE_BRAVE: &str = "brave"; -pub const SEARCH_ENGINE_QUERIT: &str = "querit"; - -fn default_search_engine() -> String { - SEARCH_ENGINE_MANAGED.into() -} - -fn default_search_max_results() -> usize { - 5 -} - -fn default_search_timeout_secs() -> u64 { - 15 -} - -/// Credentials for a BYO search engine. Mirrors the LLM provider API- -/// key shape — a simple `Option` that is considered configured -/// iff the trimmed value is non-empty. -#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct SearchEngineCredentials { - #[serde(default)] - pub api_key: Option, -} - -impl SearchEngineCredentials { - pub fn has_key(&self) -> bool { - self.api_key - .as_deref() - .map(|s| !s.trim().is_empty()) - .unwrap_or(false) - } - - pub fn key(&self) -> Option<&str> { - self.api_key.as_deref().and_then(|s| { - let t = s.trim(); - if t.is_empty() { - None - } else { - Some(t) - } - }) - } -} - -/// Unified search-engine configuration. Exactly one engine drives tool -/// registration at a time. `disabled` suppresses all search tools; `managed` is -/// the backend-proxied default and requires no key; `parallel`, `brave`, and -/// `querit` are BYO and require their own API key in the matching sub-block. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct SearchConfig { - /// Active search engine. One of [`SEARCH_ENGINE_DISABLED`], - /// [`SEARCH_ENGINE_MANAGED`], [`SEARCH_ENGINE_PARALLEL`], - /// [`SEARCH_ENGINE_BRAVE`], or [`SEARCH_ENGINE_QUERIT`]. Unknown values - /// fall back to managed at registration time. - #[serde(default = "default_search_engine")] - pub engine: String, - - /// Max results per query (1–20, default 5). 
- #[serde(default = "default_search_max_results")] - pub max_results: usize, - - /// Per-request timeout in seconds (default 15). - #[serde(default = "default_search_timeout_secs")] - pub timeout_secs: u64, - - /// Parallel API credentials (used when `engine = "parallel"`). - #[serde(default)] - pub parallel: SearchEngineCredentials, - - /// Brave Search credentials (used when `engine = "brave"`). - #[serde(default)] - pub brave: SearchEngineCredentials, - - /// Querit credentials (used when `engine = "querit"`). - #[serde(default)] - pub querit: SearchEngineCredentials, -} - -impl Default for SearchConfig { - fn default() -> Self { - Self { - engine: default_search_engine(), - max_results: default_search_max_results(), - timeout_secs: default_search_timeout_secs(), - parallel: SearchEngineCredentials::default(), - brave: SearchEngineCredentials::default(), - querit: SearchEngineCredentials::default(), - } - } -} - -/// Normalized search-engine enum used at tool-registration time. Falls -/// back to [`SearchEngine::Managed`] for unknown strings and for BYO -/// engines that have no API key configured. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SearchEngine { - Disabled, - Managed, - Parallel, - Brave, - Querit, -} - -impl SearchConfig { - /// Resolve the *effective* engine after gating on API-key - /// availability. A BYO engine without a key silently falls back to - /// managed so the agent never ends up with zero search tools — the - /// UI surfaces the misconfiguration separately. 
- pub fn effective_engine(&self) -> SearchEngine { - match self.engine.trim().to_ascii_lowercase().as_str() { - SEARCH_ENGINE_DISABLED => SearchEngine::Disabled, - SEARCH_ENGINE_PARALLEL if self.parallel.has_key() => SearchEngine::Parallel, - SEARCH_ENGINE_BRAVE if self.brave.has_key() => SearchEngine::Brave, - SEARCH_ENGINE_QUERIT if self.querit.has_key() => SearchEngine::Querit, - _ => SearchEngine::Managed, - } - } - - pub fn requested_engine_str(&self) -> &str { - let trimmed = self.engine.trim(); - if trimmed.is_empty() { - SEARCH_ENGINE_MANAGED - } else { - trimmed - } - } -} - -#[cfg(test)] -mod search_config_tests { - use super::*; - - #[test] - fn defaults_to_managed() { - let cfg = SearchConfig::default(); - assert_eq!(cfg.effective_engine(), SearchEngine::Managed); - } - - #[test] - fn disabled_stays_disabled() { - let cfg = SearchConfig { - engine: SEARCH_ENGINE_DISABLED.into(), - ..Default::default() - }; - assert_eq!(cfg.effective_engine(), SearchEngine::Disabled); - } - - #[test] - fn parallel_requires_key() { - let mut cfg = SearchConfig { - engine: SEARCH_ENGINE_PARALLEL.into(), - ..Default::default() - }; - assert_eq!(cfg.effective_engine(), SearchEngine::Managed); - cfg.parallel.api_key = Some(" ".into()); - assert_eq!(cfg.effective_engine(), SearchEngine::Managed); - cfg.parallel.api_key = Some("real".into()); - assert_eq!(cfg.effective_engine(), SearchEngine::Parallel); - } - - #[test] - fn brave_requires_key() { - let mut cfg = SearchConfig { - engine: SEARCH_ENGINE_BRAVE.into(), - ..Default::default() - }; - assert_eq!(cfg.effective_engine(), SearchEngine::Managed); - cfg.brave.api_key = Some("real".into()); - assert_eq!(cfg.effective_engine(), SearchEngine::Brave); - } - - #[test] - fn querit_requires_key() { - let mut cfg = SearchConfig { - engine: SEARCH_ENGINE_QUERIT.into(), - ..Default::default() - }; - assert_eq!(cfg.effective_engine(), SearchEngine::Managed); - cfg.querit.api_key = Some("real".into()); - 
assert_eq!(cfg.effective_engine(), SearchEngine::Querit); - } - - #[test] - fn http_request_defaults_to_allow_all() { - // Web research works out of the box: the default allowlist is the - // wildcard. The SSRF guard (url_guard) still blocks local/private - // hosts regardless, so this only opens public sites. - let cfg = HttpRequestConfig::default(); - assert_eq!(cfg.allowed_domains, vec!["*".to_string()]); - assert_eq!(cfg.max_response_size, 1_000_000); - assert_eq!(cfg.timeout_secs, 30); - } - - #[test] - fn unknown_engine_falls_back_to_managed() { - let cfg = SearchConfig { - engine: "duckduckgo".into(), - ..Default::default() - }; - assert_eq!(cfg.effective_engine(), SearchEngine::Managed); - } -} - -/// Composio integration routing mode for the main backend-proxied flow. -/// -/// `"backend"` (default) — every Composio call (toolkits, connections, -/// authorize, tools, execute, triggers, …) is proxied through the -/// OpenHuman backend (`api.tinyhumans.ai/agent-integrations/composio/*`). -/// The backend owns the Composio API key, allowlist, billing/margin, and -/// HMAC-verified trigger webhooks fanned out over socket.io. -/// -/// `"direct"` — the core hits `https://backend.composio.dev/api/v{2,3}` -/// directly with the user's own Composio API key (BYO). Tool execution is -/// synchronous and works fully sovereign. Real-time **trigger webhooks** -/// (the async push surface that the backend currently mediates via -/// socket.io) do not work in direct mode — the user has to enable them -/// out-of-band on Composio's dashboard and configure their own webhook -/// sink. See `composio/tools/direct.rs` for the underlying client. 
-pub const COMPOSIO_MODE_BACKEND: &str = "backend"; -pub const COMPOSIO_MODE_DIRECT: &str = "direct"; - -fn default_composio_mode() -> String { - COMPOSIO_MODE_BACKEND.into() -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct ComposioConfig { - #[serde(default)] - pub enabled: bool, - #[serde(default = "default_entity_id")] - pub entity_id: String, - /// When true, the triage pipeline is disabled for all Composio - /// triggers. Triggers are still recorded to history. - /// Overrides `triage_disabled_toolkits` when set. - #[serde(default)] - pub triage_disabled: bool, - /// Per-toolkit triage opt-out list. Toolkit slugs listed here - /// skip the LLM triage turn — triggers are still recorded to - /// history. Case-insensitive match against the incoming toolkit - /// field (e.g. `["gmail", "slack"]`). - #[serde(default)] - pub triage_disabled_toolkits: Vec, - - /// Routing mode for the main Composio integration flow. One of - /// [`COMPOSIO_MODE_BACKEND`] (default — proxied through the OpenHuman - /// backend) or [`COMPOSIO_MODE_DIRECT`] (BYO API key, calls - /// `backend.composio.dev` directly). - /// - /// The user-provided API key for direct mode is *not* stored in the - /// TOML — it lives in the encrypted keychain via - /// [`crate::openhuman::credentials`] under the - /// `composio-direct` provider slot. We only persist the mode here so - /// the factory can pick the right client at construction time. - #[serde(default = "default_composio_mode")] - pub mode: String, - - /// **Deprecated for direct storage** — present so users that hand-edit - /// `config.toml` can drop the key in here. The factory still prefers - /// the keychain-backed value over this field. Default `None`. 
- #[serde(default)] - pub api_key: Option, -} - -fn default_entity_id() -> String { - "default".into() -} - -impl Default for ComposioConfig { - fn default() -> Self { - Self { - enabled: false, - entity_id: default_entity_id(), - triage_disabled: false, - triage_disabled_toolkits: Vec::new(), - mode: default_composio_mode(), - api_key: None, - } - } -} - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct SecretsConfig { - #[serde(default = "default_true")] - pub encrypt: bool, -} - -impl Default for SecretsConfig { - fn default() -> Self { - Self { - encrypt: defaults::default_true(), - } - } -} - -// ── Native computer control (mouse + keyboard) ───────────────────── - -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] -#[serde(default)] -pub struct ComputerControlConfig { - /// Master toggle for mouse and keyboard tools. Disabled by default — - /// the user must explicitly opt in. - #[serde(default)] - pub enabled: bool, - /// Opt-in for the mutating `ax_interact` actions (`press` / `set_value`). - /// Disabled by default: the read-only `list` action is always available, - /// but actuating arbitrary app controls / typing into arbitrary fields - /// requires explicit user opt-in (mirrors `enabled` for mouse/keyboard). - #[serde(default)] - pub ax_interact_mutations: bool, -} - -// ── Agent integration tools (backend-proxied) ─────────────────────── - -/// Routing mode for an integration that supports a backend-managed -/// default and an optional BYO ("bring your own API key") override. -pub const INTEGRATION_MODE_MANAGED: &str = "managed"; -pub const INTEGRATION_MODE_BYO: &str = "byo"; - -fn default_integration_mode() -> String { - INTEGRATION_MODE_MANAGED.into() -} - -/// Per-integration toggle. -/// -/// Defaults to **OpenHuman-managed** routing: the OpenHuman backend -/// owns the upstream API key, billing, and rate limits — the user only -/// has to flip `enabled` to make the tools available. 
-/// -/// Users who hold their own provider account can switch `mode` to -/// `"byo"` and supply `api_key`. In that case tools register **iff** -/// the integration is `enabled = true` **and** `api_key` is a non-empty -/// trimmed string — see [`IntegrationToggle::is_active`]. This mirrors -/// the rule the Settings UI surfaces to the user ("loaded iff API key -/// is provided and enabled"). -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct IntegrationToggle { - #[serde(default = "defaults::default_true")] - pub enabled: bool, - /// Routing mode. One of [`INTEGRATION_MODE_MANAGED`] (default — the - /// OpenHuman backend proxies the call) or [`INTEGRATION_MODE_BYO`] - /// (the user's own API key is required and tools refuse to - /// register without it). - #[serde(default = "default_integration_mode")] - pub mode: String, - /// API key for [`INTEGRATION_MODE_BYO`]. Ignored in managed mode. - /// Trimmed empty / `None` ⇒ no BYO key configured. - #[serde(default)] - pub api_key: Option, -} - -impl IntegrationToggle { - /// Returns true when the integration should be wired up at tool- - /// registration time. Managed mode requires only `enabled`; BYO - /// mode requires both `enabled` and a non-empty `api_key`. 
- pub fn is_active(&self) -> bool { - if !self.enabled { - return false; - } - match self.mode.as_str() { - INTEGRATION_MODE_BYO => self - .api_key - .as_deref() - .map(|s| !s.trim().is_empty()) - .unwrap_or(false), - _ => true, - } - } -} - -impl Default for IntegrationToggle { - fn default() -> Self { - Self { - enabled: defaults::default_true(), - mode: default_integration_mode(), - api_key: None, - } - } -} - -fn default_polymarket_gamma_base_url() -> String { - "https://gamma-api.polymarket.com".into() -} - -fn default_polymarket_clob_base_url() -> String { - "https://clob.polymarket.com".into() -} - -fn default_polymarket_timeout_secs() -> u64 { - 15 -} - -fn default_polymarket_enabled() -> bool { - false -} - -fn default_polymarket_polygon_rpc_url() -> String { - "https://polygon-rpc.com".into() -} - -fn default_polymarket_usdc_contract() -> String { - "0x2791Bca1f2de4661ED88A30C99A7a9449Aa84174".into() -} - -fn default_polymarket_clob_exchange_contract() -> String { - "0x4bFb41d5B3570DeFd03C39a9A4D8dE6Bd8B8982E".into() -} - -/// Polymarket CLOB L2 credentials (api_key + HMAC secret + passphrase). -/// -/// Single source of truth for both the config TOML surface AND the -/// in-process HTTP signing path — `polymarket.rs` / `clob_auth.rs` use -/// this type directly so there is no parallel internal struct + From-impl -/// glue to keep in sync. -#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] -pub struct PolymarketClobCredentials { - pub api_key: String, - pub secret: String, - pub passphrase: String, -} - -impl PolymarketClobCredentials { - /// Returns true iff all three credential fields are non-empty after - /// trimming whitespace. 
- pub fn is_complete(&self) -> bool { - !(self.api_key.trim().is_empty() - || self.secret.trim().is_empty() - || self.passphrase.trim().is_empty()) - } -} - -impl std::fmt::Debug for PolymarketClobCredentials { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PolymarketClobCredentials") - .field("api_key", &"") - .field("secret", &"") - .field("passphrase", &"") - .finish() - } -} - -/// Polymarket API configuration (read + write actions via CLOB). -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -#[serde(default)] -pub struct PolymarketConfig { - #[serde(default = "default_polymarket_enabled")] - pub enabled: bool, - #[serde(default = "default_polymarket_gamma_base_url")] - pub gamma_base_url: String, - #[serde(default = "default_polymarket_clob_base_url")] - pub clob_base_url: String, - #[serde(default = "default_polymarket_timeout_secs")] - pub timeout_secs: u64, - #[serde(default)] - pub eoa_address: Option, - #[serde(default = "default_polymarket_polygon_rpc_url")] - pub polygon_rpc_url: String, - #[serde(default = "default_polymarket_usdc_contract")] - pub usdc_contract: String, - #[serde(default = "default_polymarket_clob_exchange_contract")] - pub clob_exchange_contract: String, - /// Persisted L2 CLOB credentials (api_key, secret, passphrase) derived - /// from the user's EOA via the L1 EIP-712 handshake against - /// `/auth/api-key`. - /// - /// **Threat model — temporary plaintext.** Stored in the TOML config - /// file in plaintext until #1900 lands the `SecretStore` encryption - /// surface. Anything that reads the config (other tools, agents, - /// disk-snapshot exfil) can exfiltrate the HMAC secret. Acceptable - /// trade-off for a Beta feature that is off by default - /// (`integrations.polymarket.enabled = false`) and explicitly - /// opt-in. 
Migrate to SecretStore the moment #1900 merges — the in- - /// memory cache (`PolymarketTool::cached_clob_credentials`) remains - /// authoritative within a single process so the wire-level behaviour - /// is unchanged on the migration. - #[serde(default)] - pub derived_clob_credentials: Option, -} - -impl Default for PolymarketConfig { - fn default() -> Self { - Self { - enabled: default_polymarket_enabled(), - gamma_base_url: default_polymarket_gamma_base_url(), - clob_base_url: default_polymarket_clob_base_url(), - timeout_secs: default_polymarket_timeout_secs(), - eoa_address: None, - polygon_rpc_url: default_polymarket_polygon_rpc_url(), - usdc_contract: default_polymarket_usdc_contract(), - clob_exchange_contract: default_polymarket_clob_exchange_contract(), - derived_clob_credentials: None, - } - } -} - -/// Agent integration tools that proxy through the backend API. -/// -/// The backend URL and auth token are **not** configurable here — -/// they're always resolved from the core `config.api_url` plus the -/// app-session JWT. -/// Composio in particular is unconditionally enabled and has no toggle: -/// as long as the user is signed in, composio tools are available. -/// -/// The per-tool `apify`, `twilio`, `google_places`, `parallel`, and `tinyfish` -/// flags below are preserved because those integrations incur per-call -/// costs that the user may legitimately want to turn off; composio -/// costs are metered server-side, so there is no client-side toggle -/// for it. -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] -#[serde(default)] -pub struct IntegrationsConfig { - /// Apify actor execution and scraper integration. - #[serde(default)] - pub apify: IntegrationToggle, - - /// Twilio phone-call integration. - #[serde(default)] - pub twilio: IntegrationToggle, - - /// Google Places location search integration. - #[serde(default)] - pub google_places: IntegrationToggle, - - /// Parallel web search & content extraction integration. 
- #[serde(default)] - pub parallel: IntegrationToggle, - - /// TinyFish web search, fetch, and browser automation integration. - #[serde(default)] - pub tinyfish: IntegrationToggle, - - /// Stock-price / market-data integration (Alpha Vantage on the backend). - #[serde(default)] - pub stock_prices: IntegrationToggle, - - /// Polymarket browse + trading APIs (Gamma + CLOB). - #[serde(default)] - pub polymarket: PolymarketConfig, -} - -#[cfg(test)] -mod integration_toggle_tests { - use super::*; - - #[test] - fn managed_mode_active_when_enabled_without_key() { - let toggle = IntegrationToggle { - enabled: true, - mode: INTEGRATION_MODE_MANAGED.into(), - api_key: None, - }; - assert!(toggle.is_active()); - } - - #[test] - fn managed_mode_inactive_when_disabled() { - let toggle = IntegrationToggle { - enabled: false, - mode: INTEGRATION_MODE_MANAGED.into(), - api_key: Some("ignored".into()), - }; - assert!(!toggle.is_active()); - } - - #[test] - fn byo_mode_requires_non_empty_key() { - let mut toggle = IntegrationToggle { - enabled: true, - mode: INTEGRATION_MODE_BYO.into(), - api_key: None, - }; - assert!(!toggle.is_active(), "missing key"); - - toggle.api_key = Some(" ".into()); - assert!(!toggle.is_active(), "whitespace key"); - - toggle.api_key = Some("real-key".into()); - assert!(toggle.is_active()); - } - - #[test] - fn byo_mode_inactive_when_disabled_even_with_key() { - let toggle = IntegrationToggle { - enabled: false, - mode: INTEGRATION_MODE_BYO.into(), - api_key: Some("real-key".into()), - }; - assert!(!toggle.is_active()); - } - - #[test] - fn default_is_managed_and_active() { - let toggle = IntegrationToggle::default(); - assert_eq!(toggle.mode, INTEGRATION_MODE_MANAGED); - assert!(toggle.api_key.is_none()); - assert!(toggle.is_active()); - } -} diff --git a/src/openhuman/config/schema/tools/browser.rs b/src/openhuman/config/schema/tools/browser.rs new file mode 100644 index 0000000000..4bf40cbf90 --- /dev/null +++ 
b/src/openhuman/config/schema/tools/browser.rs @@ -0,0 +1,96 @@ +//! Browser and computer-use config types. + +use super::super::defaults; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct BrowserComputerUseConfig { + #[serde(default = "default_browser_computer_use_endpoint")] + pub endpoint: String, + #[serde(default = "default_browser_computer_use_timeout_ms")] + pub timeout_ms: u64, + #[serde(default)] + pub allow_remote_endpoint: bool, + #[serde(default)] + pub window_allowlist: Vec, + #[serde(default)] + pub max_coordinate_x: Option, + #[serde(default)] + pub max_coordinate_y: Option, +} + +fn default_browser_computer_use_endpoint() -> String { + "http://127.0.0.1:8787/v1/actions".into() +} + +fn default_browser_computer_use_timeout_ms() -> u64 { + 15_000 +} + +impl Default for BrowserComputerUseConfig { + fn default() -> Self { + Self { + endpoint: default_browser_computer_use_endpoint(), + timeout_ms: default_browser_computer_use_timeout_ms(), + allow_remote_endpoint: false, + window_allowlist: Vec::new(), + max_coordinate_x: None, + max_coordinate_y: None, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct BrowserConfig { + #[serde(default)] + pub enabled: bool, + /// DEPRECATED: the browser tool now shares the unified web-access host list + /// in `[http_request].allowed_domains` (see `tools::ops::all_tools_with_runtime`). + /// Still parsed for backward compatibility but no longer gates browser + /// navigation. Manage allowed hosts via Settings → Search → Allowed websites; + /// browser allow-all remains gated by `OPENHUMAN_BROWSER_ALLOW_ALL`. 
+ #[serde(default)] + pub allowed_domains: Vec, + #[serde(default)] + pub session_name: Option, + #[serde(default = "default_browser_backend")] + pub backend: String, + #[serde(default = "default_true")] + pub native_headless: bool, + #[serde(default = "default_browser_webdriver_url")] + pub native_webdriver_url: String, + #[serde(default)] + pub native_chrome_path: Option, + #[serde(default)] + pub computer_use: BrowserComputerUseConfig, +} + +fn default_true() -> bool { + defaults::default_true() +} + +fn default_browser_backend() -> String { + "agent_browser".into() +} + +fn default_browser_webdriver_url() -> String { + "http://127.0.0.1:9515".into() +} + +impl Default for BrowserConfig { + fn default() -> Self { + Self { + enabled: false, + allowed_domains: Vec::new(), + session_name: None, + backend: default_browser_backend(), + native_headless: default_true(), + native_webdriver_url: default_browser_webdriver_url(), + native_chrome_path: None, + computer_use: BrowserComputerUseConfig::default(), + } + } +} diff --git a/src/openhuman/config/schema/tools/http.rs b/src/openhuman/config/schema/tools/http.rs new file mode 100644 index 0000000000..ee7ccec860 --- /dev/null +++ b/src/openhuman/config/schema/tools/http.rs @@ -0,0 +1,81 @@ +//! HTTP request and curl download config types. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct HttpRequestConfig { + /// Hosts the assistant may open/read via `web_fetch` / `curl`. An exact + /// host also matches its subdomains; `"*"` allows all public sites; an + /// empty list blocks all web access. Defaults to `["*"]` so web research + /// works out of the box — the SSRF guard still blocks local/private hosts + /// regardless. Narrow this via Settings → Search → Allowed websites. 
+ #[serde(default = "default_http_allowed_domains")] + pub allowed_domains: Vec, + #[serde(default = "default_http_max_response_size")] + pub max_response_size: usize, + #[serde(default = "default_http_timeout_secs")] + pub timeout_secs: u64, +} + +impl Default for HttpRequestConfig { + fn default() -> Self { + Self { + allowed_domains: default_http_allowed_domains(), + max_response_size: default_http_max_response_size(), + timeout_secs: default_http_timeout_secs(), + } + } +} + +fn default_http_allowed_domains() -> Vec { + vec!["*".to_string()] +} + +fn default_http_max_response_size() -> usize { + 1_000_000 +} + +fn default_http_timeout_secs() -> u64 { + 30 +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct CurlConfig { + /// Subdirectory under `workspace_dir` where downloads land. Inputs + /// are resolved relative to this root; absolute paths and `..` + /// segments are rejected. + #[serde(default = "default_curl_dest_subdir")] + pub dest_subdir: String, + /// Hard byte ceiling per download. Streaming aborts and the + /// partial file is removed if exceeded. + #[serde(default = "default_curl_max_download_bytes")] + pub max_download_bytes: u64, + /// Per-request timeout in seconds. 
+ #[serde(default = "default_curl_timeout_secs")] + pub timeout_secs: u64, +} + +fn default_curl_dest_subdir() -> String { + "downloads".into() +} + +fn default_curl_max_download_bytes() -> u64 { + 50 * 1024 * 1024 +} + +fn default_curl_timeout_secs() -> u64 { + 120 +} + +impl Default for CurlConfig { + fn default() -> Self { + Self { + dest_subdir: default_curl_dest_subdir(), + max_download_bytes: default_curl_max_download_bytes(), + timeout_secs: default_curl_timeout_secs(), + } + } +} diff --git a/src/openhuman/config/schema/tools/integrations.rs b/src/openhuman/config/schema/tools/integrations.rs new file mode 100644 index 0000000000..459b691c42 --- /dev/null +++ b/src/openhuman/config/schema/tools/integrations.rs @@ -0,0 +1,403 @@ +//! Composio, secrets, computer control, and agent integration toggle types. + +use super::super::defaults; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +/// Composio integration routing mode for the main backend-proxied flow. +/// +/// `"backend"` (default) — every Composio call (toolkits, connections, +/// authorize, tools, execute, triggers, …) is proxied through the +/// OpenHuman backend (`api.tinyhumans.ai/agent-integrations/composio/*`). +/// The backend owns the Composio API key, allowlist, billing/margin, and +/// HMAC-verified trigger webhooks fanned out over socket.io. +/// +/// `"direct"` — the core hits `https://backend.composio.dev/api/v{2,3}` +/// directly with the user's own Composio API key (BYO). Tool execution is +/// synchronous and works fully sovereign. Real-time **trigger webhooks** +/// (the async push surface that the backend currently mediates via +/// socket.io) do not work in direct mode — the user has to enable them +/// out-of-band on Composio's dashboard and configure their own webhook +/// sink. See `composio/tools/direct.rs` for the underlying client. 
+pub const COMPOSIO_MODE_BACKEND: &str = "backend"; +pub const COMPOSIO_MODE_DIRECT: &str = "direct"; + +fn default_composio_mode() -> String { + COMPOSIO_MODE_BACKEND.into() +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct ComposioConfig { + #[serde(default)] + pub enabled: bool, + #[serde(default = "default_entity_id")] + pub entity_id: String, + /// When true, the triage pipeline is disabled for all Composio + /// triggers. Triggers are still recorded to history. + /// Overrides `triage_disabled_toolkits` when set. + #[serde(default)] + pub triage_disabled: bool, + /// Per-toolkit triage opt-out list. Toolkit slugs listed here + /// skip the LLM triage turn — triggers are still recorded to + /// history. Case-insensitive match against the incoming toolkit + /// field (e.g. `["gmail", "slack"]`). + #[serde(default)] + pub triage_disabled_toolkits: Vec, + + /// Routing mode for the main Composio integration flow. One of + /// [`COMPOSIO_MODE_BACKEND`] (default — proxied through the OpenHuman + /// backend) or [`COMPOSIO_MODE_DIRECT`] (BYO API key, calls + /// `backend.composio.dev` directly). + /// + /// The user-provided API key for direct mode is *not* stored in the + /// TOML — it lives in the encrypted keychain via + /// [`crate::openhuman::credentials`] under the + /// `composio-direct` provider slot. We only persist the mode here so + /// the factory can pick the right client at construction time. + #[serde(default = "default_composio_mode")] + pub mode: String, + + /// **Deprecated for direct storage** — present so users that hand-edit + /// `config.toml` can drop the key in here. The factory still prefers + /// the keychain-backed value over this field. Default `None`. 
+ #[serde(default)] + pub api_key: Option, +} + +fn default_entity_id() -> String { + "default".into() +} + +impl Default for ComposioConfig { + fn default() -> Self { + Self { + enabled: false, + entity_id: default_entity_id(), + triage_disabled: false, + triage_disabled_toolkits: Vec::new(), + mode: default_composio_mode(), + api_key: None, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct SecretsConfig { + #[serde(default = "defaults::default_true")] + pub encrypt: bool, +} + +impl Default for SecretsConfig { + fn default() -> Self { + Self { + encrypt: defaults::default_true(), + } + } +} + +// ── Native computer control (mouse + keyboard) ───────────────────── + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(default)] +pub struct ComputerControlConfig { + /// Master toggle for mouse and keyboard tools. Disabled by default — + /// the user must explicitly opt in. + #[serde(default)] + pub enabled: bool, + /// Opt-in for the mutating `ax_interact` actions (`press` / `set_value`). + /// Disabled by default: the read-only `list` action is always available, + /// but actuating arbitrary app controls / typing into arbitrary fields + /// requires explicit user opt-in (mirrors `enabled` for mouse/keyboard). + #[serde(default)] + pub ax_interact_mutations: bool, +} + +// ── Agent integration tools (backend-proxied) ─────────────────────── + +/// Routing mode for an integration that supports a backend-managed +/// default and an optional BYO ("bring your own API key") override. +pub const INTEGRATION_MODE_MANAGED: &str = "managed"; +pub const INTEGRATION_MODE_BYO: &str = "byo"; + +fn default_integration_mode() -> String { + INTEGRATION_MODE_MANAGED.into() +} + +/// Per-integration toggle. 
+/// +/// Defaults to **OpenHuman-managed** routing: the OpenHuman backend +/// owns the upstream API key, billing, and rate limits — the user only +/// has to flip `enabled` to make the tools available. +/// +/// Users who hold their own provider account can switch `mode` to +/// `"byo"` and supply `api_key`. In that case tools register **iff** +/// the integration is `enabled = true` **and** `api_key` is a non-empty +/// trimmed string — see [`IntegrationToggle::is_active`]. This mirrors +/// the rule the Settings UI surfaces to the user ("loaded iff API key +/// is provided and enabled"). +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct IntegrationToggle { + #[serde(default = "defaults::default_true")] + pub enabled: bool, + /// Routing mode. One of [`INTEGRATION_MODE_MANAGED`] (default — the + /// OpenHuman backend proxies the call) or [`INTEGRATION_MODE_BYO`] + /// (the user's own API key is required and tools refuse to + /// register without it). + #[serde(default = "default_integration_mode")] + pub mode: String, + /// API key for [`INTEGRATION_MODE_BYO`]. Ignored in managed mode. + /// Trimmed empty / `None` ⇒ no BYO key configured. + #[serde(default)] + pub api_key: Option, +} + +impl IntegrationToggle { + /// Returns true when the integration should be wired up at tool- + /// registration time. Managed mode requires only `enabled`; BYO + /// mode requires both `enabled` and a non-empty `api_key`. 
+ pub fn is_active(&self) -> bool { + if !self.enabled { + return false; + } + match self.mode.as_str() { + INTEGRATION_MODE_BYO => self + .api_key + .as_deref() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false), + _ => true, + } + } +} + +impl Default for IntegrationToggle { + fn default() -> Self { + Self { + enabled: defaults::default_true(), + mode: default_integration_mode(), + api_key: None, + } + } +} + +fn default_polymarket_gamma_base_url() -> String { + "https://gamma-api.polymarket.com".into() +} + +fn default_polymarket_clob_base_url() -> String { + "https://clob.polymarket.com".into() +} + +fn default_polymarket_timeout_secs() -> u64 { + 15 +} + +fn default_polymarket_enabled() -> bool { + false +} + +fn default_polymarket_polygon_rpc_url() -> String { + "https://polygon-rpc.com".into() +} + +fn default_polymarket_usdc_contract() -> String { + "0x2791Bca1f2de4661ED88A30C99A7a9449Aa84174".into() +} + +fn default_polymarket_clob_exchange_contract() -> String { + "0x4bFb41d5B3570DeFd03C39a9A4D8dE6Bd8B8982E".into() +} + +/// Polymarket CLOB L2 credentials (api_key + HMAC secret + passphrase). +/// +/// Single source of truth for both the config TOML surface AND the +/// in-process HTTP signing path — `polymarket.rs` / `clob_auth.rs` use +/// this type directly so there is no parallel internal struct + From-impl +/// glue to keep in sync. +#[derive(Clone, Default, Serialize, Deserialize, JsonSchema, PartialEq, Eq)] +pub struct PolymarketClobCredentials { + pub api_key: String, + pub secret: String, + pub passphrase: String, +} + +impl PolymarketClobCredentials { + /// Returns true iff all three credential fields are non-empty after + /// trimming whitespace. 
+ pub fn is_complete(&self) -> bool { + !(self.api_key.trim().is_empty() + || self.secret.trim().is_empty() + || self.passphrase.trim().is_empty()) + } +} + +impl std::fmt::Debug for PolymarketClobCredentials { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PolymarketClobCredentials") + .field("api_key", &"") + .field("secret", &"") + .field("passphrase", &"") + .finish() + } +} + +/// Polymarket API configuration (read + write actions via CLOB). +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct PolymarketConfig { + #[serde(default = "default_polymarket_enabled")] + pub enabled: bool, + #[serde(default = "default_polymarket_gamma_base_url")] + pub gamma_base_url: String, + #[serde(default = "default_polymarket_clob_base_url")] + pub clob_base_url: String, + #[serde(default = "default_polymarket_timeout_secs")] + pub timeout_secs: u64, + #[serde(default)] + pub eoa_address: Option, + #[serde(default = "default_polymarket_polygon_rpc_url")] + pub polygon_rpc_url: String, + #[serde(default = "default_polymarket_usdc_contract")] + pub usdc_contract: String, + #[serde(default = "default_polymarket_clob_exchange_contract")] + pub clob_exchange_contract: String, + /// Persisted L2 CLOB credentials (api_key, secret, passphrase) derived + /// from the user's EOA via the L1 EIP-712 handshake against + /// `/auth/api-key`. + /// + /// **Threat model — temporary plaintext.** Stored in the TOML config + /// file in plaintext until #1900 lands the `SecretStore` encryption + /// surface. Anything that reads the config (other tools, agents, + /// disk-snapshot exfil) can exfiltrate the HMAC secret. Acceptable + /// trade-off for a Beta feature that is off by default + /// (`integrations.polymarket.enabled = false`) and explicitly + /// opt-in. 
Migrate to SecretStore the moment #1900 merges — the in- + /// memory cache (`PolymarketTool::cached_clob_credentials`) remains + /// authoritative within a single process so the wire-level behaviour + /// is unchanged on the migration. + #[serde(default)] + pub derived_clob_credentials: Option, +} + +impl Default for PolymarketConfig { + fn default() -> Self { + Self { + enabled: default_polymarket_enabled(), + gamma_base_url: default_polymarket_gamma_base_url(), + clob_base_url: default_polymarket_clob_base_url(), + timeout_secs: default_polymarket_timeout_secs(), + eoa_address: None, + polygon_rpc_url: default_polymarket_polygon_rpc_url(), + usdc_contract: default_polymarket_usdc_contract(), + clob_exchange_contract: default_polymarket_clob_exchange_contract(), + derived_clob_credentials: None, + } + } +} + +/// Agent integration tools that proxy through the backend API. +/// +/// The backend URL and auth token are **not** configurable here — +/// they're always resolved from the core `config.api_url` plus the +/// app-session JWT. +/// Composio in particular is unconditionally enabled and has no toggle: +/// as long as the user is signed in, composio tools are available. +/// +/// The per-tool `apify`, `twilio`, `google_places`, `parallel`, and `tinyfish` +/// flags below are preserved because those integrations incur per-call +/// costs that the user may legitimately want to turn off; composio +/// costs are metered server-side, so there is no client-side toggle +/// for it. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema, Default)] +#[serde(default)] +pub struct IntegrationsConfig { + /// Apify actor execution and scraper integration. + #[serde(default)] + pub apify: IntegrationToggle, + + /// Twilio phone-call integration. + #[serde(default)] + pub twilio: IntegrationToggle, + + /// Google Places location search integration. + #[serde(default)] + pub google_places: IntegrationToggle, + + /// Parallel web search & content extraction integration. 
+ #[serde(default)] + pub parallel: IntegrationToggle, + + /// TinyFish web search, fetch, and browser automation integration. + #[serde(default)] + pub tinyfish: IntegrationToggle, + + /// Stock-price / market-data integration (Alpha Vantage on the backend). + #[serde(default)] + pub stock_prices: IntegrationToggle, + + /// Polymarket browse + trading APIs (Gamma + CLOB). + #[serde(default)] + pub polymarket: PolymarketConfig, +} + +#[cfg(test)] +mod integration_toggle_tests { + use super::*; + + #[test] + fn managed_mode_active_when_enabled_without_key() { + let toggle = IntegrationToggle { + enabled: true, + mode: INTEGRATION_MODE_MANAGED.into(), + api_key: None, + }; + assert!(toggle.is_active()); + } + + #[test] + fn managed_mode_inactive_when_disabled() { + let toggle = IntegrationToggle { + enabled: false, + mode: INTEGRATION_MODE_MANAGED.into(), + api_key: Some("ignored".into()), + }; + assert!(!toggle.is_active()); + } + + #[test] + fn byo_mode_requires_non_empty_key() { + let mut toggle = IntegrationToggle { + enabled: true, + mode: INTEGRATION_MODE_BYO.into(), + api_key: None, + }; + assert!(!toggle.is_active(), "missing key"); + + toggle.api_key = Some(" ".into()); + assert!(!toggle.is_active(), "whitespace key"); + + toggle.api_key = Some("real-key".into()); + assert!(toggle.is_active()); + } + + #[test] + fn byo_mode_inactive_when_disabled_even_with_key() { + let toggle = IntegrationToggle { + enabled: false, + mode: INTEGRATION_MODE_BYO.into(), + api_key: Some("real-key".into()), + }; + assert!(!toggle.is_active()); + } + + #[test] + fn default_is_managed_and_active() { + let toggle = IntegrationToggle::default(); + assert_eq!(toggle.mode, INTEGRATION_MODE_MANAGED); + assert!(toggle.api_key.is_none()); + assert!(toggle.is_active()); + } +} diff --git a/src/openhuman/config/schema/tools/mcp.rs b/src/openhuman/config/schema/tools/mcp.rs new file mode 100644 index 0000000000..a083cd9e2a --- /dev/null +++ b/src/openhuman/config/schema/tools/mcp.rs @@ 
-0,0 +1,220 @@ +//! MCP client, server, auth, and GitBooks config types. + +use super::super::defaults; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct GitbooksConfig { + /// When `true`, register `gitbooks_search` and `gitbooks_get_page`. + #[serde(default = "defaults::default_true")] + pub enabled: bool, + /// MCP endpoint URL for the OpenHuman GitBook docs. + #[serde(default = "default_gitbooks_endpoint")] + pub endpoint: String, + /// Per-request timeout in seconds. + #[serde(default = "default_gitbooks_timeout_secs")] + pub timeout_secs: u64, +} + +fn default_gitbooks_endpoint() -> String { + "https://tinyhumans.gitbook.io/openhuman/~gitbook/mcp".into() +} + +fn default_gitbooks_timeout_secs() -> u64 { + 30 +} + +impl Default for GitbooksConfig { + fn default() -> Self { + Self { + enabled: defaults::default_true(), + endpoint: default_gitbooks_endpoint(), + timeout_secs: default_gitbooks_timeout_secs(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct McpServerConfig { + /// Stable server slug used by the agent-facing bridge tools. + #[serde(default)] + pub name: String, + /// MCP endpoint URL. Current implementation supports stateless + /// Streamable HTTP / JSON responses. + #[serde(default)] + pub endpoint: String, + /// Optional stdio command for local MCP servers. When set, the + /// client launches this command as a subprocess and speaks newline- + /// delimited JSON-RPC over stdin/stdout per the MCP stdio transport. + #[serde(default)] + pub command: String, + /// Command-line arguments for stdio MCP servers. + #[serde(default)] + pub args: Vec, + /// Extra environment variables for stdio MCP servers. MCP stdio auth + /// is typically passed this way. + #[serde(default)] + pub env: HashMap, + /// Optional working directory for stdio MCP servers. 
+ #[serde(default)] + pub cwd: Option, + /// Optional human-readable description shown in bridge tool output. + #[serde(default)] + pub description: Option, + /// Whether this server should be exposed to the MCP bridge tools. + #[serde(default = "defaults::default_true")] + pub enabled: bool, + /// Exact remote tool names this server may expose through the generic + /// MCP bridge. Empty means all remote tools are allowed unless they + /// appear in `disallowed_tools`. + #[serde(default)] + pub allowed_tools: Vec, + /// Exact remote tool names that should always be hidden and blocked. + /// This denylist takes precedence over `allowed_tools`. + #[serde(default)] + pub disallowed_tools: Vec, + /// Per-request timeout in seconds. + #[serde(default = "default_mcp_timeout_secs")] + pub timeout_secs: u64, + /// Optional auth strategy applied to outbound requests for this + /// server. Useful for API-key and pre-provisioned bearer-token + /// flows; interactive OAuth discovery is handled by the client + /// transport separately when a server returns an auth challenge. 
+ #[serde(default)] + pub auth: McpAuthConfig, +} + +fn default_mcp_timeout_secs() -> u64 { + 30 +} + +impl Default for McpServerConfig { + fn default() -> Self { + Self { + name: String::new(), + endpoint: String::new(), + command: String::new(), + args: Vec::new(), + env: HashMap::new(), + cwd: None, + description: None, + enabled: defaults::default_true(), + allowed_tools: Vec::new(), + disallowed_tools: Vec::new(), + timeout_secs: default_mcp_timeout_secs(), + auth: McpAuthConfig::None, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum McpAuthConfig { + None, + BearerToken { token: String }, + Basic { username: String, password: String }, + Header { name: String, value: String }, + QueryParam { name: String, value: String }, +} + +impl Default for McpAuthConfig { + fn default() -> Self { + Self::None + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct McpClientIdentityConfig { + /// Client name sent during `initialize.clientInfo.name`. + #[serde(default = "default_mcp_client_name")] + pub name: String, + /// Client title sent during `initialize.clientInfo.title`. + #[serde(default = "default_mcp_client_title")] + pub title: String, + /// Client version sent during `initialize.clientInfo.version`. 
+ #[serde(default = "default_mcp_client_version")] + pub version: String, +} + +fn default_mcp_client_name() -> String { + "openhuman-core".into() +} + +fn default_mcp_client_title() -> String { + "OpenHuman Core MCP Client".into() +} + +fn default_mcp_client_version() -> String { + env!("CARGO_PKG_VERSION").into() +} + +impl Default for McpClientIdentityConfig { + fn default() -> Self { + Self { + name: default_mcp_client_name(), + title: default_mcp_client_title(), + version: default_mcp_client_version(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct McpClientConfig { + /// When `true`, register the generic MCP bridge tools and expose + /// configured remote MCP servers to the agent runtime. + #[serde(default = "defaults::default_true")] + pub enabled: bool, + /// Named remote MCP servers accessible via `mcp_list_*` / + /// `mcp_call_tool`. + #[serde(default)] + pub servers: Vec, + /// Identity block sent during initialize. + #[serde(default)] + pub client_identity: McpClientIdentityConfig, + /// Optional auth/overrides for the MCP *registry* browse APIs (Smithery + + /// the official modelcontextprotocol/registry). Each value falls back to + /// the corresponding env var when unset (issue #3039 gap A6). + #[serde(default)] + pub registry_auth: McpRegistryAuthConfig, +} + +impl Default for McpClientConfig { + fn default() -> Self { + Self { + enabled: defaults::default_true(), + servers: Vec::new(), + client_identity: McpClientIdentityConfig::default(), + registry_auth: McpRegistryAuthConfig::default(), + } + } +} + +/// Registry-browse auth + endpoint overrides. Lets a user who hits Smithery +/// rate limits (or needs an authenticated official-registry endpoint) supply +/// credentials from the desktop app instead of editing env vars. Each field is +/// config-first with an env-var fallback so existing CI/Docker deployments that +/// only set env vars keep working unchanged. 
+/// +/// Secrets are write-only over RPC: the getter reports whether each secret is +/// *set* (a boolean) and never echoes the value back. +#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct McpRegistryAuthConfig { + /// Smithery API key. Falls back to `SMITHERY_API_KEY`. + #[serde(default)] + pub smithery_api_key: Option, + /// Base URL override for the official registry. Falls back to + /// `MCP_OFFICIAL_REGISTRY_BASE` (non-secret). + #[serde(default)] + pub mcp_official_base: Option, + /// Bearer token for the official registry. Falls back to + /// `MCP_OFFICIAL_REGISTRY_TOKEN`. + #[serde(default)] + pub mcp_official_token: Option, +} diff --git a/src/openhuman/config/schema/tools/mod.rs b/src/openhuman/config/schema/tools/mod.rs new file mode 100644 index 0000000000..2012ffb3ab --- /dev/null +++ b/src/openhuman/config/schema/tools/mod.rs @@ -0,0 +1,26 @@ +//! Tool-related config: browser, HTTP, web search, composio, secrets, multimodal. 
+ +pub mod browser; +pub mod http; +pub mod integrations; +pub mod mcp; +pub mod multimodal; +pub mod search; + +pub use browser::{BrowserComputerUseConfig, BrowserConfig}; +pub use http::{CurlConfig, HttpRequestConfig}; +pub use integrations::{ + ComposioConfig, ComputerControlConfig, IntegrationToggle, IntegrationsConfig, + PolymarketClobCredentials, PolymarketConfig, SecretsConfig, COMPOSIO_MODE_BACKEND, + COMPOSIO_MODE_DIRECT, INTEGRATION_MODE_BYO, INTEGRATION_MODE_MANAGED, +}; +pub use mcp::{ + GitbooksConfig, McpAuthConfig, McpClientConfig, McpClientIdentityConfig, McpRegistryAuthConfig, + McpServerConfig, +}; +pub use multimodal::{MultimodalConfig, MultimodalFileConfig}; +pub use search::{ + SearchConfig, SearchEngine, SearchEngineCredentials, SearxngConfig, SeltzConfig, + WebSearchConfig, SEARCH_ENGINE_BRAVE, SEARCH_ENGINE_DISABLED, SEARCH_ENGINE_MANAGED, + SEARCH_ENGINE_PARALLEL, SEARCH_ENGINE_QUERIT, +}; diff --git a/src/openhuman/config/schema/tools/multimodal.rs b/src/openhuman/config/schema/tools/multimodal.rs new file mode 100644 index 0000000000..cda405da55 --- /dev/null +++ b/src/openhuman/config/schema/tools/multimodal.rs @@ -0,0 +1,155 @@ +//! Multimodal (image + file) config types. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct MultimodalConfig { + #[serde(default = "default_multimodal_max_images")] + pub max_images: usize, + #[serde(default = "default_multimodal_max_image_size_mb")] + pub max_image_size_mb: usize, + #[serde(default)] + pub allow_remote_fetch: bool, +} + +fn default_multimodal_max_images() -> usize { + 4 +} + +fn default_multimodal_max_image_size_mb() -> usize { + 8 +} + +impl MultimodalConfig { + /// Clamp configured values to safe runtime bounds. 
+ pub fn effective_limits(&self) -> (usize, usize) { + let max_images = self.max_images.clamp(1, 16); + let max_image_size_mb = self.max_image_size_mb.clamp(1, 20); + (max_images, max_image_size_mb) + } + + /// Clamp image count to the configured maximum. + pub fn clamp_image_count(&self, count: usize) -> usize { + count.min(self.max_images) + } +} + +impl Default for MultimodalConfig { + fn default() -> Self { + Self { + max_images: default_multimodal_max_images(), + max_image_size_mb: default_multimodal_max_image_size_mb(), + allow_remote_fetch: false, + } + } +} + +/// File-attachment counterpart to [`MultimodalConfig`]. Governs how +/// `[FILE:…]` markers in user messages are resolved, validated, and +/// inlined as text context for the agent. +/// +/// Defaults err on the side of "useful for prose docs without blowing +/// the context window": 4 files per turn, 16 MB per file, 50 000 chars +/// of extracted text per file. Remote fetch is opt-in. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct MultimodalFileConfig { + #[serde(default = "default_multimodal_max_files")] + pub max_files: usize, + #[serde(default = "default_multimodal_max_file_size_mb")] + pub max_file_size_mb: usize, + #[serde(default = "default_multimodal_max_extracted_text_chars")] + pub max_extracted_text_chars: usize, + #[serde(default)] + pub allow_remote_fetch: bool, + #[serde(default = "default_multimodal_allowed_file_mime_types")] + pub allowed_mime_types: Vec, +} + +fn default_multimodal_max_files() -> usize { + 4 +} + +fn default_multimodal_max_file_size_mb() -> usize { + 16 +} + +fn default_multimodal_max_extracted_text_chars() -> usize { + 50_000 +} + +fn default_multimodal_allowed_file_mime_types() -> Vec { + vec![ + // Extractable text formats. + "application/pdf".to_string(), + "text/plain".to_string(), + "text/csv".to_string(), + "text/markdown".to_string(), + // Binary-only formats surfaced as metadata-only references. 
+ "application/zip".to_string(), + "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet".to_string(), + "application/vnd.openxmlformats-officedocument.wordprocessingml.document".to_string(), + "application/vnd.openxmlformats-officedocument.presentationml.presentation".to_string(), + "application/octet-stream".to_string(), + ] +} + +impl MultimodalFileConfig { + /// Clamp configured values to safe runtime bounds. + pub fn effective_limits(&self) -> (usize, usize, usize) { + let max_files = self.max_files.clamp(1, 16); + let max_file_size_mb = self.max_file_size_mb.clamp(1, 50); + let max_extracted_text_chars = self.max_extracted_text_chars.clamp(1_000, 200_000); + (max_files, max_file_size_mb, max_extracted_text_chars) + } + + /// True iff `mime` is on the configured allowlist (case-insensitive). + pub fn is_mime_allowed(&self, mime: &str) -> bool { + let needle = mime.to_ascii_lowercase(); + self.allowed_mime_types + .iter() + .any(|allowed| allowed.eq_ignore_ascii_case(&needle)) + } + + /// Hardened config for turns whose user text originates from an + /// untrusted third-party channel (Slack / Discord / Telegram / + /// WhatsApp / etc.). Disables `[FILE:…]` marker resolution outright + /// so a remote sender cannot smuggle `[FILE:/etc/passwd]`, + /// `[FILE:.env]`, or any other local-path marker into an inbound + /// message and have the agent exfiltrate the file's contents into + /// an LLM call. Also forbids remote fetch. + /// + /// `max_files: 0` is a sentinel: `prepare_messages_for_provider` + /// short-circuits at the first `[FILE:…]` marker with + /// `TooManyFiles` before any disk or network read happens. This + /// holds regardless of the per-operator + /// `[tools.multimodal_files]` block in `config.toml`. + /// + /// Mirrors the triage-arm hardening in + /// `openhuman::agent::triage::evaluator`. 
Apply at the per-turn + /// application site (the channel-runtime dispatcher) — the + /// operator-supplied `config.multimodal_files` stays the source of + /// truth for the desktop / web-chat path where the user owns the + /// local filesystem. + pub fn for_untrusted_channel_input() -> Self { + Self { + max_files: 0, + allow_remote_fetch: false, + ..Default::default() + } + } +} + +impl Default for MultimodalFileConfig { + fn default() -> Self { + Self { + max_files: default_multimodal_max_files(), + max_file_size_mb: default_multimodal_max_file_size_mb(), + max_extracted_text_chars: default_multimodal_max_extracted_text_chars(), + allow_remote_fetch: false, + allowed_mime_types: default_multimodal_allowed_file_mime_types(), + } + } +} diff --git a/src/openhuman/config/schema/tools/search.rs b/src/openhuman/config/schema/tools/search.rs new file mode 100644 index 0000000000..1af88aef5f --- /dev/null +++ b/src/openhuman/config/schema/tools/search.rs @@ -0,0 +1,337 @@ +//! Search engine config: Seltz, SearXNG, web search, and unified search selector. + +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct SeltzConfig { + /// When `true`, register `seltz_search` as an agent tool. + #[serde(default)] + pub enabled: bool, + /// Seltz API key. Can also be set via `SELTZ_API_KEY` or + /// `OPENHUMAN_SELTZ_API_KEY` env var. + #[serde(default)] + pub api_key: Option, + /// Override the Seltz API base URL (default: `https://api.seltz.ai/v1`). + #[serde(default)] + pub api_url: Option, + /// Max results per query (1–20, default 10). + #[serde(default = "default_seltz_max_results")] + pub max_results: usize, + /// Per-request timeout in seconds (default 15). 
+ #[serde(default = "default_seltz_timeout_secs")] + pub timeout_secs: u64, +} + +fn default_seltz_max_results() -> usize { + 10 +} + +fn default_seltz_timeout_secs() -> u64 { + 15 +} + +impl Default for SeltzConfig { + fn default() -> Self { + Self { + enabled: false, + api_key: None, + api_url: None, + max_results: default_seltz_max_results(), + timeout_secs: default_seltz_timeout_secs(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct SearxngConfig { + /// When `true`, register `searxng_search` as an agent and MCP tool. + #[serde(default)] + pub enabled: bool, + /// Base URL for the user's SearXNG instance. + #[serde(default = "default_searxng_base_url")] + pub base_url: String, + /// Max results per query (1-50, default 10). + #[serde(default = "default_searxng_max_results")] + pub max_results: usize, + /// Language code passed to SearXNG when a call omits `language`. + #[serde(default = "default_searxng_language")] + pub default_language: String, + /// Per-request timeout in seconds (default 10). 
+ #[serde(default = "default_searxng_timeout_secs", alias = "timeout_seconds")] + pub timeout_secs: u64, +} + +fn default_searxng_base_url() -> String { + "http://localhost:8080".into() +} + +fn default_searxng_max_results() -> usize { + 10 +} + +fn default_searxng_language() -> String { + "en".into() +} + +fn default_searxng_timeout_secs() -> u64 { + 10 +} + +impl Default for SearxngConfig { + fn default() -> Self { + Self { + enabled: false, + base_url: default_searxng_base_url(), + max_results: default_searxng_max_results(), + default_language: default_searxng_language(), + timeout_secs: default_searxng_timeout_secs(), + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct WebSearchConfig { + #[serde(default = "default_web_search_max_results")] + pub max_results: usize, + #[serde(default = "default_web_search_timeout_secs")] + pub timeout_secs: u64, +} + +fn default_web_search_max_results() -> usize { + 5 +} + +fn default_web_search_timeout_secs() -> u64 { + 15 +} + +impl Default for WebSearchConfig { + fn default() -> Self { + Self { + max_results: default_web_search_max_results(), + timeout_secs: default_web_search_timeout_secs(), + } + } +} + +// ── Search engines ────────────────────────────────────────────────── +// +// Unified search-engine selector. Only one engine is active at a time +// (mirrors the LLM-provider API-key flow). The active engine governs +// which tools are registered: `disabled` → no search tools; `managed` → +// backend-proxied `web_search`; `parallel` → direct Parallel API tools +// (search/extract/chat/research/enrich/dataset); `brave` → direct Brave Search +// tools (web/news/images/videos); `querit` → direct Querit web search. 
+ +pub const SEARCH_ENGINE_DISABLED: &str = "disabled"; +pub const SEARCH_ENGINE_MANAGED: &str = "managed"; +pub const SEARCH_ENGINE_PARALLEL: &str = "parallel"; +pub const SEARCH_ENGINE_BRAVE: &str = "brave"; +pub const SEARCH_ENGINE_QUERIT: &str = "querit"; + +fn default_search_engine() -> String { + SEARCH_ENGINE_MANAGED.into() +} + +fn default_search_max_results() -> usize { + 5 +} + +fn default_search_timeout_secs() -> u64 { + 15 +} + +/// Credentials for a BYO search engine. Mirrors the LLM provider API- +/// key shape — a simple `Option` that is considered configured +/// iff the trimmed value is non-empty. +#[derive(Debug, Clone, Default, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct SearchEngineCredentials { + #[serde(default)] + pub api_key: Option, +} + +impl SearchEngineCredentials { + pub fn has_key(&self) -> bool { + self.api_key + .as_deref() + .map(|s| !s.trim().is_empty()) + .unwrap_or(false) + } + + pub fn key(&self) -> Option<&str> { + self.api_key.as_deref().and_then(|s| { + let t = s.trim(); + if t.is_empty() { + None + } else { + Some(t) + } + }) + } +} + +/// Unified search-engine configuration. Exactly one engine drives tool +/// registration at a time. `disabled` suppresses all search tools; `managed` is +/// the backend-proxied default and requires no key; `parallel`, `brave`, and +/// `querit` are BYO and require their own API key in the matching sub-block. +#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] +#[serde(default)] +pub struct SearchConfig { + /// Active search engine. One of [`SEARCH_ENGINE_DISABLED`], + /// [`SEARCH_ENGINE_MANAGED`], [`SEARCH_ENGINE_PARALLEL`], + /// [`SEARCH_ENGINE_BRAVE`], or [`SEARCH_ENGINE_QUERIT`]. Unknown values + /// fall back to managed at registration time. + #[serde(default = "default_search_engine")] + pub engine: String, + + /// Max results per query (1–20, default 5). 
+ #[serde(default = "default_search_max_results")] + pub max_results: usize, + + /// Per-request timeout in seconds (default 15). + #[serde(default = "default_search_timeout_secs")] + pub timeout_secs: u64, + + /// Parallel API credentials (used when `engine = "parallel"`). + #[serde(default)] + pub parallel: SearchEngineCredentials, + + /// Brave Search credentials (used when `engine = "brave"`). + #[serde(default)] + pub brave: SearchEngineCredentials, + + /// Querit credentials (used when `engine = "querit"`). + #[serde(default)] + pub querit: SearchEngineCredentials, +} + +impl Default for SearchConfig { + fn default() -> Self { + Self { + engine: default_search_engine(), + max_results: default_search_max_results(), + timeout_secs: default_search_timeout_secs(), + parallel: SearchEngineCredentials::default(), + brave: SearchEngineCredentials::default(), + querit: SearchEngineCredentials::default(), + } + } +} + +/// Normalized search-engine enum used at tool-registration time. Falls +/// back to [`SearchEngine::Managed`] for unknown strings and for BYO +/// engines that have no API key configured. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum SearchEngine { + Disabled, + Managed, + Parallel, + Brave, + Querit, +} + +impl SearchConfig { + /// Resolve the *effective* engine after gating on API-key + /// availability. A BYO engine without a key silently falls back to + /// managed so the agent never ends up with zero search tools — the + /// UI surfaces the misconfiguration separately. 
+ pub fn effective_engine(&self) -> SearchEngine { + match self.engine.trim().to_ascii_lowercase().as_str() { + SEARCH_ENGINE_DISABLED => SearchEngine::Disabled, + SEARCH_ENGINE_PARALLEL if self.parallel.has_key() => SearchEngine::Parallel, + SEARCH_ENGINE_BRAVE if self.brave.has_key() => SearchEngine::Brave, + SEARCH_ENGINE_QUERIT if self.querit.has_key() => SearchEngine::Querit, + _ => SearchEngine::Managed, + } + } + + pub fn requested_engine_str(&self) -> &str { + let trimmed = self.engine.trim(); + if trimmed.is_empty() { + SEARCH_ENGINE_MANAGED + } else { + trimmed + } + } +} + +#[cfg(test)] +mod search_config_tests { + use super::*; + use crate::openhuman::config::schema::tools::http::HttpRequestConfig; + + #[test] + fn defaults_to_managed() { + let cfg = SearchConfig::default(); + assert_eq!(cfg.effective_engine(), SearchEngine::Managed); + } + + #[test] + fn disabled_stays_disabled() { + let cfg = SearchConfig { + engine: SEARCH_ENGINE_DISABLED.into(), + ..Default::default() + }; + assert_eq!(cfg.effective_engine(), SearchEngine::Disabled); + } + + #[test] + fn parallel_requires_key() { + let mut cfg = SearchConfig { + engine: SEARCH_ENGINE_PARALLEL.into(), + ..Default::default() + }; + assert_eq!(cfg.effective_engine(), SearchEngine::Managed); + cfg.parallel.api_key = Some(" ".into()); + assert_eq!(cfg.effective_engine(), SearchEngine::Managed); + cfg.parallel.api_key = Some("real".into()); + assert_eq!(cfg.effective_engine(), SearchEngine::Parallel); + } + + #[test] + fn brave_requires_key() { + let mut cfg = SearchConfig { + engine: SEARCH_ENGINE_BRAVE.into(), + ..Default::default() + }; + assert_eq!(cfg.effective_engine(), SearchEngine::Managed); + cfg.brave.api_key = Some("real".into()); + assert_eq!(cfg.effective_engine(), SearchEngine::Brave); + } + + #[test] + fn querit_requires_key() { + let mut cfg = SearchConfig { + engine: SEARCH_ENGINE_QUERIT.into(), + ..Default::default() + }; + assert_eq!(cfg.effective_engine(), SearchEngine::Managed); + 
cfg.querit.api_key = Some("real".into()); + assert_eq!(cfg.effective_engine(), SearchEngine::Querit); + } + + #[test] + fn http_request_defaults_to_allow_all() { + // Web research works out of the box: the default allowlist is the + // wildcard. The SSRF guard (url_guard) still blocks local/private + // hosts regardless, so this only opens public sites. + let cfg = HttpRequestConfig::default(); + assert_eq!(cfg.allowed_domains, vec!["*".to_string()]); + assert_eq!(cfg.max_response_size, 1_000_000); + assert_eq!(cfg.timeout_secs, 30); + } + + #[test] + fn unknown_engine_falls_back_to_managed() { + let cfg = SearchConfig { + engine: "duckduckgo".into(), + ..Default::default() + }; + assert_eq!(cfg.effective_engine(), SearchEngine::Managed); + } +} diff --git a/src/openhuman/config/schemas.rs b/src/openhuman/config/schemas.rs deleted file mode 100644 index 908cbee87c..0000000000 --- a/src/openhuman/config/schemas.rs +++ /dev/null @@ -1,2004 +0,0 @@ -use serde::de::{DeserializeOwned, Deserializer}; -use serde::Deserialize; -use serde_json::{Map, Value}; - -use crate::core::all::{ControllerFuture, RegisteredController}; -use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::openhuman::config::rpc as config_rpc; -use crate::rpc::RpcOutcome; - -const DEFAULT_ONBOARDING_FLAG_NAME: &str = ".skip_onboarding"; - -#[derive(Debug, Deserialize)] -struct ModelRouteUpdate { - hint: String, - model: String, -} - -#[derive(Debug, Deserialize)] -struct CloudProviderUpdate { - /// Opaque stable id. Empty / missing → server generates a new id. - id: Option, - /// Routing slug, e.g. "openai", "my-deepseek". Must be unique per config. - slug: String, - /// Human-readable label. - #[serde(default)] - label: Option, - endpoint: String, - /// Auth style: "bearer" | "anthropic" | "openhuman_jwt" | "none". - #[serde(default)] - auth_style: Option, - /// Legacy field — tolerated on read for back-compat but not required. 
- #[serde(rename = "type", default)] - legacy_type: Option, - /// Legacy field — tolerated on read. - #[serde(default)] - default_model: Option, -} - -#[derive(Debug, Deserialize)] -struct ModelSettingsUpdate { - /// OpenHuman product backend URL. Used for auth, billing, voice, and - /// every non-inference HTTP call. Almost always left blank so it - /// defaults to the canonical hosted backend. - api_url: Option, - /// Custom OpenAI-compatible LLM endpoint. When set together with - /// `api_key`, inference talks directly to this URL instead of routing - /// through the OpenHuman backend. Send an empty string to clear. - inference_url: Option, - /// Optional API key for OpenAI-compatible backends. Stored verbatim in - /// `config.toml` on the user's machine — see #1342 (local-first / pluggable - /// backends). The key is never echoed back over RPC; `get_client_config` - /// only reports `api_key_set: bool`. - api_key: Option, - default_model: Option, - default_temperature: Option, - /// When present, REPLACES `config.model_routes` wholesale with these - /// `(hint, model)` pairs. Send `Some([])` to clear all routes (used when - /// the user switches back to the OpenHuman backend whose built-in router - /// picks per-task models on its own). Omit to leave existing routes - /// untouched. - model_routes: Option>, - /// When present, REPLACES `config.cloud_providers` wholesale. The keys - /// themselves live in `auth-profiles.json` via - /// `cloud_provider_set_key` — they are NOT carried here. 
- cloud_providers: Option>, - primary_cloud: Option, - chat_provider: Option, - reasoning_provider: Option, - agentic_provider: Option, - coding_provider: Option, - memory_provider: Option, - embeddings_provider: Option, - heartbeat_provider: Option, - learning_provider: Option, - subconscious_provider: Option, -} - -#[derive(Debug, Deserialize)] -struct MemorySettingsUpdate { - backend: Option, - auto_save: Option, - embedding_provider: Option, - embedding_model: Option, - embedding_dimensions: Option, - /// One of `"minimal" | "balanced" | "extended" | "maximum"`. - memory_window: Option, -} - -#[derive(Debug, Deserialize)] -struct RuntimeSettingsUpdate { - kind: Option, - reasoning_enabled: Option, -} - -#[derive(Debug, Deserialize)] -struct BrowserSettingsUpdate { - enabled: Option, -} - -#[derive(Debug, Deserialize)] -struct ScreenIntelligenceSettingsUpdate { - enabled: Option, - capture_policy: Option, - policy_mode: Option, - baseline_fps: Option, - vision_enabled: Option, - autocomplete_enabled: Option, - use_vision_model: Option, - keep_screenshots: Option, - allowlist: Option>, - denylist: Option>, -} - -#[derive(Debug, Deserialize)] -struct AnalyticsSettingsUpdate { - enabled: Option, -} - -#[derive(Debug, Deserialize)] -struct MeetSettingsUpdate { - auto_orchestrator_handoff: Option, -} - -#[derive(Debug, Deserialize)] -struct SearchSettingsUpdate { - engine: Option, - max_results: Option, - timeout_secs: Option, - parallel_api_key: Option, - brave_api_key: Option, - querit_api_key: Option, - allowed_domains: Option>, - allow_all: Option, -} - -#[derive(Debug, Deserialize)] -struct LocalAiSettingsUpdate { - runtime_enabled: Option, - /// MVP opt-in marker. Tied to `runtime_enabled` from the unified AI - /// panel toggle (both flip on enable, both flip off on disable) so - /// the user gets local AI working with a single click instead of - /// having to also apply a tier preset. 
- opt_in_confirmed: Option, - provider: Option, - #[serde(default, deserialize_with = "deserialize_present_json")] - base_url: Option, - model_id: Option, - chat_model_id: Option, - usage_embeddings: Option, - usage_heartbeat: Option, - usage_learning_reflection: Option, - usage_subconscious: Option, -} - -#[derive(Debug, Deserialize)] -struct SetBrowserAllowAllParams { - enabled: bool, -} - -#[derive(Debug, Deserialize)] -struct WorkspaceOnboardingFlagParams { - flag_name: Option, -} - -#[derive(Debug, Deserialize)] -struct WorkspaceOnboardingFlagSetParams { - flag_name: Option, - value: bool, -} - -#[derive(Debug, Deserialize)] -struct OnboardingCompletedSetParams { - value: bool, -} - -#[derive(Debug, Deserialize)] -struct DictationSettingsUpdate { - enabled: Option, - hotkey: Option, - activation_mode: Option, - llm_refinement: Option, - streaming: Option, - streaming_interval_ms: Option, -} - -#[derive(Debug, Deserialize)] -struct VoiceServerSettingsUpdate { - auto_start: Option, - hotkey: Option, - activation_mode: Option, - skip_cleanup: Option, - min_duration_secs: Option, - silence_threshold: Option, - custom_dictionary: Option>, - always_on_enabled: Option, - wake_word: Option, -} - -#[derive(Debug, Deserialize)] -struct ComposioTriggerSettingsUpdate { - triage_disabled: Option, - triage_disabled_toolkits: Option>, -} - -#[derive(Debug, Deserialize)] -struct AutonomySettingsUpdate { - /// `"readonly" | "supervised" | "full"` (case-insensitive). - level: Option, - workspace_only: Option, - /// Replaces the shell command allow-list wholesale. - allowed_commands: Option>, - /// Replaces the forbidden-paths denylist wholesale. - forbidden_paths: Option>, - /// Replaces the trusted-roots allow-list wholesale. Each entry is - /// `{ "path": "/abs/dir", "access": "read" | "readwrite" }`. - trusted_roots: Option>, - allow_tool_install: Option, - // Accept u64 to match the published schema (`TypeSchema::U64`); clamped to the - // internal u32 at apply time. 
u32::MAX/hr is already effectively unlimited. - max_actions_per_hour: Option, - /// Replaces the "Always allow" allowlist wholesale — tool names the agent - /// may run without an approval prompt. Empty list clears it. - auto_approve: Option>, - require_task_plan_approval: Option, -} - -#[derive(Debug, Deserialize)] -struct AgentSettingsUpdate { - /// Tool/action wall-clock timeout in seconds (1–3600). Validated server-side. - agent_timeout_secs: Option, -} - -#[derive(Debug, Deserialize)] -struct AgentPathsUpdate { - /// New absolute action sandbox path. Empty string clears the override; - /// omitted leaves it unchanged. Validated server-side. - action_dir: Option, -} - -#[derive(Debug, Deserialize)] -struct ActivityLevelSettingsUpdate { - /// "off" | "minimal" | "moderate" | "active" | "always_on" (or "0"-"4"). - level: Option, -} - -pub fn all_controller_schemas() -> Vec { - vec![ - schemas("get_config"), - schemas("get_client_config"), - schemas("update_model_settings"), - schemas("update_memory_settings"), - schemas("update_screen_intelligence_settings"), - schemas("update_runtime_settings"), - schemas("update_browser_settings"), - schemas("update_local_ai_settings"), - schemas("resolve_api_url"), - schemas("get_runtime_flags"), - schemas("set_browser_allow_all"), - schemas("workspace_onboarding_flag_exists"), - schemas("workspace_onboarding_flag_set"), - schemas("update_analytics_settings"), - schemas("get_analytics_settings"), - schemas("get_dashboard_settings"), - schemas("update_meet_settings"), - schemas("get_meet_settings"), - schemas("agent_server_status"), - schemas("reset_local_data"), - schemas("get_data_paths"), - schemas("get_agent_paths"), - schemas("update_agent_paths"), - schemas("get_onboarding_completed"), - schemas("set_onboarding_completed"), - schemas("get_dictation_settings"), - schemas("update_dictation_settings"), - schemas("get_voice_server_settings"), - schemas("update_voice_server_settings"), - 
schemas("update_composio_trigger_settings"), - schemas("get_composio_trigger_settings"), - schemas("get_autonomy_settings"), - schemas("update_autonomy_settings"), - schemas("get_agent_settings"), - schemas("update_agent_settings"), - schemas("update_search_settings"), - schemas("get_search_settings"), - schemas("get_activity_level_settings"), - schemas("update_activity_level_settings"), - schemas("get_memory_sync_settings"), - schemas("update_memory_sync_settings"), - schemas("get_sandbox_settings"), - schemas("update_sandbox_settings"), - ] -} - -pub fn all_registered_controllers() -> Vec { - vec![ - RegisteredController { - schema: schemas("get_config"), - handler: handle_get_config, - }, - RegisteredController { - schema: schemas("get_client_config"), - handler: handle_get_client_config, - }, - RegisteredController { - schema: schemas("update_model_settings"), - handler: handle_update_model_settings, - }, - RegisteredController { - schema: schemas("update_memory_settings"), - handler: handle_update_memory_settings, - }, - RegisteredController { - schema: schemas("update_screen_intelligence_settings"), - handler: handle_update_screen_intelligence_settings, - }, - RegisteredController { - schema: schemas("update_runtime_settings"), - handler: handle_update_runtime_settings, - }, - RegisteredController { - schema: schemas("update_browser_settings"), - handler: handle_update_browser_settings, - }, - RegisteredController { - schema: schemas("update_local_ai_settings"), - handler: handle_update_local_ai_settings, - }, - RegisteredController { - schema: schemas("resolve_api_url"), - handler: handle_resolve_api_url, - }, - RegisteredController { - schema: schemas("get_runtime_flags"), - handler: handle_get_runtime_flags, - }, - RegisteredController { - schema: schemas("set_browser_allow_all"), - handler: handle_set_browser_allow_all, - }, - RegisteredController { - schema: schemas("workspace_onboarding_flag_exists"), - handler: handle_workspace_onboarding_flag_exists, 
- }, - RegisteredController { - schema: schemas("workspace_onboarding_flag_set"), - handler: handle_workspace_onboarding_flag_set, - }, - RegisteredController { - schema: schemas("update_analytics_settings"), - handler: handle_update_analytics_settings, - }, - RegisteredController { - schema: schemas("get_analytics_settings"), - handler: handle_get_analytics_settings, - }, - RegisteredController { - schema: schemas("get_dashboard_settings"), - handler: handle_get_dashboard_settings, - }, - RegisteredController { - schema: schemas("update_meet_settings"), - handler: handle_update_meet_settings, - }, - RegisteredController { - schema: schemas("get_meet_settings"), - handler: handle_get_meet_settings, - }, - RegisteredController { - schema: schemas("agent_server_status"), - handler: handle_agent_server_status, - }, - RegisteredController { - schema: schemas("reset_local_data"), - handler: handle_reset_local_data, - }, - RegisteredController { - schema: schemas("get_data_paths"), - handler: handle_get_data_paths, - }, - RegisteredController { - schema: schemas("get_agent_paths"), - handler: handle_get_agent_paths, - }, - RegisteredController { - schema: schemas("update_agent_paths"), - handler: handle_update_agent_paths, - }, - RegisteredController { - schema: schemas("get_onboarding_completed"), - handler: handle_get_onboarding_completed, - }, - RegisteredController { - schema: schemas("set_onboarding_completed"), - handler: handle_set_onboarding_completed, - }, - RegisteredController { - schema: schemas("get_dictation_settings"), - handler: handle_get_dictation_settings, - }, - RegisteredController { - schema: schemas("update_dictation_settings"), - handler: handle_update_dictation_settings, - }, - RegisteredController { - schema: schemas("get_voice_server_settings"), - handler: handle_get_voice_server_settings, - }, - RegisteredController { - schema: schemas("update_voice_server_settings"), - handler: handle_update_voice_server_settings, - }, - RegisteredController 
{ - schema: schemas("update_composio_trigger_settings"), - handler: handle_update_composio_trigger_settings, - }, - RegisteredController { - schema: schemas("get_composio_trigger_settings"), - handler: handle_get_composio_trigger_settings, - }, - RegisteredController { - schema: schemas("get_autonomy_settings"), - handler: handle_get_autonomy_settings, - }, - RegisteredController { - schema: schemas("update_autonomy_settings"), - handler: handle_update_autonomy_settings, - }, - RegisteredController { - schema: schemas("get_agent_settings"), - handler: handle_get_agent_settings, - }, - RegisteredController { - schema: schemas("update_agent_settings"), - handler: handle_update_agent_settings, - }, - RegisteredController { - schema: schemas("update_search_settings"), - handler: handle_update_search_settings, - }, - RegisteredController { - schema: schemas("get_search_settings"), - handler: handle_get_search_settings, - }, - RegisteredController { - schema: schemas("get_activity_level_settings"), - handler: handle_get_activity_level_settings, - }, - RegisteredController { - schema: schemas("update_activity_level_settings"), - handler: handle_update_activity_level_settings, - }, - RegisteredController { - schema: schemas("get_memory_sync_settings"), - handler: handle_get_memory_sync_settings, - }, - RegisteredController { - schema: schemas("update_memory_sync_settings"), - handler: handle_update_memory_sync_settings, - }, - RegisteredController { - schema: schemas("get_sandbox_settings"), - handler: handle_get_sandbox_settings, - }, - RegisteredController { - schema: schemas("update_sandbox_settings"), - handler: handle_update_sandbox_settings, - }, - ] -} - -pub fn schemas(function: &str) -> ControllerSchema { - match function { - "get_config" => ControllerSchema { - namespace: "config", - function: "get", - description: "Read persisted config snapshot and resolved paths.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "snapshot", - ty: TypeSchema::Json, - 
comment: "Config snapshot with workspace and config paths.", - required: true, - }], - }, - "get_client_config" => ControllerSchema { - namespace: "config", - function: "get_client_config", - description: "Read safe client-facing config fields (api_url, feature flags). No secrets.", - inputs: vec![], - outputs: vec![ - FieldSchema { - name: "api_url", - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment: "Configured OpenHuman product backend URL, if any.", - required: false, - }, - FieldSchema { - name: "inference_url", - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment: "Custom OpenAI-compatible LLM endpoint, if any. When set together with an api_key, inference goes direct to this URL.", - required: false, - }, - FieldSchema { - name: "default_model", - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment: "Default model identifier.", - required: false, - }, - FieldSchema { - name: "app_version", - ty: TypeSchema::String, - comment: "OpenHuman core version.", - required: true, - }, - FieldSchema { - name: "api_key_set", - ty: TypeSchema::Bool, - comment: "True when a custom backend api_key is stored locally. The key itself is never returned over RPC.", - required: true, - }, - FieldSchema { - name: "model_routes", - ty: TypeSchema::Json, - comment: "Persisted task-hint -> model id pairs the core router will obey. Empty when the OpenHuman built-in router is active.", - required: true, - }, - ], - }, - "update_model_settings" => ControllerSchema { - namespace: "config", - function: "update_model_settings", - description: "Update model and backend connection settings, including a custom OpenAI-compatible backend (api_url + api_key).", - inputs: vec![ - optional_string("api_url", "OpenHuman product backend URL (auth/billing/voice). Almost always left blank; the inference URL is a separate `inference_url` field."), - optional_string("inference_url", "Custom OpenAI-compatible LLM endpoint. 
When set together with `api_key`, inference goes direct to this URL instead of the OpenHuman backend. Pass an empty string to clear."), - optional_string("api_key", "Optional API key for the configured inference endpoint. Pass an empty string to clear a previously stored key."), - optional_string("default_model", "Default model id."), - FieldSchema { - name: "default_temperature", - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment: "Default model temperature.", - required: false, - }, - FieldSchema { - name: "model_routes", - ty: TypeSchema::Option(Box::new(TypeSchema::Json)), - comment: "Optional list of {hint, model} pairs mapping task hints (reasoning, agentic, coding, summarization) to provider-specific model ids. Replaces config.model_routes wholesale; send [] to clear (e.g. when switching back to the OpenHuman built-in router).", - required: false, - }, - FieldSchema { - name: "cloud_providers", - ty: TypeSchema::Option(Box::new(TypeSchema::Json)), - comment: "Optional list of cloud provider entries {id, slug, label, endpoint, auth_style}. API keys are stored separately via cloud_provider_set_key. Replaces config.cloud_providers wholesale.", - required: false, - }, - optional_string("primary_cloud", "id of the cloud_providers entry used when a workload routes to 'cloud'. Empty string clears."), - optional_string("chat_provider", "Provider string for direct conversational chat workloads."), - optional_string("reasoning_provider", "Provider string for the main reasoning workload (e.g. 
'cloud', 'ollama:llama3.1:8b', 'openai:gpt-4o')."), - optional_string("agentic_provider", "Provider string for sub-agent / tool-loop workloads."), - optional_string("coding_provider", "Provider string for code-generation workloads."), - optional_string("memory_provider", "Provider string for memory-tree extract + summarise."), - optional_string("embeddings_provider", "Provider string for embedding generation."), - optional_string("heartbeat_provider", "Provider string for the heartbeat background-reasoning loop."), - optional_string("learning_provider", "Provider string for learning / reflection passes."), - optional_string("subconscious_provider", "Provider string for subconscious evaluation."), - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "update_memory_settings" => ControllerSchema { - namespace: "config", - function: "update_memory_settings", - description: "Update memory backend and embedding settings.", - inputs: vec![ - optional_string("backend", "Memory backend identifier."), - FieldSchema { - name: "auto_save", - ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), - comment: "Enable auto-save.", - required: false, - }, - optional_string("embedding_provider", "Embedding provider identifier."), - optional_string("embedding_model", "Embedding model identifier."), - FieldSchema { - name: "embedding_dimensions", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Embedding dimensions.", - required: false, - }, - optional_string( - "memory_window", - "Stepped long-term memory window preset: minimal | balanced | extended | maximum.", - ), - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "update_screen_intelligence_settings" => ControllerSchema { - namespace: "config", - function: "update_screen_intelligence_settings", - description: "Update screen intelligence runtime settings.", - inputs: vec![ - optional_bool("enabled", "Enable screen intelligence."), - 
optional_string("capture_policy", "Capture policy mode."), - optional_string("policy_mode", "Policy mode override."), - FieldSchema { - name: "baseline_fps", - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment: "Baseline capture FPS.", - required: false, - }, - optional_bool("vision_enabled", "Enable vision analysis."), - optional_bool("autocomplete_enabled", "Enable autocomplete integration."), - optional_bool( - "use_vision_model", - "Use a vision LLM for screenshot analysis (false = OCR + text LLM).", - ), - optional_bool("keep_screenshots", "Keep screenshots on disk after vision processing."), - FieldSchema { - name: "allowlist", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( - TypeSchema::String, - )))), - comment: "Allowed app list.", - required: false, - }, - FieldSchema { - name: "denylist", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( - TypeSchema::String, - )))), - comment: "Denied app list.", - required: false, - }, - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "update_runtime_settings" => ControllerSchema { - namespace: "config", - function: "update_runtime_settings", - description: "Update runtime execution strategy settings.", - inputs: vec![ - optional_string("kind", "Runtime kind."), - optional_bool("reasoning_enabled", "Enable reasoning mode."), - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_autonomy_settings" => ControllerSchema { - namespace: "config", - function: "get_autonomy_settings", - description: "Get the agent access-mode settings (autonomy level, workspace confinement, trusted roots, command allow-list, forbidden paths).", - inputs: vec![], - outputs: vec![json_output("autonomy", "Current [autonomy] config block.")], - }, - "update_autonomy_settings" => ControllerSchema { - namespace: "config", - function: "update_autonomy_settings", - description: "Update the agent access mode: autonomy level, workspace confinement, 
trusted-roots allow-list, command allow-list, forbidden paths, and OS-install permission. Applies live to active sessions.", - inputs: vec![ - optional_string("level", "Autonomy level: readonly | supervised | full."), - optional_bool("workspace_only", "Confine file/path access to the workspace directory."), - FieldSchema { - name: "allowed_commands", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), - comment: "Replace the shell command allow-list (array of base command names).", - required: false, - }, - FieldSchema { - name: "forbidden_paths", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), - comment: "Replace the forbidden-paths denylist (array of path prefixes).", - required: false, - }, - FieldSchema { - name: "trusted_roots", - ty: TypeSchema::Option(Box::new(TypeSchema::Json)), - comment: "Replace the trusted-roots allow-list: array of {path, access: read|readwrite}. Grants access outside the workspace; credential dirs (~/.ssh, ~/.gnupg, ~/.aws) stay blocked regardless.", - required: false, - }, - optional_bool("allow_tool_install", "Allow the agent to install OS packages via install_tool (intended for Full mode)."), - FieldSchema { - name: "max_actions_per_hour", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Rate limit for side-effecting actions per hour.", - required: false, - }, - FieldSchema { - name: "auto_approve", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), - comment: "Replace the \"Always allow\" allowlist (array of tool names the agent runs without an approval prompt). 
Empty array clears it.", - required: false, - }, - optional_bool("require_task_plan_approval", "Require approval before an agent executes a task-board plan."), - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_agent_settings" => ControllerSchema { - namespace: "config", - function: "get_agent_settings", - description: "Read agent execution settings: the action/tool wall-clock timeout, the runtime-effective value, and whether the OPENHUMAN_TOOL_TIMEOUT_SECS env var overrides it.", - inputs: vec![], - outputs: vec![json_output( - "settings", - "Agent settings: agent_timeout_secs, effective_timeout_secs, env_override, min_timeout_secs, max_timeout_secs.", - )], - }, - "update_agent_settings" => ControllerSchema { - namespace: "config", - function: "update_agent_settings", - description: "Update agent execution settings. Currently the action/tool wall-clock timeout (seconds). Applies to the next tool call without a restart; the OPENHUMAN_TOOL_TIMEOUT_SECS env var still overrides it when set.", - inputs: vec![FieldSchema { - name: "agent_timeout_secs", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Wall-clock timeout for a single tool/action execution, in seconds (1–3600). 
Extend this when large local models are interrupted before finishing.", - required: false, - }], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "update_browser_settings" => ControllerSchema { - namespace: "config", - function: "update_browser_settings", - description: "Update browser automation settings.", - inputs: vec![optional_bool("enabled", "Enable browser integration.")], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "update_local_ai_settings" => ControllerSchema { - namespace: "config", - function: "update_local_ai_settings", - description: - "Update the local AI runtime master switch and per-feature usage flags.", - inputs: vec![ - optional_bool( - "runtime_enabled", - "Master switch — when false, no subsystem uses the selected local AI runtime.", - ), - optional_bool( - "opt_in_confirmed", - "MVP opt-in marker. Bootstrap hard-overrides to disabled when this is false, \ - regardless of `runtime_enabled`. Set in tandem with `runtime_enabled` from the \ - unified AI panel.", - ), - optional_string( - "provider", - "Local provider identifier. Supported values: ollama, lm_studio.", - ), - optional_json( - "base_url", - "Provider base URL string, or null to clear. 
For LM Studio this defaults to http://localhost:1234/v1.", - ), - optional_string("model_id", "Default local chat model identifier."), - optional_string("chat_model_id", "Local chat model identifier."), - optional_bool( - "usage_embeddings", - "Use the local model for embedding generation (when runtime_enabled).", - ), - optional_bool( - "usage_heartbeat", - "Use the local model inside the heartbeat loop (when runtime_enabled).", - ), - optional_bool( - "usage_learning_reflection", - "Use the local model for learning/reflection passes (when runtime_enabled).", - ), - optional_bool( - "usage_subconscious", - "Use the local model for subconscious evaluation (when runtime_enabled).", - ), - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "resolve_api_url" => ControllerSchema { - namespace: "config", - function: "resolve_api_url", - description: "Resolve effective API base URL using config/env/default from core.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "api_url", - ty: TypeSchema::String, - comment: "Resolved backend API URL.", - required: true, - }], - }, - "get_runtime_flags" => ControllerSchema { - namespace: "config", - function: "get_runtime_flags", - description: "Read environment-driven runtime flags.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "flags", - ty: TypeSchema::Ref("RuntimeFlagsOut"), - comment: "Runtime flag state.", - required: true, - }], - }, - "set_browser_allow_all" => ControllerSchema { - namespace: "config", - function: "set_browser_allow_all", - description: "Disable browser allow-all mode, or enable it only when operator opt-in is present.", - inputs: vec![FieldSchema { - name: "enabled", - ty: TypeSchema::Bool, - comment: "Whether to enable browser allow-all mode. 
Runtime enable is refused unless OPENHUMAN_BROWSER_ALLOW_ALL_RPC_ENABLE=1.", - required: true, - }], - outputs: vec![FieldSchema { - name: "flags", - ty: TypeSchema::Ref("RuntimeFlagsOut"), - comment: "Updated runtime flag state.", - required: true, - }], - }, - "workspace_onboarding_flag_exists" => ControllerSchema { - namespace: "config", - function: "workspace_onboarding_flag_exists", - description: "Check if onboarding flag file exists in workspace.", - inputs: vec![FieldSchema { - name: "flag_name", - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment: "Optional onboarding flag name override.", - required: false, - }], - outputs: vec![FieldSchema { - name: "exists", - ty: TypeSchema::Bool, - comment: "True when the flag file is present.", - required: true, - }], - }, - "workspace_onboarding_flag_set" => ControllerSchema { - namespace: "config", - function: "workspace_onboarding_flag_set", - description: "Create or remove the onboarding flag file in workspace.", - inputs: vec![ - FieldSchema { - name: "flag_name", - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment: "Optional onboarding flag name override.", - required: false, - }, - FieldSchema { - name: "value", - ty: TypeSchema::Bool, - comment: "True to create, false to remove.", - required: true, - }, - ], - outputs: vec![FieldSchema { - name: "exists", - ty: TypeSchema::Bool, - comment: "True when the flag file is present after the operation.", - required: true, - }], - }, - "update_analytics_settings" => ControllerSchema { - namespace: "config", - function: "update_analytics_settings", - description: "Enable or disable anonymized analytics and error reporting.", - inputs: vec![optional_bool( - "enabled", - "Enable anonymized analytics and crash reports.", - )], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_analytics_settings" => ControllerSchema { - namespace: "config", - function: "get_analytics_settings", - description: "Read current 
analytics settings.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "enabled", - ty: TypeSchema::Bool, - comment: "Whether anonymized analytics is enabled.", - required: true, - }], - }, - "get_dashboard_settings" => ControllerSchema { - namespace: "config", - function: "get_dashboard_settings", - description: "Read dashboard settings, including the local architecture diagram viewer.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "dashboard", - ty: TypeSchema::Json, - comment: "Current [dashboard] config block.", - required: true, - }], - }, - "update_meet_settings" => ControllerSchema { - namespace: "config", - function: "update_meet_settings", - description: - "Update Google Meet integration settings (currently the auto-orchestrator-handoff privacy gate).", - inputs: vec![optional_bool( - "auto_orchestrator_handoff", - "When true, ending a Meet call hands the transcript to the orchestrator for proactive follow-up actions.", - )], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_meet_settings" => ControllerSchema { - namespace: "config", - function: "get_meet_settings", - description: "Read current Google Meet integration settings.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "auto_orchestrator_handoff", - ty: TypeSchema::Bool, - comment: "Whether the orchestrator handoff fires on Meet call end.", - required: true, - }], - }, - "update_search_settings" => ControllerSchema { - namespace: "config", - function: "update_search_settings", - description: "Update search engine selection and BYO API credentials.", - inputs: vec![ - optional_string( - "engine", - "Active engine: managed | parallel | brave | querit.", - ), - FieldSchema { - name: "max_results", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Maximum results per query (1-20).", - required: false, - }, - FieldSchema { - name: "timeout_secs", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Per-request timeout 
in seconds (1-120).", - required: false, - }, - optional_string( - "parallel_api_key", - "Parallel API key (empty string clears the stored key).", - ), - optional_string( - "brave_api_key", - "Brave Search API key (empty string clears the stored key).", - ), - optional_string( - "querit_api_key", - "Querit API key (empty string clears the stored key).", - ), - FieldSchema { - name: "allowed_domains", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( - TypeSchema::String, - )))), - comment: "Websites the assistant may open/read (web_fetch/curl). Exact hosts match their subdomains; \"*\" allows all public sites; empty blocks all web access.", - required: false, - }, - FieldSchema { - name: "allow_all", - ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), - comment: "\"Allow all sites\" toggle. true sets the allowlist to [\"*\"]; false drops the wildcard, keeping explicit hosts.", - required: false, - }, - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_search_settings" => ControllerSchema { - namespace: "config", - function: "get_search_settings", - description: - "Read search engine settings. API keys are surfaced as presence booleans only.", - inputs: vec![], - outputs: vec![json_output( - "settings", - "Engine, effective engine, limits, and per-provider configuration flags.", - )], - }, - "get_activity_level_settings" => ControllerSchema { - namespace: "config", - function: "get_activity_level_settings", - description: "Get the agent activity level (0–4) and its derived settings: sync cadence, heartbeat/subconscious toggles, token budget, estimated monthly cost.", - inputs: vec![], - outputs: vec![json_output("settings", "Activity level settings with cost estimates.")], - }, - "update_activity_level_settings" => ControllerSchema { - namespace: "config", - function: "update_activity_level_settings", - description: "Set the agent activity level. 
Immediately updates the scheduler gate mode and persists the change.", - inputs: vec![optional_string("level", "Activity level: off | minimal | moderate | active | always_on (or 0–4).")], - outputs: vec![json_output("settings", "Updated activity level settings with cost estimates.")], - }, - "get_memory_sync_settings" => ControllerSchema { - namespace: "config", - function: "get_memory_sync_settings", - description: "Get the global memory-sync cadence applied to all opted-in sources: stored value, resolved selected cadence, manual/default flags, the 24h default, and the preset options (4h/12h/24h).", - inputs: vec![], - outputs: vec![json_output("settings", "Memory sync schedule settings.")], - }, - "update_memory_sync_settings" => ControllerSchema { - namespace: "config", - function: "update_memory_sync_settings", - description: "Set the global memory-sync cadence. Omit/null resets to the default; 0 means Manual only (auto-sync disabled); a positive value is seconds between syncs. Takes effect on the next scheduler tick.", - inputs: vec![FieldSchema { - name: "sync_interval_secs", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Seconds between auto-syncs. null = default (24h); 0 = Manual only; n>0 = sync every n seconds.", - required: false, - }], - outputs: vec![json_output("settings", "Updated memory sync schedule settings.")], - }, - "get_sandbox_settings" => ControllerSchema { - namespace: "config", - function: "get_sandbox_settings", - description: "Get sandbox execution backend settings: selected backend, Docker image/limits, env passthrough, Docker availability, and detected OS backend.", - inputs: vec![], - outputs: vec![json_output("settings", "Sandbox settings with status.")], - }, - "update_sandbox_settings" => ControllerSchema { - namespace: "config", - function: "update_sandbox_settings", - description: "Update sandbox execution backend settings: backend selection, Docker image, memory/CPU limits, and env passthrough. 
Applies to new agent sessions.", - inputs: vec![ - optional_string("backend", "Sandbox backend: auto | landlock | firejail | bubblewrap | docker | none."), - optional_bool("enabled", "Enable or disable sandbox execution."), - optional_string("docker_image", "Docker image for sandboxed execution (e.g. alpine:3.20)."), - FieldSchema { - name: "docker_memory_limit_mb", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Docker container memory limit in MB.", - required: false, - }, - FieldSchema { - name: "docker_cpu_limit", - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment: "Docker container CPU limit (e.g. 1.0 = one core).", - required: false, - }, - FieldSchema { - name: "env_passthrough", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), - comment: "Environment variables to pass through into the sandbox.", - required: false, - }, - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "agent_server_status" => ControllerSchema { - namespace: "config", - function: "agent_server_status", - description: "Return agent server runtime URL and status.", - inputs: vec![], - outputs: vec![json_output("status", "Agent server status payload.")], - }, - "reset_local_data" => ControllerSchema { - namespace: "config", - function: "reset_local_data", - description: - "Delete local OpenHuman data for the active config/workspace so the next restart boots clean.", - inputs: vec![], - outputs: vec![json_output("result", "Reset result with removed paths.")], - }, - "get_data_paths" => ControllerSchema { - namespace: "config", - function: "get_data_paths", - description: - "Resolve the OpenHuman data directories (current workspace, default ~/.openhuman, active workspace marker) that reset_local_data would remove. 
Read-only — performs no filesystem changes.", - inputs: vec![], - outputs: vec![json_output( - "paths", - "Resolved data paths: current_openhuman_dir, default_openhuman_dir, active_workspace_marker_path.", - )], - }, - "get_agent_paths" => ControllerSchema { - namespace: "config", - function: "get_agent_paths", - description: - "Resolve the agent's filesystem roots (action_dir, workspace_dir, projects_dir) so the UI can render live values instead of hard-coded strings. Read-only. Also returns `action_dir_env_override: bool` so the UI knows when OPENHUMAN_ACTION_DIR is forcing the value (Settings → action_dir editing disabled in that case).", - inputs: vec![], - outputs: vec![json_output( - "paths", - "Resolved agent paths: action_dir (acting-tool CWD), workspace_dir (internal state, agent-blocked), projects_dir (default projects home), action_dir_source (env | override | default).", - )], - }, - "update_agent_paths" => ControllerSchema { - namespace: "config", - function: "update_agent_paths", - description: - "Update the agent's editable filesystem roots. Currently only action_dir (the acting-tool sandbox). The path must be absolute; a missing directory is auto-created; it cannot equal the internal workspace_dir. An empty string clears the override and reverts to the default. Applies to new sessions immediately (live policy hot-swap), no restart. OPENHUMAN_ACTION_DIR still overrides at runtime when set.", - inputs: vec![FieldSchema { - name: "action_dir", - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment: "New absolute action sandbox path. Empty string clears the override (revert to default). 
Omit to leave unchanged.", - required: false, - }], - outputs: vec![json_output( - "paths", - "Updated agent paths (same shape as get_agent_paths): action_dir, workspace_dir, projects_dir, action_dir_source.", - )], - }, - "get_onboarding_completed" => ControllerSchema { - namespace: "config", - function: "get_onboarding_completed", - description: "Read whether the user has completed the onboarding flow.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "completed", - ty: TypeSchema::Bool, - comment: "True when onboarding has been completed.", - required: true, - }], - }, - "get_dictation_settings" => ControllerSchema { - namespace: "config", - function: "get_dictation_settings", - description: "Read current voice dictation settings.", - inputs: vec![], - outputs: vec![json_output("settings", "Dictation settings payload.")], - }, - "update_dictation_settings" => ControllerSchema { - namespace: "config", - function: "update_dictation_settings", - description: "Update voice dictation settings.", - inputs: vec![ - optional_bool("enabled", "Enable voice dictation."), - optional_string("hotkey", "Global hotkey string (e.g. 
Fn)."), - optional_string("activation_mode", "Activation mode: toggle or push."), - optional_bool("llm_refinement", "Enable LLM post-processing of transcription."), - optional_bool("streaming", "Enable WebSocket streaming transcription."), - FieldSchema { - name: "streaming_interval_ms", - ty: TypeSchema::Option(Box::new(TypeSchema::U64)), - comment: "Interval between streaming inference passes (ms).", - required: false, - }, - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_voice_server_settings" => ControllerSchema { - namespace: "config", - function: "get_voice_server_settings", - description: "Read current voice server settings.", - inputs: vec![], - outputs: vec![json_output("settings", "Voice server settings payload.")], - }, - "update_voice_server_settings" => ControllerSchema { - namespace: "config", - function: "update_voice_server_settings", - description: "Update voice server settings.", - inputs: vec![ - optional_bool("auto_start", "Start the voice server automatically with the core."), - optional_string("hotkey", "Voice server hotkey string (e.g. Fn)."), - optional_string("activation_mode", "Activation mode: tap or push."), - optional_bool("skip_cleanup", "Skip LLM cleanup and keep dictation verbatim."), - FieldSchema { - name: "min_duration_secs", - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment: "Minimum recording duration in seconds.", - required: false, - }, - FieldSchema { - name: "silence_threshold", - ty: TypeSchema::Option(Box::new(TypeSchema::F64)), - comment: "RMS energy threshold for silence detection.", - required: false, - }, - FieldSchema { - name: "custom_dictionary", - ty: TypeSchema::Option(Box::new(TypeSchema::Json)), - comment: "Custom vocabulary words to bias whisper toward.", - required: false, - }, - optional_bool( - "always_on_enabled", - "Continuous always-on listening (no hotkey). 
Opt-in.", - ), - optional_string( - "wake_word", - "Always-on wake word; utterances must contain it (default 'Hey Tiny').", - ), - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "set_onboarding_completed" => ControllerSchema { - namespace: "config", - function: "set_onboarding_completed", - description: "Mark the onboarding flow as completed or reset it.", - inputs: vec![FieldSchema { - name: "value", - ty: TypeSchema::Bool, - comment: "True to mark completed, false to reset.", - required: true, - }], - outputs: vec![FieldSchema { - name: "completed", - ty: TypeSchema::Bool, - comment: "Updated onboarding completed state.", - required: true, - }], - }, - "update_composio_trigger_settings" => ControllerSchema { - namespace: "config", - function: "update_composio_trigger_settings", - description: - "Update Composio trigger-triage settings. When triage is disabled the \ - local LLM is NOT invoked per trigger — events are still archived to \ - trigger history.", - inputs: vec![ - optional_bool( - "triage_disabled", - "When true, skip the LLM triage turn for all Composio triggers globally.", - ), - FieldSchema { - name: "triage_disabled_toolkits", - ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( - TypeSchema::String, - )))), - comment: "Toolkit slugs that skip LLM triage (e.g. 
[\"gmail\", \"slack\"]).", - required: false, - }, - ], - outputs: vec![json_output("snapshot", "Updated config snapshot.")], - }, - "get_composio_trigger_settings" => ControllerSchema { - namespace: "config", - function: "get_composio_trigger_settings", - description: "Read current Composio trigger-triage settings.", - inputs: vec![], - outputs: vec![ - FieldSchema { - name: "triage_disabled", - ty: TypeSchema::Bool, - comment: "Whether the global triage-disabled flag is set.", - required: true, - }, - FieldSchema { - name: "triage_disabled_toolkits", - ty: TypeSchema::Array(Box::new(TypeSchema::String)), - comment: "Toolkit slugs that skip LLM triage.", - required: true, - }, - ], - }, - _ => ControllerSchema { - namespace: "config", - function: "unknown", - description: "Unknown config controller function.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "error", - ty: TypeSchema::String, - comment: "Lookup error details.", - required: true, - }], - }, - } -} - -fn handle_get_config(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::load_and_get_config_snapshot().await?) 
}) -} - -fn handle_get_client_config(_params: Map) -> ControllerFuture { - Box::pin(async move { - log::debug!("[config][rpc] get_client_config enter"); - match config_rpc::load_and_get_client_config_snapshot().await { - Ok(snapshot) => to_json(snapshot), - Err(err) => { - log::warn!("[config][rpc] get_client_config load failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_update_model_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::ModelSettingsPatch { - api_url: update.api_url, - inference_url: update.inference_url, - api_key: update.api_key, - default_model: update.default_model, - default_temperature: update.default_temperature, - model_routes: update.model_routes.map(|routes| { - routes - .into_iter() - .map(|r| crate::openhuman::config::ModelRouteConfig { - hint: r.hint, - model: r.model, - }) - .collect() - }), - cloud_providers: update - .cloud_providers - .map(|entries| { - use crate::openhuman::config::schema::cloud_providers::{ - generate_provider_id, is_slug_reserved, migrate_legacy_fields, AuthStyle, - CloudProviderCreds, - }; - let reserved_count = entries - .iter() - .filter(|e| { - let t = e.slug.trim(); - !t.is_empty() && is_slug_reserved(t) - }) - .count(); - if reserved_count > 0 { - log::debug!( - "[config] update_model_settings: dropping {} reserved cloud provider slug(s)", - reserved_count - ); - } - entries - .into_iter() - // Silently drop entries whose (non-empty) slug is reserved — - // typically the migration-seeded "openhuman" / "cloud" / - // "pid" built-ins that the frontend echoes back on every - // save (see `migrations::unify_ai_provider_settings`). - // Empty slugs still fall through so the explicit - // validation error below fires for actual frontend - // bugs. `apply_model_settings` re-injects the existing - // reserved entries from the stored config so they - // aren't dropped on save. 
- .filter(|e| { - let trimmed = e.slug.trim(); - trimmed.is_empty() || !is_slug_reserved(trimmed) - }) - .map(|e| { - let slug = e.slug.trim().to_string(); - if slug.is_empty() { - return Err( - "cloud provider slug must not be empty".to_string() - ); - } - let auth_style = match e - .auth_style - .as_deref() - .unwrap_or("bearer") - .to_ascii_lowercase() - .as_str() - { - "bearer" => AuthStyle::Bearer, - "anthropic" => AuthStyle::Anthropic, - "openhuman_jwt" | "openhumanjwt" => AuthStyle::OpenhumanJwt, - "none" => AuthStyle::None, - other => { - return Err(format!( - "unknown auth_style '{}'; valid: bearer, anthropic, openhuman_jwt, none", - other - )) - } - }; - let id = e - .id - .filter(|s| !s.trim().is_empty()) - .unwrap_or_else(|| generate_provider_id(&slug)); - let label = e - .label - .filter(|s| !s.trim().is_empty()) - .unwrap_or_else(|| slug.clone()); - let mut entry = CloudProviderCreds { - id, - slug, - label, - endpoint: e.endpoint, - auth_style, - legacy_type: e.legacy_type, - default_model: e.default_model, - }; - // Apply any remaining legacy-field migration. - migrate_legacy_fields(&mut entry); - Ok(entry) - }) - .collect::, String>>() - }) - .transpose()?, - primary_cloud: update.primary_cloud, - chat_provider: update.chat_provider, - reasoning_provider: update.reasoning_provider, - agentic_provider: update.agentic_provider, - coding_provider: update.coding_provider, - memory_provider: update.memory_provider, - embeddings_provider: update.embeddings_provider, - heartbeat_provider: update.heartbeat_provider, - learning_provider: update.learning_provider, - subconscious_provider: update.subconscious_provider, - }; - to_json(config_rpc::load_and_apply_model_settings(patch).await?) 
- }) -} - -fn handle_update_memory_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::MemorySettingsPatch { - backend: update.backend, - auto_save: update.auto_save, - embedding_provider: update.embedding_provider, - embedding_model: update.embedding_model, - embedding_dimensions: update.embedding_dimensions, - memory_window: update.memory_window, - }; - to_json(config_rpc::load_and_apply_memory_settings(patch).await?) - }) -} - -fn handle_update_screen_intelligence_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::ScreenIntelligenceSettingsPatch { - enabled: update.enabled, - capture_policy: update.capture_policy, - policy_mode: update.policy_mode, - baseline_fps: update.baseline_fps, - vision_enabled: update.vision_enabled, - autocomplete_enabled: update.autocomplete_enabled, - use_vision_model: update.use_vision_model, - keep_screenshots: update.keep_screenshots, - allowlist: update.allowlist, - denylist: update.denylist, - }; - to_json(config_rpc::load_and_apply_screen_intelligence_settings(patch).await?) - }) -} - -fn handle_update_runtime_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::RuntimeSettingsPatch { - kind: update.kind, - reasoning_enabled: update.reasoning_enabled, - }; - to_json(config_rpc::load_and_apply_runtime_settings(patch).await?) - }) -} - -fn handle_get_autonomy_settings(_params: Map) -> ControllerFuture { - Box::pin(async move { to_json(config_rpc::get_autonomy_settings().await?) 
}) -} - -fn handle_update_autonomy_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::AutonomySettingsPatch { - level: update.level, - workspace_only: update.workspace_only, - allowed_commands: update.allowed_commands, - forbidden_paths: update.forbidden_paths, - trusted_roots: update.trusted_roots, - allow_tool_install: update.allow_tool_install, - max_actions_per_hour: update - .max_actions_per_hour - .map(|v| u32::try_from(v).unwrap_or(u32::MAX)), - auto_approve: update.auto_approve, - require_task_plan_approval: update.require_task_plan_approval, - }; - to_json(config_rpc::load_and_apply_autonomy_settings(patch).await?) - }) -} - -fn handle_get_agent_settings(_params: Map) -> ControllerFuture { - Box::pin(async { - log::debug!("[config][rpc] get_agent_settings enter"); - match config_rpc::get_agent_settings().await { - Ok(outcome) => { - log::debug!("[config][rpc] get_agent_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] get_agent_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_update_agent_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - log::debug!("[config][rpc] update_agent_settings enter"); - let update = match deserialize_params::(params) { - Ok(u) => u, - Err(err) => { - log::warn!("[config][rpc] update_agent_settings invalid params: {err}"); - return Err(err); - } - }; - let patch = config_rpc::AgentSettingsPatch { - agent_timeout_secs: update.agent_timeout_secs, - }; - match config_rpc::load_and_apply_agent_settings(patch).await { - Ok(outcome) => { - log::debug!("[config][rpc] update_agent_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] update_agent_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_update_browser_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = 
config_rpc::BrowserSettingsPatch { - enabled: update.enabled, - }; - to_json(config_rpc::load_and_apply_browser_settings(patch).await?) - }) -} - -fn handle_update_local_ai_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let base_url = match update.base_url { - None => None, - Some(Value::Null) => Some(None), - Some(Value::String(value)) => Some(Some(value)), - Some(_) => return Err("invalid params: base_url must be a string or null".to_string()), - }; - let patch = config_rpc::LocalAiSettingsPatch { - runtime_enabled: update.runtime_enabled, - opt_in_confirmed: update.opt_in_confirmed, - provider: update.provider, - base_url, - model_id: update.model_id, - chat_model_id: update.chat_model_id, - usage_embeddings: update.usage_embeddings, - usage_heartbeat: update.usage_heartbeat, - usage_learning_reflection: update.usage_learning_reflection, - usage_subconscious: update.usage_subconscious, - }; - to_json(config_rpc::load_and_apply_local_ai_settings(patch).await?) - }) -} - -fn handle_get_runtime_flags(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::get_runtime_flags()) }) -} - -fn handle_resolve_api_url(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::load_and_resolve_api_url().await?) }) -} - -fn handle_set_browser_allow_all(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - to_json(config_rpc::set_browser_allow_all(payload.enabled)?) 
- }) -} - -fn handle_workspace_onboarding_flag_exists(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - to_json( - config_rpc::workspace_onboarding_flag_resolve( - payload.flag_name, - DEFAULT_ONBOARDING_FLAG_NAME, - ) - .await?, - ) - }) -} - -fn handle_workspace_onboarding_flag_set(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - to_json( - config_rpc::workspace_onboarding_flag_set( - payload.flag_name, - DEFAULT_ONBOARDING_FLAG_NAME, - payload.value, - ) - .await?, - ) - }) -} - -fn handle_update_analytics_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::AnalyticsSettingsPatch { - enabled: update.enabled, - }; - to_json(config_rpc::load_and_apply_analytics_settings(patch).await?) - }) -} - -fn handle_get_analytics_settings(_params: Map) -> ControllerFuture { - Box::pin(async { - let config = config_rpc::load_config_with_timeout().await?; - let result = serde_json::json!({ - "enabled": config.observability.analytics_enabled, - }); - to_json(RpcOutcome::new( - result, - vec!["analytics settings read".to_string()], - )) - }) -} - -fn handle_get_dashboard_settings(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::get_dashboard_settings().await?) 
}) -} - -fn handle_update_meet_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - log::debug!("[config][rpc] update_meet_settings enter"); - let update = match deserialize_params::(params) { - Ok(u) => u, - Err(err) => { - log::warn!("[config][rpc] update_meet_settings invalid params: {err}"); - return Err(err); - } - }; - log::debug!( - "[config][rpc] update_meet_settings patch auto_orchestrator_handoff={:?}", - update.auto_orchestrator_handoff - ); - let patch = config_rpc::MeetSettingsPatch { - auto_orchestrator_handoff: update.auto_orchestrator_handoff, - }; - match config_rpc::load_and_apply_meet_settings(patch).await { - Ok(outcome) => { - log::debug!("[config][rpc] update_meet_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] update_meet_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_get_meet_settings(_params: Map) -> ControllerFuture { - Box::pin(async { - log::debug!("[config][rpc] get_meet_settings enter"); - let config = match config_rpc::load_config_with_timeout().await { - Ok(c) => c, - Err(err) => { - log::warn!("[config][rpc] get_meet_settings load failed: {err}"); - return Err(err); - } - }; - let auto_orchestrator_handoff = config.meet.auto_orchestrator_handoff; - log::debug!( - "[config][rpc] get_meet_settings ok auto_orchestrator_handoff={auto_orchestrator_handoff}" - ); - let result = serde_json::json!({ - "auto_orchestrator_handoff": auto_orchestrator_handoff, - }); - to_json(RpcOutcome::new( - result, - vec!["meet settings read".to_string()], - )) - }) -} - -fn handle_agent_server_status(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::agent_server_status()) }) -} - -fn handle_reset_local_data(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::reset_local_data().await?) 
}) -} - -fn handle_get_data_paths(_params: Map) -> ControllerFuture { - Box::pin(async { - log::debug!("[config][rpc] get_data_paths enter"); - match config_rpc::get_data_paths().await { - Ok(outcome) => { - log::debug!("[config][rpc] get_data_paths ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] get_data_paths fail: {err}"); - Err(err) - } - } - }) -} - -fn handle_get_agent_paths(_params: Map) -> ControllerFuture { - Box::pin(async { - log::debug!("[config][rpc] get_agent_paths enter"); - match config_rpc::get_agent_paths().await { - Ok(outcome) => { - log::debug!("[config][rpc] get_agent_paths ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] get_agent_paths fail: {err}"); - Err(err) - } - } - }) -} - -fn handle_update_agent_paths(params: Map) -> ControllerFuture { - Box::pin(async move { - log::debug!("[config][rpc] update_agent_paths enter"); - let update = match deserialize_params::(params) { - Ok(u) => u, - Err(err) => { - log::warn!("[config][rpc] update_agent_paths invalid params: {err}"); - return Err(err); - } - }; - let patch = config_rpc::AgentPathsPatch { - action_dir: update.action_dir, - }; - match config_rpc::load_and_apply_agent_paths_settings(patch).await { - Ok(outcome) => { - log::debug!("[config][rpc] update_agent_paths ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] update_agent_paths failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_get_onboarding_completed(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::get_onboarding_completed().await?) }) -} - -fn handle_get_dictation_settings(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::get_dictation_settings().await?) 
}) -} - -fn handle_update_dictation_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::DictationSettingsPatch { - enabled: update.enabled, - hotkey: update.hotkey, - activation_mode: update.activation_mode, - llm_refinement: update.llm_refinement, - streaming: update.streaming, - streaming_interval_ms: update.streaming_interval_ms, - }; - to_json(config_rpc::load_and_apply_dictation_settings(patch).await?) - }) -} - -fn handle_get_voice_server_settings(_params: Map) -> ControllerFuture { - Box::pin(async { to_json(config_rpc::get_voice_server_settings().await?) }) -} - -fn handle_update_voice_server_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::VoiceServerSettingsPatch { - auto_start: update.auto_start, - hotkey: update.hotkey, - activation_mode: update.activation_mode, - skip_cleanup: update.skip_cleanup, - min_duration_secs: update.min_duration_secs, - silence_threshold: update.silence_threshold, - custom_dictionary: update.custom_dictionary, - always_on_enabled: update.always_on_enabled, - wake_word: update.wake_word, - }; - let result = config_rpc::load_and_apply_voice_server_settings(patch).await?; - // Apply the always-on toggle live (start/idle the capture loop) so the - // Settings switch takes effect without a restart. Don't fail the RPC if - // the reload hiccups, but DO surface it — otherwise the saved setting - // silently wouldn't apply until the next launch. 
- match config_rpc::load_config_with_timeout().await { - Ok(config) => { - log::debug!("[config][rpc] voice settings saved; applying live always-on state"); - crate::openhuman::voice::always_on::start_if_enabled(&config).await; - } - Err(error) => { - log::warn!( - "[config][rpc] voice settings saved, but live always-on apply was skipped \ - (config reload failed): {error}" - ); - } - } - to_json(result) - }) -} - -fn handle_set_onboarding_completed(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - to_json(config_rpc::set_onboarding_completed(payload.value).await?) - }) -} - -fn handle_update_composio_trigger_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - log::debug!("[config][rpc] update_composio_trigger_settings enter"); - let update = match deserialize_params::(params) { - Ok(u) => u, - Err(err) => { - log::warn!("[config][rpc] update_composio_trigger_settings invalid params: {err}"); - return Err(err); - } - }; - let patch = config_rpc::ComposioTriggerSettingsPatch { - triage_disabled: update.triage_disabled, - triage_disabled_toolkits: update.triage_disabled_toolkits, - }; - match config_rpc::load_and_apply_composio_trigger_settings(patch).await { - Ok(outcome) => { - log::debug!("[config][rpc] update_composio_trigger_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] update_composio_trigger_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_get_composio_trigger_settings(_params: Map) -> ControllerFuture { - Box::pin(async { - log::debug!("[config][rpc] get_composio_trigger_settings enter"); - match config_rpc::get_composio_trigger_settings().await { - Ok(outcome) => { - log::debug!("[config][rpc] get_composio_trigger_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] get_composio_trigger_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_update_search_settings(params: Map) -> 
ControllerFuture { - Box::pin(async move { - log::debug!("[config][rpc] update_search_settings enter"); - let update = match deserialize_params::(params) { - Ok(u) => u, - Err(err) => { - log::warn!("[config][rpc] update_search_settings invalid params: {err}"); - return Err(err); - } - }; - let patch = config_rpc::SearchSettingsPatch { - engine: update.engine, - max_results: update.max_results, - timeout_secs: update.timeout_secs, - parallel_api_key: update.parallel_api_key, - brave_api_key: update.brave_api_key, - querit_api_key: update.querit_api_key, - allowed_domains: update.allowed_domains, - allow_all: update.allow_all, - }; - match config_rpc::load_and_apply_search_settings(patch).await { - Ok(outcome) => { - log::debug!("[config][rpc] update_search_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] update_search_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_get_search_settings(_params: Map) -> ControllerFuture { - Box::pin(async { - log::debug!("[config][rpc] get_search_settings enter"); - match config_rpc::get_search_settings().await { - Ok(outcome) => { - log::debug!("[config][rpc] get_search_settings ok"); - to_json(outcome) - } - Err(err) => { - log::warn!("[config][rpc] get_search_settings failed: {err}"); - Err(err) - } - } - }) -} - -fn handle_get_activity_level_settings(_params: Map) -> ControllerFuture { - Box::pin(async move { to_json(config_rpc::get_activity_level_settings().await?) }) -} - -fn handle_update_activity_level_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::ActivityLevelSettingsPatch { - level: update.level, - }; - to_json(config_rpc::load_and_apply_activity_level_settings(patch).await?) 
- }) -} - -#[derive(Debug, Deserialize)] -struct MemorySyncSettingsUpdate { - sync_interval_secs: Option, -} - -fn handle_get_memory_sync_settings(_params: Map) -> ControllerFuture { - Box::pin(async move { to_json(config_rpc::get_memory_sync_settings().await?) }) -} - -fn handle_update_memory_sync_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::MemorySyncSettingsPatch { - sync_interval_secs: update.sync_interval_secs, - }; - to_json(config_rpc::load_and_apply_memory_sync_settings(patch).await?) - }) -} - -#[derive(Debug, Deserialize)] -struct SandboxSettingsUpdate { - backend: Option, - enabled: Option, - docker_image: Option, - docker_memory_limit_mb: Option, - docker_cpu_limit: Option, - env_passthrough: Option>, -} - -fn handle_get_sandbox_settings(_params: Map) -> ControllerFuture { - Box::pin(async move { to_json(config_rpc::get_sandbox_settings().await?) }) -} - -fn handle_update_sandbox_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - let update = deserialize_params::(params)?; - let patch = config_rpc::SandboxSettingsPatch { - backend: update.backend, - enabled: update.enabled, - docker_image: update.docker_image, - docker_memory_limit_mb: update.docker_memory_limit_mb, - docker_cpu_limit: update.docker_cpu_limit, - env_passthrough: update.env_passthrough, - }; - to_json(config_rpc::load_and_apply_sandbox_settings(patch).await?) 
- }) -} - -fn deserialize_params(params: Map) -> Result { - serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) -} - -fn deserialize_present_json<'de, D>(deserializer: D) -> Result, D::Error> -where - D: Deserializer<'de>, -{ - Value::deserialize(deserializer).map(Some) -} - -fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment, - required: false, - } -} - -fn optional_json(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::Json)), - comment, - required: false, - } -} - -fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::String, - comment, - required: true, - } -} - -fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), - comment, - required: false, - } -} - -fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Json, - comment, - required: true, - } -} - -fn to_json(outcome: RpcOutcome) -> Result { - outcome.into_cli_compatible_json() -} - -#[cfg(test)] -#[path = "schemas_tests.rs"] -mod tests; diff --git a/src/openhuman/config/schemas/controllers.rs b/src/openhuman/config/schemas/controllers.rs new file mode 100644 index 0000000000..333da97627 --- /dev/null +++ b/src/openhuman/config/schemas/controllers.rs @@ -0,0 +1,921 @@ +use serde_json::{Map, Value}; + +use crate::core::all::{ControllerFuture, RegisteredController}; +use crate::core::ControllerSchema; +use crate::openhuman::config::rpc as config_rpc; + +use super::helpers::{ + deserialize_params, to_json, ActivityLevelSettingsUpdate, AgentPathsUpdate, + AgentSettingsUpdate, AnalyticsSettingsUpdate, AutonomySettingsUpdate, BrowserSettingsUpdate, + 
ComposioTriggerSettingsUpdate, DictationSettingsUpdate, LocalAiSettingsUpdate, + MeetSettingsUpdate, MemorySettingsUpdate, MemorySyncSettingsUpdate, ModelSettingsUpdate, + OnboardingCompletedSetParams, RuntimeSettingsUpdate, SandboxSettingsUpdate, + ScreenIntelligenceSettingsUpdate, SearchSettingsUpdate, SetBrowserAllowAllParams, + VoiceServerSettingsUpdate, WorkspaceOnboardingFlagParams, WorkspaceOnboardingFlagSetParams, + DEFAULT_ONBOARDING_FLAG_NAME, +}; +use super::schema_defs::schemas; + +pub fn all_controller_schemas() -> Vec { + vec![ + schemas("get_config"), + schemas("get_client_config"), + schemas("update_model_settings"), + schemas("update_memory_settings"), + schemas("update_screen_intelligence_settings"), + schemas("update_runtime_settings"), + schemas("update_browser_settings"), + schemas("update_local_ai_settings"), + schemas("resolve_api_url"), + schemas("get_runtime_flags"), + schemas("set_browser_allow_all"), + schemas("workspace_onboarding_flag_exists"), + schemas("workspace_onboarding_flag_set"), + schemas("update_analytics_settings"), + schemas("get_analytics_settings"), + schemas("get_dashboard_settings"), + schemas("update_meet_settings"), + schemas("get_meet_settings"), + schemas("agent_server_status"), + schemas("reset_local_data"), + schemas("get_data_paths"), + schemas("get_agent_paths"), + schemas("update_agent_paths"), + schemas("get_onboarding_completed"), + schemas("set_onboarding_completed"), + schemas("get_dictation_settings"), + schemas("update_dictation_settings"), + schemas("get_voice_server_settings"), + schemas("update_voice_server_settings"), + schemas("update_composio_trigger_settings"), + schemas("get_composio_trigger_settings"), + schemas("get_autonomy_settings"), + schemas("update_autonomy_settings"), + schemas("get_agent_settings"), + schemas("update_agent_settings"), + schemas("update_search_settings"), + schemas("get_search_settings"), + schemas("get_activity_level_settings"), + 
schemas("update_activity_level_settings"), + schemas("get_memory_sync_settings"), + schemas("update_memory_sync_settings"), + schemas("get_sandbox_settings"), + schemas("update_sandbox_settings"), + ] +} + +pub fn all_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: schemas("get_config"), + handler: handle_get_config, + }, + RegisteredController { + schema: schemas("get_client_config"), + handler: handle_get_client_config, + }, + RegisteredController { + schema: schemas("update_model_settings"), + handler: handle_update_model_settings, + }, + RegisteredController { + schema: schemas("update_memory_settings"), + handler: handle_update_memory_settings, + }, + RegisteredController { + schema: schemas("update_screen_intelligence_settings"), + handler: handle_update_screen_intelligence_settings, + }, + RegisteredController { + schema: schemas("update_runtime_settings"), + handler: handle_update_runtime_settings, + }, + RegisteredController { + schema: schemas("update_browser_settings"), + handler: handle_update_browser_settings, + }, + RegisteredController { + schema: schemas("update_local_ai_settings"), + handler: handle_update_local_ai_settings, + }, + RegisteredController { + schema: schemas("resolve_api_url"), + handler: handle_resolve_api_url, + }, + RegisteredController { + schema: schemas("get_runtime_flags"), + handler: handle_get_runtime_flags, + }, + RegisteredController { + schema: schemas("set_browser_allow_all"), + handler: handle_set_browser_allow_all, + }, + RegisteredController { + schema: schemas("workspace_onboarding_flag_exists"), + handler: handle_workspace_onboarding_flag_exists, + }, + RegisteredController { + schema: schemas("workspace_onboarding_flag_set"), + handler: handle_workspace_onboarding_flag_set, + }, + RegisteredController { + schema: schemas("update_analytics_settings"), + handler: handle_update_analytics_settings, + }, + RegisteredController { + schema: schemas("get_analytics_settings"), + handler: 
handle_get_analytics_settings, + }, + RegisteredController { + schema: schemas("get_dashboard_settings"), + handler: handle_get_dashboard_settings, + }, + RegisteredController { + schema: schemas("update_meet_settings"), + handler: handle_update_meet_settings, + }, + RegisteredController { + schema: schemas("get_meet_settings"), + handler: handle_get_meet_settings, + }, + RegisteredController { + schema: schemas("agent_server_status"), + handler: handle_agent_server_status, + }, + RegisteredController { + schema: schemas("reset_local_data"), + handler: handle_reset_local_data, + }, + RegisteredController { + schema: schemas("get_data_paths"), + handler: handle_get_data_paths, + }, + RegisteredController { + schema: schemas("get_agent_paths"), + handler: handle_get_agent_paths, + }, + RegisteredController { + schema: schemas("update_agent_paths"), + handler: handle_update_agent_paths, + }, + RegisteredController { + schema: schemas("get_onboarding_completed"), + handler: handle_get_onboarding_completed, + }, + RegisteredController { + schema: schemas("set_onboarding_completed"), + handler: handle_set_onboarding_completed, + }, + RegisteredController { + schema: schemas("get_dictation_settings"), + handler: handle_get_dictation_settings, + }, + RegisteredController { + schema: schemas("update_dictation_settings"), + handler: handle_update_dictation_settings, + }, + RegisteredController { + schema: schemas("get_voice_server_settings"), + handler: handle_get_voice_server_settings, + }, + RegisteredController { + schema: schemas("update_voice_server_settings"), + handler: handle_update_voice_server_settings, + }, + RegisteredController { + schema: schemas("update_composio_trigger_settings"), + handler: handle_update_composio_trigger_settings, + }, + RegisteredController { + schema: schemas("get_composio_trigger_settings"), + handler: handle_get_composio_trigger_settings, + }, + RegisteredController { + schema: schemas("get_autonomy_settings"), + handler: 
handle_get_autonomy_settings, + }, + RegisteredController { + schema: schemas("update_autonomy_settings"), + handler: handle_update_autonomy_settings, + }, + RegisteredController { + schema: schemas("get_agent_settings"), + handler: handle_get_agent_settings, + }, + RegisteredController { + schema: schemas("update_agent_settings"), + handler: handle_update_agent_settings, + }, + RegisteredController { + schema: schemas("update_search_settings"), + handler: handle_update_search_settings, + }, + RegisteredController { + schema: schemas("get_search_settings"), + handler: handle_get_search_settings, + }, + RegisteredController { + schema: schemas("get_activity_level_settings"), + handler: handle_get_activity_level_settings, + }, + RegisteredController { + schema: schemas("update_activity_level_settings"), + handler: handle_update_activity_level_settings, + }, + RegisteredController { + schema: schemas("get_memory_sync_settings"), + handler: handle_get_memory_sync_settings, + }, + RegisteredController { + schema: schemas("update_memory_sync_settings"), + handler: handle_update_memory_sync_settings, + }, + RegisteredController { + schema: schemas("get_sandbox_settings"), + handler: handle_get_sandbox_settings, + }, + RegisteredController { + schema: schemas("update_sandbox_settings"), + handler: handle_update_sandbox_settings, + }, + ] +} + +fn handle_get_config(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::load_and_get_config_snapshot().await?) 
}) +} + +fn handle_get_client_config(_params: Map) -> ControllerFuture { + Box::pin(async move { + log::debug!("[config][rpc] get_client_config enter"); + match config_rpc::load_and_get_client_config_snapshot().await { + Ok(snapshot) => to_json(snapshot), + Err(err) => { + log::warn!("[config][rpc] get_client_config load failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_update_model_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::ModelSettingsPatch { + api_url: update.api_url, + inference_url: update.inference_url, + api_key: update.api_key, + default_model: update.default_model, + default_temperature: update.default_temperature, + model_routes: update.model_routes.map(|routes| { + routes + .into_iter() + .map(|r| crate::openhuman::config::ModelRouteConfig { + hint: r.hint, + model: r.model, + }) + .collect() + }), + cloud_providers: update + .cloud_providers + .map(|entries| { + use crate::openhuman::config::schema::cloud_providers::{ + generate_provider_id, is_slug_reserved, migrate_legacy_fields, AuthStyle, + CloudProviderCreds, + }; + let reserved_count = entries + .iter() + .filter(|e| { + let t = e.slug.trim(); + !t.is_empty() && is_slug_reserved(t) + }) + .count(); + if reserved_count > 0 { + log::debug!( + "[config] update_model_settings: dropping {} reserved cloud provider slug(s)", + reserved_count + ); + } + entries + .into_iter() + // Silently drop entries whose (non-empty) slug is reserved — + // typically the migration-seeded "openhuman" / "cloud" / + // "pid" built-ins that the frontend echoes back on every + // save (see `migrations::unify_ai_provider_settings`). + // Empty slugs still fall through so the explicit + // validation error below fires for actual frontend + // bugs. `apply_model_settings` re-injects the existing + // reserved entries from the stored config so they + // aren't dropped on save. 
+ .filter(|e| { + let trimmed = e.slug.trim(); + trimmed.is_empty() || !is_slug_reserved(trimmed) + }) + .map(|e| { + let slug = e.slug.trim().to_string(); + if slug.is_empty() { + return Err( + "cloud provider slug must not be empty".to_string() + ); + } + let auth_style = match e + .auth_style + .as_deref() + .unwrap_or("bearer") + .to_ascii_lowercase() + .as_str() + { + "bearer" => AuthStyle::Bearer, + "anthropic" => AuthStyle::Anthropic, + "openhuman_jwt" | "openhumanjwt" => AuthStyle::OpenhumanJwt, + "none" => AuthStyle::None, + other => { + return Err(format!( + "unknown auth_style '{}'; valid: bearer, anthropic, openhuman_jwt, none", + other + )) + } + }; + let id = e + .id + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| generate_provider_id(&slug)); + let label = e + .label + .filter(|s| !s.trim().is_empty()) + .unwrap_or_else(|| slug.clone()); + let mut entry = CloudProviderCreds { + id, + slug, + label, + endpoint: e.endpoint, + auth_style, + legacy_type: e.legacy_type, + default_model: e.default_model, + }; + // Apply any remaining legacy-field migration. + migrate_legacy_fields(&mut entry); + Ok(entry) + }) + .collect::, String>>() + }) + .transpose()?, + primary_cloud: update.primary_cloud, + chat_provider: update.chat_provider, + reasoning_provider: update.reasoning_provider, + agentic_provider: update.agentic_provider, + coding_provider: update.coding_provider, + memory_provider: update.memory_provider, + embeddings_provider: update.embeddings_provider, + heartbeat_provider: update.heartbeat_provider, + learning_provider: update.learning_provider, + subconscious_provider: update.subconscious_provider, + }; + to_json(config_rpc::load_and_apply_model_settings(patch).await?) 
+ }) +} + +fn handle_update_memory_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::MemorySettingsPatch { + backend: update.backend, + auto_save: update.auto_save, + embedding_provider: update.embedding_provider, + embedding_model: update.embedding_model, + embedding_dimensions: update.embedding_dimensions, + memory_window: update.memory_window, + }; + to_json(config_rpc::load_and_apply_memory_settings(patch).await?) + }) +} + +fn handle_update_screen_intelligence_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::ScreenIntelligenceSettingsPatch { + enabled: update.enabled, + capture_policy: update.capture_policy, + policy_mode: update.policy_mode, + baseline_fps: update.baseline_fps, + vision_enabled: update.vision_enabled, + autocomplete_enabled: update.autocomplete_enabled, + use_vision_model: update.use_vision_model, + keep_screenshots: update.keep_screenshots, + allowlist: update.allowlist, + denylist: update.denylist, + }; + to_json(config_rpc::load_and_apply_screen_intelligence_settings(patch).await?) + }) +} + +fn handle_update_runtime_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::RuntimeSettingsPatch { + kind: update.kind, + reasoning_enabled: update.reasoning_enabled, + }; + to_json(config_rpc::load_and_apply_runtime_settings(patch).await?) + }) +} + +pub(super) fn handle_get_autonomy_settings(_params: Map) -> ControllerFuture { + Box::pin(async move { to_json(config_rpc::get_autonomy_settings().await?) 
}) +} + +pub(super) fn handle_update_autonomy_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::AutonomySettingsPatch { + level: update.level, + workspace_only: update.workspace_only, + allowed_commands: update.allowed_commands, + forbidden_paths: update.forbidden_paths, + trusted_roots: update.trusted_roots, + allow_tool_install: update.allow_tool_install, + max_actions_per_hour: update + .max_actions_per_hour + .map(|v| u32::try_from(v).unwrap_or(u32::MAX)), + auto_approve: update.auto_approve, + require_task_plan_approval: update.require_task_plan_approval, + }; + to_json(config_rpc::load_and_apply_autonomy_settings(patch).await?) + }) +} + +fn handle_get_agent_settings(_params: Map) -> ControllerFuture { + Box::pin(async { + log::debug!("[config][rpc] get_agent_settings enter"); + match config_rpc::get_agent_settings().await { + Ok(outcome) => { + log::debug!("[config][rpc] get_agent_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] get_agent_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_update_agent_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + log::debug!("[config][rpc] update_agent_settings enter"); + let update = match deserialize_params::(params) { + Ok(u) => u, + Err(err) => { + log::warn!("[config][rpc] update_agent_settings invalid params: {err}"); + return Err(err); + } + }; + let patch = config_rpc::AgentSettingsPatch { + agent_timeout_secs: update.agent_timeout_secs, + }; + match config_rpc::load_and_apply_agent_settings(patch).await { + Ok(outcome) => { + log::debug!("[config][rpc] update_agent_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] update_agent_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_update_browser_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let 
patch = config_rpc::BrowserSettingsPatch { + enabled: update.enabled, + }; + to_json(config_rpc::load_and_apply_browser_settings(patch).await?) + }) +} + +fn handle_update_local_ai_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let base_url = match update.base_url { + None => None, + Some(Value::Null) => Some(None), + Some(Value::String(value)) => Some(Some(value)), + Some(_) => return Err("invalid params: base_url must be a string or null".to_string()), + }; + let patch = config_rpc::LocalAiSettingsPatch { + runtime_enabled: update.runtime_enabled, + opt_in_confirmed: update.opt_in_confirmed, + provider: update.provider, + base_url, + model_id: update.model_id, + chat_model_id: update.chat_model_id, + usage_embeddings: update.usage_embeddings, + usage_heartbeat: update.usage_heartbeat, + usage_learning_reflection: update.usage_learning_reflection, + usage_subconscious: update.usage_subconscious, + }; + to_json(config_rpc::load_and_apply_local_ai_settings(patch).await?) + }) +} + +fn handle_get_runtime_flags(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::get_runtime_flags()) }) +} + +fn handle_resolve_api_url(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::load_and_resolve_api_url().await?) }) +} + +fn handle_set_browser_allow_all(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + to_json(config_rpc::set_browser_allow_all(payload.enabled)?) 
+ }) +} + +fn handle_workspace_onboarding_flag_exists(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + to_json( + config_rpc::workspace_onboarding_flag_resolve( + payload.flag_name, + DEFAULT_ONBOARDING_FLAG_NAME, + ) + .await?, + ) + }) +} + +fn handle_workspace_onboarding_flag_set(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + to_json( + config_rpc::workspace_onboarding_flag_set( + payload.flag_name, + DEFAULT_ONBOARDING_FLAG_NAME, + payload.value, + ) + .await?, + ) + }) +} + +fn handle_update_analytics_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::AnalyticsSettingsPatch { + enabled: update.enabled, + }; + to_json(config_rpc::load_and_apply_analytics_settings(patch).await?) + }) +} + +fn handle_get_analytics_settings(_params: Map) -> ControllerFuture { + Box::pin(async { + use crate::rpc::RpcOutcome; + let config = config_rpc::load_config_with_timeout().await?; + let result = serde_json::json!({ + "enabled": config.observability.analytics_enabled, + }); + to_json(RpcOutcome::new( + result, + vec!["analytics settings read".to_string()], + )) + }) +} + +fn handle_get_dashboard_settings(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::get_dashboard_settings().await?) 
}) +} + +fn handle_update_meet_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + log::debug!("[config][rpc] update_meet_settings enter"); + let update = match deserialize_params::(params) { + Ok(u) => u, + Err(err) => { + log::warn!("[config][rpc] update_meet_settings invalid params: {err}"); + return Err(err); + } + }; + log::debug!( + "[config][rpc] update_meet_settings patch auto_orchestrator_handoff={:?}", + update.auto_orchestrator_handoff + ); + let patch = config_rpc::MeetSettingsPatch { + auto_orchestrator_handoff: update.auto_orchestrator_handoff, + }; + match config_rpc::load_and_apply_meet_settings(patch).await { + Ok(outcome) => { + log::debug!("[config][rpc] update_meet_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] update_meet_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_get_meet_settings(_params: Map) -> ControllerFuture { + Box::pin(async { + use crate::rpc::RpcOutcome; + log::debug!("[config][rpc] get_meet_settings enter"); + let config = match config_rpc::load_config_with_timeout().await { + Ok(c) => c, + Err(err) => { + log::warn!("[config][rpc] get_meet_settings load failed: {err}"); + return Err(err); + } + }; + let auto_orchestrator_handoff = config.meet.auto_orchestrator_handoff; + log::debug!( + "[config][rpc] get_meet_settings ok auto_orchestrator_handoff={auto_orchestrator_handoff}" + ); + let result = serde_json::json!({ + "auto_orchestrator_handoff": auto_orchestrator_handoff, + }); + to_json(RpcOutcome::new( + result, + vec!["meet settings read".to_string()], + )) + }) +} + +fn handle_agent_server_status(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::agent_server_status()) }) +} + +fn handle_reset_local_data(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::reset_local_data().await?) 
}) +} + +fn handle_get_data_paths(_params: Map) -> ControllerFuture { + Box::pin(async { + log::debug!("[config][rpc] get_data_paths enter"); + match config_rpc::get_data_paths().await { + Ok(outcome) => { + log::debug!("[config][rpc] get_data_paths ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] get_data_paths fail: {err}"); + Err(err) + } + } + }) +} + +pub(super) fn handle_get_agent_paths(_params: Map) -> ControllerFuture { + Box::pin(async { + log::debug!("[config][rpc] get_agent_paths enter"); + match config_rpc::get_agent_paths().await { + Ok(outcome) => { + log::debug!("[config][rpc] get_agent_paths ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] get_agent_paths fail: {err}"); + Err(err) + } + } + }) +} + +fn handle_update_agent_paths(params: Map) -> ControllerFuture { + Box::pin(async move { + log::debug!("[config][rpc] update_agent_paths enter"); + let update = match deserialize_params::(params) { + Ok(u) => u, + Err(err) => { + log::warn!("[config][rpc] update_agent_paths invalid params: {err}"); + return Err(err); + } + }; + let patch = config_rpc::AgentPathsPatch { + action_dir: update.action_dir, + }; + match config_rpc::load_and_apply_agent_paths_settings(patch).await { + Ok(outcome) => { + log::debug!("[config][rpc] update_agent_paths ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] update_agent_paths failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_get_onboarding_completed(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::get_onboarding_completed().await?) }) +} + +fn handle_get_dictation_settings(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::get_dictation_settings().await?) 
}) +} + +fn handle_update_dictation_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::DictationSettingsPatch { + enabled: update.enabled, + hotkey: update.hotkey, + activation_mode: update.activation_mode, + llm_refinement: update.llm_refinement, + streaming: update.streaming, + streaming_interval_ms: update.streaming_interval_ms, + }; + to_json(config_rpc::load_and_apply_dictation_settings(patch).await?) + }) +} + +fn handle_get_voice_server_settings(_params: Map) -> ControllerFuture { + Box::pin(async { to_json(config_rpc::get_voice_server_settings().await?) }) +} + +fn handle_update_voice_server_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::VoiceServerSettingsPatch { + auto_start: update.auto_start, + hotkey: update.hotkey, + activation_mode: update.activation_mode, + skip_cleanup: update.skip_cleanup, + min_duration_secs: update.min_duration_secs, + silence_threshold: update.silence_threshold, + custom_dictionary: update.custom_dictionary, + always_on_enabled: update.always_on_enabled, + wake_word: update.wake_word, + }; + let result = config_rpc::load_and_apply_voice_server_settings(patch).await?; + // Apply the always-on toggle live (start/idle the capture loop) so the + // Settings switch takes effect without a restart. Don't fail the RPC if + // the reload hiccups, but DO surface it — otherwise the saved setting + // silently wouldn't apply until the next launch. 
+ match config_rpc::load_config_with_timeout().await { + Ok(config) => { + log::debug!("[config][rpc] voice settings saved; applying live always-on state"); + crate::openhuman::voice::always_on::start_if_enabled(&config).await; + } + Err(error) => { + log::warn!( + "[config][rpc] voice settings saved, but live always-on apply was skipped \ + (config reload failed): {error}" + ); + } + } + to_json(result) + }) +} + +fn handle_set_onboarding_completed(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + to_json(config_rpc::set_onboarding_completed(payload.value).await?) + }) +} + +fn handle_update_composio_trigger_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + log::debug!("[config][rpc] update_composio_trigger_settings enter"); + let update = match deserialize_params::(params) { + Ok(u) => u, + Err(err) => { + log::warn!("[config][rpc] update_composio_trigger_settings invalid params: {err}"); + return Err(err); + } + }; + let patch = config_rpc::ComposioTriggerSettingsPatch { + triage_disabled: update.triage_disabled, + triage_disabled_toolkits: update.triage_disabled_toolkits, + }; + match config_rpc::load_and_apply_composio_trigger_settings(patch).await { + Ok(outcome) => { + log::debug!("[config][rpc] update_composio_trigger_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] update_composio_trigger_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_get_composio_trigger_settings(_params: Map) -> ControllerFuture { + Box::pin(async { + log::debug!("[config][rpc] get_composio_trigger_settings enter"); + match config_rpc::get_composio_trigger_settings().await { + Ok(outcome) => { + log::debug!("[config][rpc] get_composio_trigger_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] get_composio_trigger_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_update_search_settings(params: Map) -> 
ControllerFuture { + Box::pin(async move { + log::debug!("[config][rpc] update_search_settings enter"); + let update = match deserialize_params::(params) { + Ok(u) => u, + Err(err) => { + log::warn!("[config][rpc] update_search_settings invalid params: {err}"); + return Err(err); + } + }; + let patch = config_rpc::SearchSettingsPatch { + engine: update.engine, + max_results: update.max_results, + timeout_secs: update.timeout_secs, + parallel_api_key: update.parallel_api_key, + brave_api_key: update.brave_api_key, + querit_api_key: update.querit_api_key, + allowed_domains: update.allowed_domains, + allow_all: update.allow_all, + }; + match config_rpc::load_and_apply_search_settings(patch).await { + Ok(outcome) => { + log::debug!("[config][rpc] update_search_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] update_search_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_get_search_settings(_params: Map) -> ControllerFuture { + Box::pin(async { + log::debug!("[config][rpc] get_search_settings enter"); + match config_rpc::get_search_settings().await { + Ok(outcome) => { + log::debug!("[config][rpc] get_search_settings ok"); + to_json(outcome) + } + Err(err) => { + log::warn!("[config][rpc] get_search_settings failed: {err}"); + Err(err) + } + } + }) +} + +fn handle_get_activity_level_settings(_params: Map) -> ControllerFuture { + Box::pin(async move { to_json(config_rpc::get_activity_level_settings().await?) }) +} + +fn handle_update_activity_level_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::ActivityLevelSettingsPatch { + level: update.level, + }; + to_json(config_rpc::load_and_apply_activity_level_settings(patch).await?) + }) +} + +fn handle_get_memory_sync_settings(_params: Map) -> ControllerFuture { + Box::pin(async move { to_json(config_rpc::get_memory_sync_settings().await?) 
}) +} + +fn handle_update_memory_sync_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::MemorySyncSettingsPatch { + sync_interval_secs: update.sync_interval_secs, + }; + to_json(config_rpc::load_and_apply_memory_sync_settings(patch).await?) + }) +} + +fn handle_get_sandbox_settings(_params: Map) -> ControllerFuture { + Box::pin(async move { to_json(config_rpc::get_sandbox_settings().await?) }) +} + +fn handle_update_sandbox_settings(params: Map) -> ControllerFuture { + Box::pin(async move { + let update = deserialize_params::(params)?; + let patch = config_rpc::SandboxSettingsPatch { + backend: update.backend, + enabled: update.enabled, + docker_image: update.docker_image, + docker_memory_limit_mb: update.docker_memory_limit_mb, + docker_cpu_limit: update.docker_cpu_limit, + env_passthrough: update.env_passthrough, + }; + to_json(config_rpc::load_and_apply_sandbox_settings(patch).await?) + }) +} diff --git a/src/openhuman/config/schemas/helpers.rs b/src/openhuman/config/schemas/helpers.rs new file mode 100644 index 0000000000..f8914c2300 --- /dev/null +++ b/src/openhuman/config/schemas/helpers.rs @@ -0,0 +1,320 @@ +use serde::de::{DeserializeOwned, Deserializer}; +use serde::Deserialize; +use serde_json::{Map, Value}; + +use crate::core::{FieldSchema, TypeSchema}; +use crate::rpc::RpcOutcome; + +pub(super) const DEFAULT_ONBOARDING_FLAG_NAME: &str = ".skip_onboarding"; + +#[derive(Debug, Deserialize)] +pub(super) struct ModelRouteUpdate { + pub(super) hint: String, + pub(super) model: String, +} + +#[derive(Debug, Deserialize)] +pub(super) struct CloudProviderUpdate { + /// Opaque stable id. Empty / missing → server generates a new id. + pub(super) id: Option, + /// Routing slug, e.g. "openai", "my-deepseek". Must be unique per config. + pub(super) slug: String, + /// Human-readable label. 
+ #[serde(default)] + pub(super) label: Option, + pub(super) endpoint: String, + /// Auth style: "bearer" | "anthropic" | "openhuman_jwt" | "none". + #[serde(default)] + pub(super) auth_style: Option, + /// Legacy field — tolerated on read for back-compat but not required. + #[serde(rename = "type", default)] + pub(super) legacy_type: Option, + /// Legacy field — tolerated on read. + #[serde(default)] + pub(super) default_model: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct ModelSettingsUpdate { + /// OpenHuman product backend URL. Used for auth, billing, voice, and + /// every non-inference HTTP call. Almost always left blank so it + /// defaults to the canonical hosted backend. + pub(super) api_url: Option, + /// Custom OpenAI-compatible LLM endpoint. When set together with + /// `api_key`, inference talks directly to this URL instead of routing + /// through the OpenHuman backend. Send an empty string to clear. + pub(super) inference_url: Option, + /// Optional API key for OpenAI-compatible backends. Stored verbatim in + /// `config.toml` on the user's machine — see #1342 (local-first / pluggable + /// backends). The key is never echoed back over RPC; `get_client_config` + /// only reports `api_key_set: bool`. + pub(super) api_key: Option, + pub(super) default_model: Option, + pub(super) default_temperature: Option, + /// When present, REPLACES `config.model_routes` wholesale with these + /// `(hint, model)` pairs. Send `Some([])` to clear all routes (used when + /// the user switches back to the OpenHuman backend whose built-in router + /// picks per-task models on its own). Omit to leave existing routes + /// untouched. + pub(super) model_routes: Option>, + /// When present, REPLACES `config.cloud_providers` wholesale. The keys + /// themselves live in `auth-profiles.json` via + /// `cloud_provider_set_key` — they are NOT carried here. 
+ pub(super) cloud_providers: Option>, + pub(super) primary_cloud: Option, + pub(super) chat_provider: Option, + pub(super) reasoning_provider: Option, + pub(super) agentic_provider: Option, + pub(super) coding_provider: Option, + pub(super) memory_provider: Option, + pub(super) embeddings_provider: Option, + pub(super) heartbeat_provider: Option, + pub(super) learning_provider: Option, + pub(super) subconscious_provider: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct MemorySettingsUpdate { + pub(super) backend: Option, + pub(super) auto_save: Option, + pub(super) embedding_provider: Option, + pub(super) embedding_model: Option, + pub(super) embedding_dimensions: Option, + /// One of `"minimal" | "balanced" | "extended" | "maximum"`. + pub(super) memory_window: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct RuntimeSettingsUpdate { + pub(super) kind: Option, + pub(super) reasoning_enabled: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct BrowserSettingsUpdate { + pub(super) enabled: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct ScreenIntelligenceSettingsUpdate { + pub(super) enabled: Option, + pub(super) capture_policy: Option, + pub(super) policy_mode: Option, + pub(super) baseline_fps: Option, + pub(super) vision_enabled: Option, + pub(super) autocomplete_enabled: Option, + pub(super) use_vision_model: Option, + pub(super) keep_screenshots: Option, + pub(super) allowlist: Option>, + pub(super) denylist: Option>, +} + +#[derive(Debug, Deserialize)] +pub(super) struct AnalyticsSettingsUpdate { + pub(super) enabled: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct MeetSettingsUpdate { + pub(super) auto_orchestrator_handoff: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct SearchSettingsUpdate { + pub(super) engine: Option, + pub(super) max_results: Option, + pub(super) timeout_secs: Option, + pub(super) parallel_api_key: Option, + pub(super) brave_api_key: Option, + pub(super) 
querit_api_key: Option, + pub(super) allowed_domains: Option>, + pub(super) allow_all: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct LocalAiSettingsUpdate { + pub(super) runtime_enabled: Option, + /// MVP opt-in marker. Tied to `runtime_enabled` from the unified AI + /// panel toggle (both flip on enable, both flip off on disable) so + /// the user gets local AI working with a single click instead of + /// having to also apply a tier preset. + pub(super) opt_in_confirmed: Option, + pub(super) provider: Option, + #[serde(default, deserialize_with = "deserialize_present_json")] + pub(super) base_url: Option, + pub(super) model_id: Option, + pub(super) chat_model_id: Option, + pub(super) usage_embeddings: Option, + pub(super) usage_heartbeat: Option, + pub(super) usage_learning_reflection: Option, + pub(super) usage_subconscious: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct SetBrowserAllowAllParams { + pub(super) enabled: bool, +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkspaceOnboardingFlagParams { + pub(super) flag_name: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkspaceOnboardingFlagSetParams { + pub(super) flag_name: Option, + pub(super) value: bool, +} + +#[derive(Debug, Deserialize)] +pub(super) struct OnboardingCompletedSetParams { + pub(super) value: bool, +} + +#[derive(Debug, Deserialize)] +pub(super) struct DictationSettingsUpdate { + pub(super) enabled: Option, + pub(super) hotkey: Option, + pub(super) activation_mode: Option, + pub(super) llm_refinement: Option, + pub(super) streaming: Option, + pub(super) streaming_interval_ms: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct VoiceServerSettingsUpdate { + pub(super) auto_start: Option, + pub(super) hotkey: Option, + pub(super) activation_mode: Option, + pub(super) skip_cleanup: Option, + pub(super) min_duration_secs: Option, + pub(super) silence_threshold: Option, + pub(super) custom_dictionary: Option>, + pub(super) 
always_on_enabled: Option, + pub(super) wake_word: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct ComposioTriggerSettingsUpdate { + pub(super) triage_disabled: Option, + pub(super) triage_disabled_toolkits: Option>, +} + +#[derive(Debug, Deserialize)] +pub(super) struct AutonomySettingsUpdate { + /// `"readonly" | "supervised" | "full"` (case-insensitive). + pub(super) level: Option, + pub(super) workspace_only: Option, + /// Replaces the shell command allow-list wholesale. + pub(super) allowed_commands: Option>, + /// Replaces the forbidden-paths denylist wholesale. + pub(super) forbidden_paths: Option>, + /// Replaces the trusted-roots allow-list wholesale. Each entry is + /// `{ "path": "/abs/dir", "access": "read" | "readwrite" }`. + pub(super) trusted_roots: Option>, + pub(super) allow_tool_install: Option, + // Accept u64 to match the published schema (`TypeSchema::U64`); clamped to the + // internal u32 at apply time. u32::MAX/hr is already effectively unlimited. + pub(super) max_actions_per_hour: Option, + /// Replaces the "Always allow" allowlist wholesale — tool names the agent + /// may run without an approval prompt. Empty list clears it. + pub(super) auto_approve: Option>, + pub(super) require_task_plan_approval: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct AgentSettingsUpdate { + /// Tool/action wall-clock timeout in seconds (1–3600). Validated server-side. + pub(super) agent_timeout_secs: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct AgentPathsUpdate { + /// New absolute action sandbox path. Empty string clears the override; + /// omitted leaves it unchanged. Validated server-side. + pub(super) action_dir: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct ActivityLevelSettingsUpdate { + /// "off" | "minimal" | "moderate" | "active" | "always_on" (or "0"-"4"). 
+ pub(super) level: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct MemorySyncSettingsUpdate { + pub(super) sync_interval_secs: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct SandboxSettingsUpdate { + pub(super) backend: Option, + pub(super) enabled: Option, + pub(super) docker_image: Option, + pub(super) docker_memory_limit_mb: Option, + pub(super) docker_cpu_limit: Option, + pub(super) env_passthrough: Option>, +} + +pub(super) fn deserialize_params( + params: Map, +) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +pub(super) fn deserialize_present_json<'de, D>(deserializer: D) -> Result, D::Error> +where + D: Deserializer<'de>, +{ + Value::deserialize(deserializer).map(Some) +} + +pub fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment, + required: false, + } +} + +pub fn optional_json(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment, + required: false, + } +} + +#[allow(dead_code)] +pub fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::String, + comment, + required: true, + } +} + +pub fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +pub fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Json, + comment, + required: true, + } +} + +pub(super) fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} diff --git a/src/openhuman/config/schemas/mod.rs b/src/openhuman/config/schemas/mod.rs new file mode 100644 index 0000000000..f44ff11f46 --- /dev/null +++ 
b/src/openhuman/config/schemas/mod.rs @@ -0,0 +1,35 @@ +mod controllers; +mod helpers; +mod schema_defs; + +pub use controllers::{all_controller_schemas, all_registered_controllers}; +pub use schema_defs::schemas; + +// Re-export items that schemas_tests.rs accesses via `use super::*`. +// The test module is `schemas::tests` so `super::` resolves to `schemas`. +#[cfg(test)] +use crate::core::TypeSchema; +#[cfg(test)] +use crate::rpc::RpcOutcome; +#[cfg(test)] +use controllers::{ + handle_get_agent_paths, handle_get_autonomy_settings, handle_update_autonomy_settings, +}; +#[cfg(test)] +use helpers::{ + deserialize_params, json_output, optional_bool, optional_json, optional_string, + required_string, to_json, ActivityLevelSettingsUpdate, AgentPathsUpdate, AgentSettingsUpdate, + AnalyticsSettingsUpdate, AutonomySettingsUpdate, BrowserSettingsUpdate, + ComposioTriggerSettingsUpdate, DictationSettingsUpdate, LocalAiSettingsUpdate, + MeetSettingsUpdate, MemorySettingsUpdate, MemorySyncSettingsUpdate, ModelSettingsUpdate, + OnboardingCompletedSetParams, RuntimeSettingsUpdate, SandboxSettingsUpdate, + ScreenIntelligenceSettingsUpdate, SearchSettingsUpdate, SetBrowserAllowAllParams, + VoiceServerSettingsUpdate, WorkspaceOnboardingFlagParams, WorkspaceOnboardingFlagSetParams, + DEFAULT_ONBOARDING_FLAG_NAME, +}; +#[cfg(test)] +use serde_json::{Map, Value}; + +#[cfg(test)] +#[path = "../schemas_tests.rs"] +mod tests; diff --git a/src/openhuman/config/schemas/schema_defs.rs b/src/openhuman/config/schemas/schema_defs.rs new file mode 100644 index 0000000000..b1157bbb06 --- /dev/null +++ b/src/openhuman/config/schemas/schema_defs.rs @@ -0,0 +1,782 @@ +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; + +use super::helpers::{json_output, optional_bool, optional_json, optional_string}; + +pub fn schemas(function: &str) -> ControllerSchema { + match function { + "get_config" => ControllerSchema { + namespace: "config", + function: "get", + description: "Read persisted 
config snapshot and resolved paths.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "snapshot", + ty: TypeSchema::Json, + comment: "Config snapshot with workspace and config paths.", + required: true, + }], + }, + "get_client_config" => ControllerSchema { + namespace: "config", + function: "get_client_config", + description: "Read safe client-facing config fields (api_url, feature flags). No secrets.", + inputs: vec![], + outputs: vec![ + FieldSchema { + name: "api_url", + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment: "Configured OpenHuman product backend URL, if any.", + required: false, + }, + FieldSchema { + name: "inference_url", + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment: "Custom OpenAI-compatible LLM endpoint, if any. When set together with an api_key, inference goes direct to this URL.", + required: false, + }, + FieldSchema { + name: "default_model", + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment: "Default model identifier.", + required: false, + }, + FieldSchema { + name: "app_version", + ty: TypeSchema::String, + comment: "OpenHuman core version.", + required: true, + }, + FieldSchema { + name: "api_key_set", + ty: TypeSchema::Bool, + comment: "True when a custom backend api_key is stored locally. The key itself is never returned over RPC.", + required: true, + }, + FieldSchema { + name: "model_routes", + ty: TypeSchema::Json, + comment: "Persisted task-hint -> model id pairs the core router will obey. Empty when the OpenHuman built-in router is active.", + required: true, + }, + ], + }, + "update_model_settings" => ControllerSchema { + namespace: "config", + function: "update_model_settings", + description: "Update model and backend connection settings, including a custom OpenAI-compatible backend (api_url + api_key).", + inputs: vec![ + optional_string("api_url", "OpenHuman product backend URL (auth/billing/voice). 
Almost always left blank; the inference URL is a separate `inference_url` field."), + optional_string("inference_url", "Custom OpenAI-compatible LLM endpoint. When set together with `api_key`, inference goes direct to this URL instead of the OpenHuman backend. Pass an empty string to clear."), + optional_string("api_key", "Optional API key for the configured inference endpoint. Pass an empty string to clear a previously stored key."), + optional_string("default_model", "Default model id."), + FieldSchema { + name: "default_temperature", + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment: "Default model temperature.", + required: false, + }, + FieldSchema { + name: "model_routes", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "Optional list of {hint, model} pairs mapping task hints (reasoning, agentic, coding, summarization) to provider-specific model ids. Replaces config.model_routes wholesale; send [] to clear (e.g. when switching back to the OpenHuman built-in router).", + required: false, + }, + FieldSchema { + name: "cloud_providers", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "Optional list of cloud provider entries {id, slug, label, endpoint, auth_style}. API keys are stored separately via cloud_provider_set_key. Replaces config.cloud_providers wholesale.", + required: false, + }, + optional_string("primary_cloud", "id of the cloud_providers entry used when a workload routes to 'cloud'. Empty string clears."), + optional_string("chat_provider", "Provider string for direct conversational chat workloads."), + optional_string("reasoning_provider", "Provider string for the main reasoning workload (e.g. 
'cloud', 'ollama:llama3.1:8b', 'openai:gpt-4o')."), + optional_string("agentic_provider", "Provider string for sub-agent / tool-loop workloads."), + optional_string("coding_provider", "Provider string for code-generation workloads."), + optional_string("memory_provider", "Provider string for memory-tree extract + summarise."), + optional_string("embeddings_provider", "Provider string for embedding generation."), + optional_string("heartbeat_provider", "Provider string for the heartbeat background-reasoning loop."), + optional_string("learning_provider", "Provider string for learning / reflection passes."), + optional_string("subconscious_provider", "Provider string for subconscious evaluation."), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "update_memory_settings" => ControllerSchema { + namespace: "config", + function: "update_memory_settings", + description: "Update memory backend and embedding settings.", + inputs: vec![ + optional_string("backend", "Memory backend identifier."), + FieldSchema { + name: "auto_save", + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment: "Enable auto-save.", + required: false, + }, + optional_string("embedding_provider", "Embedding provider identifier."), + optional_string("embedding_model", "Embedding model identifier."), + FieldSchema { + name: "embedding_dimensions", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Embedding dimensions.", + required: false, + }, + optional_string( + "memory_window", + "Stepped long-term memory window preset: minimal | balanced | extended | maximum.", + ), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "update_screen_intelligence_settings" => ControllerSchema { + namespace: "config", + function: "update_screen_intelligence_settings", + description: "Update screen intelligence runtime settings.", + inputs: vec![ + optional_bool("enabled", "Enable screen intelligence."), + 
optional_string("capture_policy", "Capture policy mode."), + optional_string("policy_mode", "Policy mode override."), + FieldSchema { + name: "baseline_fps", + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment: "Baseline capture FPS.", + required: false, + }, + optional_bool("vision_enabled", "Enable vision analysis."), + optional_bool("autocomplete_enabled", "Enable autocomplete integration."), + optional_bool( + "use_vision_model", + "Use a vision LLM for screenshot analysis (false = OCR + text LLM).", + ), + optional_bool("keep_screenshots", "Keep screenshots on disk after vision processing."), + FieldSchema { + name: "allowlist", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( + TypeSchema::String, + )))), + comment: "Allowed app list.", + required: false, + }, + FieldSchema { + name: "denylist", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( + TypeSchema::String, + )))), + comment: "Denied app list.", + required: false, + }, + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "update_runtime_settings" => ControllerSchema { + namespace: "config", + function: "update_runtime_settings", + description: "Update runtime execution strategy settings.", + inputs: vec![ + optional_string("kind", "Runtime kind."), + optional_bool("reasoning_enabled", "Enable reasoning mode."), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_autonomy_settings" => ControllerSchema { + namespace: "config", + function: "get_autonomy_settings", + description: "Get the agent access-mode settings (autonomy level, workspace confinement, trusted roots, command allow-list, forbidden paths).", + inputs: vec![], + outputs: vec![json_output("autonomy", "Current [autonomy] config block.")], + }, + "update_autonomy_settings" => ControllerSchema { + namespace: "config", + function: "update_autonomy_settings", + description: "Update the agent access mode: autonomy level, workspace confinement, 
trusted-roots allow-list, command allow-list, forbidden paths, and OS-install permission. Applies live to active sessions.", + inputs: vec![ + optional_string("level", "Autonomy level: readonly | supervised | full."), + optional_bool("workspace_only", "Confine file/path access to the workspace directory."), + FieldSchema { + name: "allowed_commands", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), + comment: "Replace the shell command allow-list (array of base command names).", + required: false, + }, + FieldSchema { + name: "forbidden_paths", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), + comment: "Replace the forbidden-paths denylist (array of path prefixes).", + required: false, + }, + FieldSchema { + name: "trusted_roots", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "Replace the trusted-roots allow-list: array of {path, access: read|readwrite}. Grants access outside the workspace; credential dirs (~/.ssh, ~/.gnupg, ~/.aws) stay blocked regardless.", + required: false, + }, + optional_bool("allow_tool_install", "Allow the agent to install OS packages via install_tool (intended for Full mode)."), + FieldSchema { + name: "max_actions_per_hour", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Rate limit for side-effecting actions per hour.", + required: false, + }, + FieldSchema { + name: "auto_approve", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), + comment: "Replace the \"Always allow\" allowlist (array of tool names the agent runs without an approval prompt). 
Empty array clears it.", + required: false, + }, + optional_bool("require_task_plan_approval", "Require approval before an agent executes a task-board plan."), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_agent_settings" => ControllerSchema { + namespace: "config", + function: "get_agent_settings", + description: "Read agent execution settings: the action/tool wall-clock timeout, the runtime-effective value, and whether the OPENHUMAN_TOOL_TIMEOUT_SECS env var overrides it.", + inputs: vec![], + outputs: vec![json_output( + "settings", + "Agent settings: agent_timeout_secs, effective_timeout_secs, env_override, min_timeout_secs, max_timeout_secs.", + )], + }, + "update_agent_settings" => ControllerSchema { + namespace: "config", + function: "update_agent_settings", + description: "Update agent execution settings. Currently the action/tool wall-clock timeout (seconds). Applies to the next tool call without a restart; the OPENHUMAN_TOOL_TIMEOUT_SECS env var still overrides it when set.", + inputs: vec![FieldSchema { + name: "agent_timeout_secs", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Wall-clock timeout for a single tool/action execution, in seconds (1–3600). 
Extend this when large local models are interrupted before finishing.", + required: false, + }], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "update_browser_settings" => ControllerSchema { + namespace: "config", + function: "update_browser_settings", + description: "Update browser automation settings.", + inputs: vec![optional_bool("enabled", "Enable browser integration.")], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "update_local_ai_settings" => ControllerSchema { + namespace: "config", + function: "update_local_ai_settings", + description: + "Update the local AI runtime master switch and per-feature usage flags.", + inputs: vec![ + optional_bool( + "runtime_enabled", + "Master switch — when false, no subsystem uses the selected local AI runtime.", + ), + optional_bool( + "opt_in_confirmed", + "MVP opt-in marker. Bootstrap hard-overrides to disabled when this is false, \ + regardless of `runtime_enabled`. Set in tandem with `runtime_enabled` from the \ + unified AI panel.", + ), + optional_string( + "provider", + "Local provider identifier. Supported values: ollama, lm_studio.", + ), + optional_json( + "base_url", + "Provider base URL string, or null to clear. 
For LM Studio this defaults to http://localhost:1234/v1.", + ), + optional_string("model_id", "Default local chat model identifier."), + optional_string("chat_model_id", "Local chat model identifier."), + optional_bool( + "usage_embeddings", + "Use the local model for embedding generation (when runtime_enabled).", + ), + optional_bool( + "usage_heartbeat", + "Use the local model inside the heartbeat loop (when runtime_enabled).", + ), + optional_bool( + "usage_learning_reflection", + "Use the local model for learning/reflection passes (when runtime_enabled).", + ), + optional_bool( + "usage_subconscious", + "Use the local model for subconscious evaluation (when runtime_enabled).", + ), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "resolve_api_url" => ControllerSchema { + namespace: "config", + function: "resolve_api_url", + description: "Resolve effective API base URL using config/env/default from core.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "api_url", + ty: TypeSchema::String, + comment: "Resolved backend API URL.", + required: true, + }], + }, + "get_runtime_flags" => ControllerSchema { + namespace: "config", + function: "get_runtime_flags", + description: "Read environment-driven runtime flags.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "flags", + ty: TypeSchema::Ref("RuntimeFlagsOut"), + comment: "Runtime flag state.", + required: true, + }], + }, + "set_browser_allow_all" => ControllerSchema { + namespace: "config", + function: "set_browser_allow_all", + description: "Disable browser allow-all mode, or enable it only when operator opt-in is present.", + inputs: vec![FieldSchema { + name: "enabled", + ty: TypeSchema::Bool, + comment: "Whether to enable browser allow-all mode. 
Runtime enable is refused unless OPENHUMAN_BROWSER_ALLOW_ALL_RPC_ENABLE=1.", + required: true, + }], + outputs: vec![FieldSchema { + name: "flags", + ty: TypeSchema::Ref("RuntimeFlagsOut"), + comment: "Updated runtime flag state.", + required: true, + }], + }, + "workspace_onboarding_flag_exists" => ControllerSchema { + namespace: "config", + function: "workspace_onboarding_flag_exists", + description: "Check if onboarding flag file exists in workspace.", + inputs: vec![FieldSchema { + name: "flag_name", + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment: "Optional onboarding flag name override.", + required: false, + }], + outputs: vec![FieldSchema { + name: "exists", + ty: TypeSchema::Bool, + comment: "True when the flag file is present.", + required: true, + }], + }, + "workspace_onboarding_flag_set" => ControllerSchema { + namespace: "config", + function: "workspace_onboarding_flag_set", + description: "Create or remove the onboarding flag file in workspace.", + inputs: vec![ + FieldSchema { + name: "flag_name", + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment: "Optional onboarding flag name override.", + required: false, + }, + FieldSchema { + name: "value", + ty: TypeSchema::Bool, + comment: "True to create, false to remove.", + required: true, + }, + ], + outputs: vec![FieldSchema { + name: "exists", + ty: TypeSchema::Bool, + comment: "True when the flag file is present after the operation.", + required: true, + }], + }, + "update_analytics_settings" => ControllerSchema { + namespace: "config", + function: "update_analytics_settings", + description: "Enable or disable anonymized analytics and error reporting.", + inputs: vec![optional_bool( + "enabled", + "Enable anonymized analytics and crash reports.", + )], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_analytics_settings" => ControllerSchema { + namespace: "config", + function: "get_analytics_settings", + description: "Read current 
analytics settings.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "enabled", + ty: TypeSchema::Bool, + comment: "Whether anonymized analytics is enabled.", + required: true, + }], + }, + "get_dashboard_settings" => ControllerSchema { + namespace: "config", + function: "get_dashboard_settings", + description: "Read dashboard settings, including the local architecture diagram viewer.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "dashboard", + ty: TypeSchema::Json, + comment: "Current [dashboard] config block.", + required: true, + }], + }, + "update_meet_settings" => ControllerSchema { + namespace: "config", + function: "update_meet_settings", + description: + "Update Google Meet integration settings (currently the auto-orchestrator-handoff privacy gate).", + inputs: vec![optional_bool( + "auto_orchestrator_handoff", + "When true, ending a Meet call hands the transcript to the orchestrator for proactive follow-up actions.", + )], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_meet_settings" => ControllerSchema { + namespace: "config", + function: "get_meet_settings", + description: "Read current Google Meet integration settings.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "auto_orchestrator_handoff", + ty: TypeSchema::Bool, + comment: "Whether the orchestrator handoff fires on Meet call end.", + required: true, + }], + }, + "update_search_settings" => ControllerSchema { + namespace: "config", + function: "update_search_settings", + description: "Update search engine selection and BYO API credentials.", + inputs: vec![ + optional_string( + "engine", + "Active engine: managed | parallel | brave | querit.", + ), + FieldSchema { + name: "max_results", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Maximum results per query (1-20).", + required: false, + }, + FieldSchema { + name: "timeout_secs", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Per-request timeout 
in seconds (1-120).", + required: false, + }, + optional_string( + "parallel_api_key", + "Parallel API key (empty string clears the stored key).", + ), + optional_string( + "brave_api_key", + "Brave Search API key (empty string clears the stored key).", + ), + optional_string( + "querit_api_key", + "Querit API key (empty string clears the stored key).", + ), + FieldSchema { + name: "allowed_domains", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( + TypeSchema::String, + )))), + comment: "Websites the assistant may open/read (web_fetch/curl). Exact hosts match their subdomains; \"*\" allows all public sites; empty blocks all web access.", + required: false, + }, + FieldSchema { + name: "allow_all", + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment: "\"Allow all sites\" toggle. true sets the allowlist to [\"*\"]; false drops the wildcard, keeping explicit hosts.", + required: false, + }, + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_search_settings" => ControllerSchema { + namespace: "config", + function: "get_search_settings", + description: + "Read search engine settings. API keys are surfaced as presence booleans only.", + inputs: vec![], + outputs: vec![json_output( + "settings", + "Engine, effective engine, limits, and per-provider configuration flags.", + )], + }, + "get_activity_level_settings" => ControllerSchema { + namespace: "config", + function: "get_activity_level_settings", + description: "Get the agent activity level (0–4) and its derived settings: sync cadence, heartbeat/subconscious toggles, token budget, estimated monthly cost.", + inputs: vec![], + outputs: vec![json_output("settings", "Activity level settings with cost estimates.")], + }, + "update_activity_level_settings" => ControllerSchema { + namespace: "config", + function: "update_activity_level_settings", + description: "Set the agent activity level. 
Immediately updates the scheduler gate mode and persists the change.", + inputs: vec![optional_string("level", "Activity level: off | minimal | moderate | active | always_on (or 0–4).")], + outputs: vec![json_output("settings", "Updated activity level settings with cost estimates.")], + }, + "get_memory_sync_settings" => ControllerSchema { + namespace: "config", + function: "get_memory_sync_settings", + description: "Get the global memory-sync cadence applied to all opted-in sources: stored value, resolved selected cadence, manual/default flags, the 24h default, and the preset options (4h/12h/24h).", + inputs: vec![], + outputs: vec![json_output("settings", "Memory sync schedule settings.")], + }, + "update_memory_sync_settings" => ControllerSchema { + namespace: "config", + function: "update_memory_sync_settings", + description: "Set the global memory-sync cadence. Omit/null resets to the default; 0 means Manual only (auto-sync disabled); a positive value is seconds between syncs. Takes effect on the next scheduler tick.", + inputs: vec![FieldSchema { + name: "sync_interval_secs", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Seconds between auto-syncs. null = default (24h); 0 = Manual only; n>0 = sync every n seconds.", + required: false, + }], + outputs: vec![json_output("settings", "Updated memory sync schedule settings.")], + }, + "get_sandbox_settings" => ControllerSchema { + namespace: "config", + function: "get_sandbox_settings", + description: "Get sandbox execution backend settings: selected backend, Docker image/limits, env passthrough, Docker availability, and detected OS backend.", + inputs: vec![], + outputs: vec![json_output("settings", "Sandbox settings with status.")], + }, + "update_sandbox_settings" => ControllerSchema { + namespace: "config", + function: "update_sandbox_settings", + description: "Update sandbox execution backend settings: backend selection, Docker image, memory/CPU limits, and env passthrough. 
Applies to new agent sessions.", + inputs: vec![ + optional_string("backend", "Sandbox backend: auto | landlock | firejail | bubblewrap | docker | none."), + optional_bool("enabled", "Enable or disable sandbox execution."), + optional_string("docker_image", "Docker image for sandboxed execution (e.g. alpine:3.20)."), + FieldSchema { + name: "docker_memory_limit_mb", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Docker container memory limit in MB.", + required: false, + }, + FieldSchema { + name: "docker_cpu_limit", + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment: "Docker container CPU limit (e.g. 1.0 = one core).", + required: false, + }, + FieldSchema { + name: "env_passthrough", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new(TypeSchema::String)))), + comment: "Environment variables to pass through into the sandbox.", + required: false, + }, + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "agent_server_status" => ControllerSchema { + namespace: "config", + function: "agent_server_status", + description: "Return agent server runtime URL and status.", + inputs: vec![], + outputs: vec![json_output("status", "Agent server status payload.")], + }, + "reset_local_data" => ControllerSchema { + namespace: "config", + function: "reset_local_data", + description: + "Delete local OpenHuman data for the active config/workspace so the next restart boots clean.", + inputs: vec![], + outputs: vec![json_output("result", "Reset result with removed paths.")], + }, + "get_data_paths" => ControllerSchema { + namespace: "config", + function: "get_data_paths", + description: + "Resolve the OpenHuman data directories (current workspace, default ~/.openhuman, active workspace marker) that reset_local_data would remove. 
Read-only — performs no filesystem changes.", + inputs: vec![], + outputs: vec![json_output( + "paths", + "Resolved data paths: current_openhuman_dir, default_openhuman_dir, active_workspace_marker_path.", + )], + }, + "get_agent_paths" => ControllerSchema { + namespace: "config", + function: "get_agent_paths", + description: + "Resolve the agent's filesystem roots (action_dir, workspace_dir, projects_dir) so the UI can render live values instead of hard-coded strings. Read-only. Also returns `action_dir_env_override: bool` so the UI knows when OPENHUMAN_ACTION_DIR is forcing the value (Settings → action_dir editing disabled in that case).", + inputs: vec![], + outputs: vec![json_output( + "paths", + "Resolved agent paths: action_dir (acting-tool CWD), workspace_dir (internal state, agent-blocked), projects_dir (default projects home), action_dir_source (env | override | default).", + )], + }, + "update_agent_paths" => ControllerSchema { + namespace: "config", + function: "update_agent_paths", + description: + "Update the agent's editable filesystem roots. Currently only action_dir (the acting-tool sandbox). The path must be absolute; a missing directory is auto-created; it cannot equal the internal workspace_dir. An empty string clears the override and reverts to the default. Applies to new sessions immediately (live policy hot-swap), no restart. OPENHUMAN_ACTION_DIR still overrides at runtime when set.", + inputs: vec![FieldSchema { + name: "action_dir", + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment: "New absolute action sandbox path. Empty string clears the override (revert to default). 
Omit to leave unchanged.", + required: false, + }], + outputs: vec![json_output( + "paths", + "Updated agent paths (same shape as get_agent_paths): action_dir, workspace_dir, projects_dir, action_dir_source.", + )], + }, + "get_onboarding_completed" => ControllerSchema { + namespace: "config", + function: "get_onboarding_completed", + description: "Read whether the user has completed the onboarding flow.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "completed", + ty: TypeSchema::Bool, + comment: "True when onboarding has been completed.", + required: true, + }], + }, + "get_dictation_settings" => ControllerSchema { + namespace: "config", + function: "get_dictation_settings", + description: "Read current voice dictation settings.", + inputs: vec![], + outputs: vec![json_output("settings", "Dictation settings payload.")], + }, + "update_dictation_settings" => ControllerSchema { + namespace: "config", + function: "update_dictation_settings", + description: "Update voice dictation settings.", + inputs: vec![ + optional_bool("enabled", "Enable voice dictation."), + optional_string("hotkey", "Global hotkey string (e.g. 
Fn)."), + optional_string("activation_mode", "Activation mode: toggle or push."), + optional_bool("llm_refinement", "Enable LLM post-processing of transcription."), + optional_bool("streaming", "Enable WebSocket streaming transcription."), + FieldSchema { + name: "streaming_interval_ms", + ty: TypeSchema::Option(Box::new(TypeSchema::U64)), + comment: "Interval between streaming inference passes (ms).", + required: false, + }, + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_voice_server_settings" => ControllerSchema { + namespace: "config", + function: "get_voice_server_settings", + description: "Read current voice server settings.", + inputs: vec![], + outputs: vec![json_output("settings", "Voice server settings payload.")], + }, + "update_voice_server_settings" => ControllerSchema { + namespace: "config", + function: "update_voice_server_settings", + description: "Update voice server settings.", + inputs: vec![ + optional_bool("auto_start", "Start the voice server automatically with the core."), + optional_string("hotkey", "Voice server hotkey string (e.g. Fn)."), + optional_string("activation_mode", "Activation mode: tap or push."), + optional_bool("skip_cleanup", "Skip LLM cleanup and keep dictation verbatim."), + FieldSchema { + name: "min_duration_secs", + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment: "Minimum recording duration in seconds.", + required: false, + }, + FieldSchema { + name: "silence_threshold", + ty: TypeSchema::Option(Box::new(TypeSchema::F64)), + comment: "RMS energy threshold for silence detection.", + required: false, + }, + FieldSchema { + name: "custom_dictionary", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "Custom vocabulary words to bias whisper toward.", + required: false, + }, + optional_bool( + "always_on_enabled", + "Continuous always-on listening (no hotkey). 
Opt-in.", + ), + optional_string( + "wake_word", + "Always-on wake word; utterances must contain it (default 'Hey Tiny').", + ), + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "set_onboarding_completed" => ControllerSchema { + namespace: "config", + function: "set_onboarding_completed", + description: "Mark the onboarding flow as completed or reset it.", + inputs: vec![FieldSchema { + name: "value", + ty: TypeSchema::Bool, + comment: "True to mark completed, false to reset.", + required: true, + }], + outputs: vec![FieldSchema { + name: "completed", + ty: TypeSchema::Bool, + comment: "Updated onboarding completed state.", + required: true, + }], + }, + "update_composio_trigger_settings" => ControllerSchema { + namespace: "config", + function: "update_composio_trigger_settings", + description: + "Update Composio trigger-triage settings. When triage is disabled the \ + local LLM is NOT invoked per trigger — events are still archived to \ + trigger history.", + inputs: vec![ + optional_bool( + "triage_disabled", + "When true, skip the LLM triage turn for all Composio triggers globally.", + ), + FieldSchema { + name: "triage_disabled_toolkits", + ty: TypeSchema::Option(Box::new(TypeSchema::Array(Box::new( + TypeSchema::String, + )))), + comment: "Toolkit slugs that skip LLM triage (e.g. 
[\"gmail\", \"slack\"]).", + required: false, + }, + ], + outputs: vec![json_output("snapshot", "Updated config snapshot.")], + }, + "get_composio_trigger_settings" => ControllerSchema { + namespace: "config", + function: "get_composio_trigger_settings", + description: "Read current Composio trigger-triage settings.", + inputs: vec![], + outputs: vec![ + FieldSchema { + name: "triage_disabled", + ty: TypeSchema::Bool, + comment: "Whether the global triage-disabled flag is set.", + required: true, + }, + FieldSchema { + name: "triage_disabled_toolkits", + ty: TypeSchema::Array(Box::new(TypeSchema::String)), + comment: "Toolkit slugs that skip LLM triage.", + required: true, + }, + ], + }, + _ => ControllerSchema { + namespace: "config", + function: "unknown", + description: "Unknown config controller function.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "error", + ty: TypeSchema::String, + comment: "Lookup error details.", + required: true, + }], + }, + } +} diff --git a/src/openhuman/inference/local/service/ollama_admin.rs b/src/openhuman/inference/local/service/ollama_admin.rs deleted file mode 100644 index bf40cb2e92..0000000000 --- a/src/openhuman/inference/local/service/ollama_admin.rs +++ /dev/null @@ -1,1586 +0,0 @@ -use std::path::{Path, PathBuf}; - -use futures_util::StreamExt; - -use crate::openhuman::config::Config; -use crate::openhuman::inference::local::install::{ - find_system_ollama_binary, run_ollama_install_script, -}; -use crate::openhuman::inference::local::lm_studio::lm_studio_base_url; -use crate::openhuman::inference::local::model_requirements::{ - evaluate_context, ContextEligibility, MIN_CONTEXT_TOKENS, -}; -use crate::openhuman::inference::local::ollama::{ - ollama_base_url, ollama_base_url_from_config, validate_ollama_url, OllamaModelShow, - OllamaModelTag, OllamaPullEvent, OllamaPullProgress, OllamaPullRequest, OllamaShowRequest, - OllamaShowResponse, OllamaTagsResponse, -}; -use 
crate::openhuman::inference::local::process_util::apply_no_window; -use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; -use crate::openhuman::inference::model_ids; -use crate::openhuman::inference::paths::{find_workspace_ollama_binary, workspace_ollama_binary}; -use crate::openhuman::inference::presets::{self, VisionMode}; - -use super::spawn_marker::{self, OllamaSpawnMarker}; -use super::LocalAiService; - -fn lm_studio_models_error_means_unreachable(error: &str) -> bool { - error.starts_with("lm studio models request failed:") -} - -impl LocalAiService { - pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_server( - &self, - config: &Config, - ) -> Result<(), String> { - let base_url = ollama_base_url_from_config(config); - if self.ollama_healthy_at(&base_url).await { - if self.ollama_runner_ok_at(&base_url).await { - return Ok(()); - } - log::warn!("[local_ai] Ollama server responds but runner is broken"); - return Err( - "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." - .to_string(), - ); - } - Err(format!( - "OpenHuman no longer starts or installs Ollama automatically. Start your inference runtime yourself and make sure it is reachable at {base_url}." - )) - } - - /// Alias of `ensure_ollama_server` in external-runtime mode. - /// OpenHuman no longer installs or starts Ollama automatically; the - /// "fresh" retry path is a no-op that defers to the standard check. - pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_server_fresh( - &self, - config: &Config, - ) -> Result<(), String> { - self.ensure_ollama_server(config).await - } - - /// Check if a healthy daemon on `:11434` is actually openhuman's own - /// orphan from a prior session (i.e. we crashed before the graceful - /// shutdown hook fired). If so, kill it so the upcoming spawn can - /// resume owned-child tracking. External daemons are never touched. 
- async fn reclaim_orphan_if_ours(&self, config: &Config) { - let Some(marker) = spawn_marker::read_marker(config) else { - return; - }; - if !spawn_marker::pid_is_alive(marker.pid) { - log::debug!( - "[local_ai] stale ollama spawn marker (pid={} no longer alive); clearing", - marker.pid - ); - spawn_marker::clear_marker(config); - return; - } - let base_url = ollama_base_url_from_config(config); - if !self.ollama_healthy_at(&base_url).await { - // PID is alive but :11434 isn't healthy — either Ollama is - // mid-boot or the recorded PID was reused for an unrelated - // process. Leave the marker; either the daemon will come up - // and the next call will reclaim it, or `start_and_wait_for_server` - // will overwrite it on a fresh spawn. - log::debug!( - "[local_ai] ollama spawn marker pid={} alive but :11434 not healthy yet; \ - deferring reclaim", - marker.pid - ); - return; - } - log::info!( - "[local_ai] reclaiming openhuman-owned ollama orphan from prior session \ - (pid={}, binary={})", - marker.pid, - marker.binary_path - ); - kill_pid_by_id(marker.pid); - spawn_marker::clear_marker(config); - // Brief settle so the listener releases :11434 before we respawn. - tokio::time::sleep(std::time::Duration::from_millis(500)).await; - } - - async fn start_and_wait_for_server( - &self, - config: &Config, - ollama_cmd: &Path, - ) -> Result<(), String> { - let base_url = ollama_base_url_from_config(config); - if self.ollama_healthy_at(&base_url).await { - // A daemon is already up — adopt it. We did NOT spawn it (or any - // prior spawn was already reclaimed in `reclaim_orphan_if_ours`), - // so `owned_ollama` stays `None` and the daemon survives openhuman - // exit. This is the contract: external/adopted daemons are never - // killed; only our own children die with us. - return Ok(()); - } - - // Defensive: if a previous spawn attempt left a stale `Child` in - // `owned_ollama` (e.g. 
ensure_ollama_server_fresh after a failed - // first pass), clear it before respawning. Without this, the new - // child would replace the field and the old one would be leaked. - self.kill_ollama_server().await; - spawn_marker::clear_marker(config); - - let mut version_cmd = tokio::process::Command::new(ollama_cmd); - version_cmd - .arg("--version") - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()); - apply_no_window(&mut version_cmd); - if let Err(err) = version_cmd.status().await { - return Err(format!( - "Ollama binary not available ({}; error: {err}).", - ollama_cmd.display() - )); - } - - let mut serve_cmd = tokio::process::Command::new(ollama_cmd); - serve_cmd - .arg("serve") - .stdout(std::process::Stdio::null()) - // Pipe stderr so we can detect specific failure modes — most - // importantly Windows Controlled Folder Access blocks, which - // surface as "Access is denied" / "operation was blocked" / - // 0x80070005 in Ollama's own stderr when CFA refuses writes - // to the model cache or even prevents the binary from running. - .stderr(std::process::Stdio::piped()); - apply_no_window(&mut serve_cmd); - let mut serve_child = match serve_cmd.spawn() { - Ok(child) => { - log::debug!( - "[local_ai] spawned `ollama serve` from {}", - ollama_cmd.display() - ); - child - } - Err(err) => { - log::warn!( - "[local_ai] failed to spawn `ollama serve` from {}: {err}", - ollama_cmd.display() - ); - return Err(format!( - "Failed to start Ollama server ({}): {err}", - ollama_cmd.display() - )); - } - }; - - // Drain stderr into a bounded buffer in the background. We keep - // the last ~16KB so we can quote it back to the user / Sentry on - // failure but don't grow unbounded if Ollama logs heavily. 
- let stderr_buffer = std::sync::Arc::new(parking_lot::Mutex::new(String::new())); - if let Some(stderr) = serve_child.stderr.take() { - let buf = std::sync::Arc::clone(&stderr_buffer); - tokio::spawn(async move { - use tokio::io::{AsyncBufReadExt, BufReader}; - let mut reader = BufReader::new(stderr); - let mut line = String::new(); - while reader - .read_line(&mut line) - .await - .map(|n| n > 0) - .unwrap_or(false) - { - let mut b = buf.lock(); - let new_len = b.len() + line.len(); - if new_len > 16 * 1024 { - let drop_n = new_len - 16 * 1024; - let drop_n = std::cmp::min(drop_n, b.len()); - b.drain(0..drop_n); - } - b.push_str(&line); - line.clear(); - } - }); - } - - for _ in 0..20 { - if self.ollama_healthy_at(&base_url).await { - // Daemon is up. Take ownership so we can kill it on exit and - // write the spawn marker so a crashed openhuman can reclaim - // this PID on next launch instead of orphaning it forever. - let pid = serve_child.id().unwrap_or(0); - if pid == 0 { - log::warn!( - "[local_ai] spawned ollama child has no PID — owned-child kill \ - will be a no-op but daemon is healthy, continuing" - ); - } else { - let marker = OllamaSpawnMarker::new(pid, ollama_cmd); - if let Err(e) = spawn_marker::write_marker(config, &marker) { - // Marker write failure is non-fatal — graceful shutdown - // still kills via the in-memory `Child` handle. Only - // crash-recovery on next launch is degraded. - log::warn!( - "[local_ai] failed to write ollama spawn marker (pid={pid}): {e}" - ); - } - } - *self.owned_ollama.lock() = Some(serve_child); - return Ok(()); - } - tokio::time::sleep(std::time::Duration::from_millis(300)).await; - } - - // Health probe timed out. The serve child is unhealthy and may be - // holding the Ollama port — kill it before returning so the next - // bootstrap attempt isn't blocked by a zombie listener. 
- if let Err(err) = serve_child.kill().await { - log::warn!("[local_ai] failed to kill unhealthy `ollama serve` child: {err}"); - } - - // Classify the failure from captured stderr. - let stderr_snapshot = stderr_buffer.lock().clone(); - let lowered = stderr_snapshot.to_ascii_lowercase(); - // Match only explicit Controlled Folder Access markers. Generic - // strings like "access is denied" or "is not recognized as a trusted" - // appear in many unrelated Windows errors and previously caused us - // to surface a misleading CFA remediation message. - let cfa_signatures = ["controlled folder access", "operation was blocked"]; - let cfa_hit = cfa_signatures.iter().any(|sig| lowered.contains(sig)); - if cfa_hit { - log::warn!( - "[local_ai] Ollama failed to start — Controlled Folder Access blocked it. \ - stderr tail: {stderr_snapshot}" - ); - self.status.lock().error_detail = Some(stderr_snapshot); - return Err(format!( - "Ollama was blocked by Windows Controlled Folder Access. \ - Open Windows Security → Ransomware protection → Allow an app \ - through Controlled folder access, and add `{}`.", - ollama_cmd.display() - )); - } - // Non-CFA timeout — surface the stderr tail anyway for diagnosis. - if !stderr_snapshot.is_empty() { - log::warn!("[local_ai] Ollama not reachable. stderr tail: {stderr_snapshot}"); - self.status.lock().error_detail = Some(stderr_snapshot); - } - Err("Ollama runtime is not reachable after fresh install. Start `ollama serve` manually and retry.".to_string()) - } - - async fn resolve_or_install_ollama_binary(&self, config: &Config) -> Result { - // 1. Check user-configured ollama_binary_path from Settings. 
- if let Some(ref custom_path) = config.local_ai.ollama_binary_path { - let path = PathBuf::from(custom_path); - if path.is_file() { - log::debug!( - "[local_ai] using configured ollama_binary_path: {}", - path.display() - ); - return Ok(path); - } - log::warn!( - "[local_ai] configured ollama_binary_path does not exist: {}, falling through", - path.display() - ); - } - - // 2. OLLAMA_BIN env var. - if let Some(from_env) = std::env::var("OLLAMA_BIN") - .ok() - .filter(|v| !v.trim().is_empty()) - { - let path = PathBuf::from(from_env); - if path.exists() { - return Ok(path); - } - } - - if let Some(workspace_bin) = find_workspace_ollama_binary(config) { - if self.command_works(&workspace_bin).await { - log::debug!( - "[local_ai] using workspace-managed ollama binary: {}", - workspace_bin.display() - ); - return Ok(workspace_bin); - } - log::warn!( - "[local_ai] workspace-managed ollama binary is present but not executable, reinstalling: {}", - workspace_bin.display() - ); - } - - if self.command_works(Path::new("ollama")).await { - return Ok(PathBuf::from("ollama")); - } - - self.download_and_install_ollama(config).await?; - if let Some(installed) = find_workspace_ollama_binary(config) { - Ok(installed) - } else if let Some(system_bin) = find_system_ollama_binary() { - log::debug!( - "[local_ai] workspace binary not found after install, using system binary: {}", - system_bin.display() - ); - Ok(system_bin) - } else { - Err("Ollama download completed but executable is missing. \ - The installer may have placed it in an unexpected location. \ - Set OLLAMA_BIN or configure the path in Settings > Local Model." 
- .to_string()) - } - } - - async fn command_works(&self, command: &Path) -> bool { - let mut cmd = tokio::process::Command::new(command); - cmd.arg("--version") - .stdout(std::process::Stdio::null()) - .stderr(std::process::Stdio::null()); - apply_no_window(&mut cmd); - cmd.status().await.map(|s| s.success()).unwrap_or(false) - } - - async fn download_and_install_ollama(&self, config: &Config) -> Result<(), String> { - let install_dir = crate::openhuman::inference::paths::workspace_ollama_dir(config); - tokio::fs::create_dir_all(&install_dir) - .await - .map_err(|e| format!("failed to create Ollama install directory: {e}"))?; - - // Crash-resume guard: Inno Setup's installer is spawned via - // PowerShell's `Start-Process`, which creates a top-level process. - // It outlives OpenHuman crashing, the user closing the app, or - // the bootstrap task being cancelled. If a prior launch left an - // OllamaSetup.exe running, wait for it instead of starting a - // second one — two concurrent installers race on the same dir - // and corrupt the install. - if crate::openhuman::inference::local::install::is_ollama_installer_running() { - log::info!( - "[local_ai] detected in-flight OllamaSetup.exe — \ - waiting for it to finish before deciding whether to install" - ); - { - let mut status = self.status.lock(); - status.state = "installing".to_string(); - status.warning = Some("Resuming Ollama install from a previous launch".to_string()); - status.error_detail = None; - status.error_category = None; - } - // Bounded wait: a stuck OllamaSetup.exe (e.g. Inno Setup dialog - // waiting on user input) must not block app startup forever. Five - // minutes covers a slow download + UAC prompt; past that we mark - // the install as failed-but-recoverable and let the caller decide. 
- let wait_start = std::time::Instant::now(); - const INSTALLER_WAIT_TIMEOUT: std::time::Duration = - std::time::Duration::from_secs(5 * 60); - let mut timed_out = false; - while crate::openhuman::inference::local::install::is_ollama_installer_running() { - if wait_start.elapsed() >= INSTALLER_WAIT_TIMEOUT { - timed_out = true; - break; - } - tokio::time::sleep(std::time::Duration::from_secs(2)).await; - } - if timed_out { - log::warn!( - "[local_ai] OllamaSetup.exe still running after {}s — giving up the wait", - INSTALLER_WAIT_TIMEOUT.as_secs() - ); - let mut status = self.status.lock(); - status.state = "install_failed".to_string(); - status.warning = None; - status.error_category = Some("install_stuck".to_string()); - status.error_detail = Some(format!( - "Previous OllamaSetup.exe install was still running after {}s. \ - Cancel the installer (System tray / Task Manager) and retry.", - INSTALLER_WAIT_TIMEOUT.as_secs() - )); - return Err("Previous Ollama installer is stuck. Cancel it and retry.".to_string()); - } - // The prior installer is gone. If it succeeded, our regular - // discovery paths will find the binary and we can short-circuit - // the install entirely. If it failed, fall through and run a - // fresh install below. 
- if find_workspace_ollama_binary(config).is_some() - || find_system_ollama_binary().is_some() - { - log::info!("[local_ai] resumed prior install completed successfully"); - return Ok(()); - } - log::warn!( - "[local_ai] prior installer exited but binary not found — running fresh install" - ); - } - - { - let mut status = self.status.lock(); - status.state = "installing".to_string(); - status.warning = Some("Installing Ollama runtime (first run)".to_string()); - status.download_progress = None; - status.downloaded_bytes = None; - status.total_bytes = None; - status.download_speed_bps = None; - status.eta_seconds = None; - status.error_detail = None; - status.error_category = None; - } - - let result = run_ollama_install_script(&install_dir).await?; - if !result.exit_status.success() { - let stderr_tail: String = result - .stderr - .lines() - .rev() - .take(20) - .collect::>() - .into_iter() - .rev() - .collect::>() - .join("\n"); - log::warn!( - "[local_ai] Ollama install script failed (exit={})\nstdout: {}\nstderr: {}", - result.exit_status, - result.stdout, - result.stderr, - ); - { - let mut status = self.status.lock(); - status.error_detail = Some(if stderr_tail.is_empty() { - result - .stdout - .lines() - .rev() - .take(20) - .collect::>() - .into_iter() - .rev() - .collect::>() - .join("\n") - } else { - stderr_tail - }); - status.error_category = Some("install".to_string()); - } - return Err(format!( - "Ollama install script failed (exit code {}). 
\ - Install Ollama manually from https://ollama.com or set its path in Settings > Local Model.", - result.exit_status.code().unwrap_or(-1) - )); - } - - log::debug!( - "[local_ai] Ollama install script succeeded, stdout: {}", - result.stdout.chars().take(500).collect::(), - ); - - let installed = find_workspace_ollama_binary(config) - .or_else(find_system_ollama_binary) - .ok_or_else(|| "Ollama installer finished but binary was not found".to_string())?; - log::debug!( - "[local_ai] Ollama install finished with binary at {}", - installed.display() - ); - - { - let mut status = self.status.lock(); - status.warning = Some("Ollama runtime installed".to_string()); - status.download_progress = Some(1.0); - } - Ok(()) - } - - /// Check Ollama health against the given base URL. - pub(in crate::openhuman::inference::local::service) async fn ollama_healthy_at( - &self, - base_url: &str, - ) -> bool { - tracing::debug!( - target: "local_ai::ollama_admin", - %base_url, - "[local_ai:ollama_admin] ollama_healthy_at: checking" - ); - self.http - .get(format!("{base_url}/api/tags")) - .timeout(std::time::Duration::from_secs(2)) - .send() - .await - .map(|r| r.status().is_success()) - .unwrap_or(false) - } - - /// Backward-compat wrapper — resolves the URL from env vars only (no config). - /// Prefer [`ollama_healthy_at`] when a `Config` is available. - pub(in crate::openhuman::inference::local::service) async fn ollama_healthy(&self) -> bool { - self.ollama_healthy_at(&ollama_base_url()).await - } - - /// Filesystem-only precondition: is *any* Ollama binary discoverable? - /// - /// This is the cheapest possible check — no process spawns, no HTTP, no - /// timeouts. Callers that need to decide whether it's even worth talking - /// to `/api/tags` should consult this first. Returning `false` here means - /// the UI should drive the user to install Ollama instead of polling for - /// model state that can never appear. 
- pub(in crate::openhuman::inference::local::service) fn ollama_binary_present( - &self, - config: &Config, - ) -> bool { - if let Some(ref custom) = config.local_ai.ollama_binary_path { - if PathBuf::from(custom).is_file() { - return true; - } - } - if let Some(env_path) = std::env::var("OLLAMA_BIN") - .ok() - .filter(|v| !v.trim().is_empty()) - { - if PathBuf::from(env_path).is_file() { - return true; - } - } - if find_workspace_ollama_binary(config).is_some() { - return true; - } - find_system_ollama_binary().is_some() - } - - pub(in crate::openhuman::inference::local::service) async fn ensure_models_available( - &self, - config: &Config, - ) -> Result<(), String> { - let chat_model = model_ids::effective_chat_model_id(config); - self.ensure_ollama_model_available(config, &chat_model, "chat") - .await?; - - match presets::vision_mode_for_config(&config.local_ai) { - VisionMode::Disabled => { - self.status.lock().vision_state = "disabled".to_string(); - } - VisionMode::Ondemand => { - self.status.lock().vision_state = "idle".to_string(); - } - VisionMode::Bundled => { - let vision_model = model_ids::effective_vision_model_id(config); - self.ensure_ollama_model_available(config, &vision_model, "vision") - .await?; - self.status.lock().vision_state = "ready".to_string(); - } - } - - let embedding_model = model_ids::effective_embedding_model_id(config); - if config.local_ai.preload_embedding_model { - self.ensure_ollama_model_available(config, &embedding_model, "embedding") - .await?; - self.status.lock().embedding_state = "ready".to_string(); - } - - if config.local_ai.preload_stt_model { - self.ensure_stt_asset_available(config).await?; - } - - if config.local_ai.preload_tts_voice { - self.ensure_tts_asset_available(config).await?; - } - - Ok(()) - } - - pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_model_available( - &self, - config: &Config, - model_id: &str, - label: &str, - ) -> Result<(), String> { - let base_url = 
ollama_base_url_from_config(config); - if self.has_model_at(&base_url, model_id).await? { - return Ok(()); - } - - { - let mut status = self.status.lock(); - status.state = "downloading".to_string(); - status.warning = Some(format!( - "Pulling {} model `{}` from Ollama library", - label, model_id - )); - match label { - "vision" => status.vision_state = "downloading".to_string(), - "embedding" => status.embedding_state = "downloading".to_string(), - _ => {} - } - status.download_progress = Some(0.0); - status.downloaded_bytes = Some(0); - status.total_bytes = None; - status.download_speed_bps = Some(0); - status.eta_seconds = None; - } - - const MAX_PULL_RETRIES: usize = 3; - const PULL_RETRY_BACKOFF_MS: u64 = 1_500; - const PULL_INTERRUPT_SETTLE_SECS: u64 = 20; - let mut last_error: Option = None; - - for attempt in 1..=MAX_PULL_RETRIES { - if attempt > 1 { - let retry_msg = format!( - "Ollama pull stream interrupted. Retrying {}/{}...", - attempt, MAX_PULL_RETRIES - ); - { - let mut status = self.status.lock(); - status.state = "downloading".to_string(); - status.warning = Some(retry_msg.clone()); - } - log::warn!( - "[local_ai] pull retry {}/{} for model `{}` after interruption", - attempt, - MAX_PULL_RETRIES, - model_id - ); - tokio::time::sleep(std::time::Duration::from_millis( - PULL_RETRY_BACKOFF_MS * attempt as u64, - )) - .await; - } - - let response = match self - .http - .post(format!("{base_url}/api/pull")) - .json(&OllamaPullRequest { - name: model_id.to_string(), - stream: true, - }) - // Model pulls are long-running streaming responses; the default 30s - // client timeout can interrupt healthy downloads mid-stream. 
- .timeout(std::time::Duration::from_secs(30 * 60)) - .send() - .await - { - Ok(response) => response, - Err(e) => { - let err = format!("ollama pull request failed: {e}"); - last_error = Some(err.clone()); - if attempt < MAX_PULL_RETRIES { - continue; - } - return Err(format!("{err} after {MAX_PULL_RETRIES} attempts")); - } - }; - if !response.status().is_success() { - let status = response.status(); - let body = response.text().await.unwrap_or_default(); - let detail = body.trim(); - return Err(format!( - "ollama pull failed with status {}{}", - status, - if detail.is_empty() { - String::new() - } else { - format!(": {detail}") - } - )); - } - - let mut stream = response.bytes_stream(); - let mut pending = String::new(); - let mut stream_error: Option = None; - let started_at = std::time::Instant::now(); - let mut progress = OllamaPullProgress::default(); - let mut observed_bytes = false; - while let Some(item) = stream.next().await { - let chunk = match item { - Ok(value) => value, - Err(e) => { - stream_error = Some(format!("ollama pull stream error: {e}")); - break; - } - }; - pending.push_str(&String::from_utf8_lossy(&chunk)); - while let Some(pos) = pending.find('\n') { - let line = pending[..pos].trim().to_string(); - pending = pending[pos + 1..].to_string(); - if line.is_empty() { - continue; - } - let event: OllamaPullEvent = match serde_json::from_str(&line) { - Ok(v) => v, - Err(_) => continue, - }; - if let Some(err) = event.error { - return Err(format!("ollama pull error: {err}")); - } - - progress.observe(&event); - let completed = progress.aggregate_downloaded(); - let total = progress.aggregate_total(); - let elapsed = started_at.elapsed().as_secs_f64().max(0.001); - let speed_bps = (completed as f64 / elapsed).round().max(0.0) as u64; - let eta_seconds = total.and_then(|t| { - if completed >= t || speed_bps == 0 { - None - } else { - Some((t.saturating_sub(completed)) / speed_bps.max(1)) - } - }); - observed_bytes |= completed > 0; - - let mut 
status = self.status.lock(); - if let Some(status_text) = event.status.as_deref() { - status.warning = Some(format!("Ollama pull: {status_text}")); - if status_text.eq_ignore_ascii_case("success") { - status.download_progress = Some(1.0); - } - } - status.downloaded_bytes = Some(completed); - status.total_bytes = total; - status.download_speed_bps = Some(speed_bps); - status.eta_seconds = eta_seconds; - status.download_progress = total - .map(|t| (completed as f32 / t as f32).clamp(0.0, 1.0)) - .or(Some(0.0)); - } - } - - if let Some(err) = stream_error { - last_error = Some(err.clone()); - let resumed = self - .wait_for_model_after_pull_interruption( - &base_url, - model_id, - attempt, - MAX_PULL_RETRIES, - observed_bytes, - PULL_INTERRUPT_SETTLE_SECS, - ) - .await?; - if resumed { - break; - } - if attempt < MAX_PULL_RETRIES { - continue; - } - return Err(format!("{err} after {MAX_PULL_RETRIES} attempts")); - } - - if self.has_model_at(&base_url, model_id).await? { - break; - } - - last_error = Some(format!( - "ollama pull finished but model `{}` was not found", - model_id - )); - let resumed = self - .wait_for_model_after_pull_interruption( - &base_url, - model_id, - attempt, - MAX_PULL_RETRIES, - observed_bytes, - PULL_INTERRUPT_SETTLE_SECS, - ) - .await?; - if resumed { - break; - } - if attempt < MAX_PULL_RETRIES { - continue; - } - } - - if !self.has_model_at(&base_url, model_id).await? 
{ - return Err(last_error.unwrap_or_else(|| { - format!( - "ollama pull finished but model `{}` was not found", - model_id - ) - })); - } - - match label { - "vision" => self.status.lock().vision_state = "ready".to_string(), - "embedding" => self.status.lock().embedding_state = "ready".to_string(), - _ => {} - } - - Ok(()) - } - - async fn wait_for_model_after_pull_interruption( - &self, - base_url: &str, - model_id: &str, - attempt: usize, - max_attempts: usize, - observed_bytes: bool, - settle_window_secs: u64, - ) -> Result { - let wait_secs = interrupted_pull_settle_window_secs(observed_bytes, settle_window_secs); - if wait_secs == 0 { - return Ok(false); - } - - { - let mut status = self.status.lock(); - status.state = "downloading".to_string(); - status.warning = Some(format!( - "Ollama pull stream disconnected. Waiting up to {wait_secs}s for ongoing download to resume before retry {}/{}.", - attempt + 1, - max_attempts - )); - } - log::warn!( - "[local_ai] pull stream interrupted for model `{}`; waiting up to {}s before retry {}/{}", - model_id, - wait_secs, - attempt + 1, - max_attempts - ); - - let deadline = std::time::Instant::now() + std::time::Duration::from_secs(wait_secs); - while std::time::Instant::now() < deadline { - if self.has_model_at(base_url, model_id).await? { - log::info!( - "[local_ai] model `{}` became available after interrupted pull stream", - model_id - ); - return Ok(true); - } - tokio::time::sleep(std::time::Duration::from_secs(1)).await; - } - - Ok(false) - } - - /// Run full diagnostics: check Ollama server health, list installed models, - /// and verify expected models are present. Returns a JSON-serializable report. 
- pub async fn diagnostics(&self, config: &Config) -> Result { - if provider_from_config(config) == LocalAiProvider::LmStudio { - return self.lm_studio_diagnostics(config).await; - } - - let base_url = ollama_base_url_from_config(config); - let healthy = self.ollama_healthy_at(&base_url).await; - let runner_ok = if healthy { - self.ollama_runner_ok_at(&base_url).await - } else { - false - }; - - log::debug!( - "[local_ai] diagnostics: entry base_url={} healthy={}", - base_url, - healthy - ); - - let (models, tags_error) = if healthy { - match self.list_models_at(&base_url).await { - Ok(models) => (models, None), - Err(e) => (vec![], Some(e)), - } - } else { - (vec![], None) - }; - - let expected_chat = model_ids::effective_chat_model_id(config); - let expected_embedding = model_ids::effective_embedding_model_id(config); - let expected_vision = model_ids::effective_vision_model_id(config); - - let model_names: Vec = models.iter().map(|m| m.name.to_ascii_lowercase()).collect(); - let has = |target: &str| -> bool { - let t = target.to_ascii_lowercase(); - model_names - .iter() - .any(|n| *n == t || n.starts_with(&(t.clone() + ":"))) - }; - - let chat_found = has(&expected_chat); - let embedding_found = has(&expected_embedding); - let vision_found = has(&expected_vision); - - // Per-model native context window (vs the memory-layer minimum) and - // chat-capability. `/api/show` is one bounded round-trip per installed - // model, fetched concurrently and only on this diagnostics path; the - // single call yields both signals. 
- let model_shows: Vec = if healthy { - futures_util::future::join_all( - models - .iter() - .map(|m| self.fetch_model_show_at(&base_url, &m.name)), - ) - .await - } else { - Vec::new() - }; - let model_eligibilities: Vec = model_shows - .iter() - .map(|s| evaluate_context(s.context_length)) - .collect(); - - let installed_models: Vec = models - .iter() - .enumerate() - .map(|(i, m)| { - let eligibility = model_eligibilities.get(i).cloned(); - let context_length = match eligibility.as_ref() { - Some(ContextEligibility::Ok { context_length }) - | Some(ContextEligibility::BelowMinimum { context_length, .. }) => { - Some(*context_length) - } - _ => None, - }; - // `chat_capable: false` → embedding-only model the chat picker - // must hide; `null`/`true` → keep visible (fail-open). - // TAURI-RUST-4P6. - let chat_capable = model_shows.get(i).and_then(|s| s.chat_capable); - serde_json::json!({ - "name": m.name, - "size": m.size, - "modified_at": m.modified_at, - "context_length": context_length, - "eligibility": eligibility, - "chat_capable": chat_capable, - }) - }) - .collect(); - - // Resolve the eligibility of an expected (active) model by tag prefix. - let eligibility_for = |target: &str| -> Option { - let t = target.to_ascii_lowercase(); - models - .iter() - .zip(model_eligibilities.iter()) - .find(|(m, _)| { - let n = m.name.to_ascii_lowercase(); - n == t || n.starts_with(&(t.clone() + ":")) - }) - .map(|(_, e)| e.clone()) - }; - let chat_eligibility = eligibility_for(&expected_chat); - let embedding_eligibility = eligibility_for(&expected_embedding); - - let binary_path = self.resolve_binary_path(config); - - let mut issues: Vec = Vec::new(); - let repair_actions: Vec = Vec::new(); - - if !healthy { - issues.push(format!( - "Ollama server is not running or not reachable at {}", - base_url - )); - } - if healthy && !runner_ok { - issues.push( - "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." 
- .to_string(), - ); - } - if healthy && !chat_found { - issues.push(format!("Chat model `{}` is not installed", expected_chat)); - } - if healthy && config.local_ai.preload_embedding_model && !embedding_found { - issues.push(format!( - "Embedding model `{}` is not installed", - expected_embedding - )); - } - if healthy - && matches!( - presets::vision_mode_for_config(&config.local_ai), - VisionMode::Bundled - ) - && !vision_found - { - issues.push(format!( - "Vision model `{}` is not installed", - expected_vision - )); - } - if let Some(ref e) = tags_error { - issues.push(format!("Failed to list models: {e}")); - } - // Reject installed-but-too-small active models: a context window - // below the memory-layer minimum silently truncates chunks / - // summaries and corrupts recall. - if let Some(ContextEligibility::BelowMinimum { - context_length, - required, - }) = embedding_eligibility.as_ref() - { - issues.push(format!( - "Embedding model `{}` has a {}-token context window; the memory layer \ - requires at least {}. Choose an embedding model with a larger context \ - (e.g. 
bge-m3).", - expected_embedding, context_length, required - )); - } - if let Some(ContextEligibility::BelowMinimum { - context_length, - required, - }) = chat_eligibility.as_ref() - { - issues.push(format!( - "Chat model `{}` has a {}-token context window; the memory layer \ - requires at least {}.", - expected_chat, context_length, required - )); - } - - log::debug!( - "[local_ai] diagnostics: healthy={} models={} issues={} repair_actions={}", - healthy, - models.len(), - issues.len(), - repair_actions.len(), - ); - - Ok(serde_json::json!({ - "ollama_running": healthy, - "ollama_runner_ok": runner_ok, - "ollama_base_url": base_url, - "ollama_binary_path": binary_path, - "installed_models": installed_models, - "context_requirement": { - "min_context_tokens": MIN_CONTEXT_TOKENS, - }, - "vision_mode": presets::vision_mode_for_config(&config.local_ai), - "expected": { - "chat_model": expected_chat, - "chat_found": chat_found, - "chat_eligibility": chat_eligibility, - "embedding_model": expected_embedding, - "embedding_found": embedding_found, - "embedding_eligibility": embedding_eligibility, - "vision_model": expected_vision, - "vision_found": vision_found, - }, - "issues": issues, - "repair_actions": repair_actions, - "ok": issues.is_empty(), - })) - } - - async fn list_models_at(&self, base: &str) -> Result, String> { - let url = format!("{base}/api/tags"); - tracing::debug!( - target: "local_ai::ollama_admin", - %base, - %url, - "[local_ai:ollama_admin] list_models: sending GET" - ); - - let response = self - .http - .get(&url) - .timeout(std::time::Duration::from_secs(5)) - .send() - .await - .map_err(|e| { - tracing::error!( - target: "local_ai::ollama_admin", - %url, - error = %e, - "[local_ai:ollama_admin] list_models: request send failed" - ); - format!("ollama tags request failed: {e}") - })?; - - let status = response.status(); - tracing::debug!( - target: "local_ai::ollama_admin", - %url, - %status, - "[local_ai:ollama_admin] list_models: received response" 
- ); - - if !status.is_success() { - let body = response.text().await.unwrap_or_default(); - tracing::error!( - target: "local_ai::ollama_admin", - %url, - %status, - body = %body, - "[local_ai:ollama_admin] list_models: non-success response" - ); - return Err(format!( - "ollama tags failed with status {}: {}", - status, - body.trim() - )); - } - - // Read the body as text first so we can log it if JSON parsing fails. - let body = response.text().await.map_err(|e| { - tracing::error!( - target: "local_ai::ollama_admin", - %url, - error = %e, - "[local_ai:ollama_admin] list_models: failed to read response body" - ); - format!("ollama tags body read failed: {e}") - })?; - - let payload: OllamaTagsResponse = serde_json::from_str(&body).map_err(|e| { - tracing::error!( - target: "local_ai::ollama_admin", - %url, - body = %body, - error = %e, - "[local_ai:ollama_admin] list_models: JSON parse failed" - ); - format!("ollama tags parse failed: {e}") - })?; - - tracing::debug!( - target: "local_ai::ollama_admin", - %url, - models = payload.models.len(), - "[local_ai:ollama_admin] list_models: parsed successfully" - ); - - Ok(payload.models) - } - - /// Fetch a model's native context window and chat-capability via Ollama - /// `POST /api/show`. - /// - /// Both fields default to `None` on any failure (unreachable, non-2xx, - /// parse error, or the metadata key is absent) — the caller maps a `None` - /// context to an `Unknown` eligibility verdict, and a `None` chat-capable - /// to "keep visible" (fail-open). One bounded HTTP round-trip per model; - /// only ever invoked from the diagnostics path. The single round-trip - /// yields both signals (context for the memory-layer gate, capability for - /// the chat-picker filter — TAURI-RUST-4P6). 
- async fn fetch_model_show_at(&self, base_url: &str, model: &str) -> OllamaModelShow { - let url = format!("{}/api/show", base_url.trim_end_matches('/')); - let resp = match self - .http - .post(&url) - .json(&OllamaShowRequest { - model: model.to_string(), - }) - .timeout(std::time::Duration::from_secs(5)) - .send() - .await - { - Ok(resp) => resp, - Err(e) => { - tracing::debug!( - target: "local_ai::ollama_admin", - %url, model, error = %e, - "[local_ai:ollama_admin] fetch_model_show: request failed" - ); - return OllamaModelShow::default(); - } - }; - let status = resp.status(); - if !status.is_success() { - tracing::debug!( - target: "local_ai::ollama_admin", - %url, model, %status, - "[local_ai:ollama_admin] fetch_model_show: non-success response" - ); - return OllamaModelShow::default(); - } - let parsed: OllamaShowResponse = match resp.json().await { - Ok(parsed) => parsed, - Err(e) => { - tracing::debug!( - target: "local_ai::ollama_admin", - %url, model, error = %e, - "[local_ai:ollama_admin] fetch_model_show: JSON parse failed" - ); - return OllamaModelShow::default(); - } - }; - let show = OllamaModelShow { - context_length: parsed.context_length(), - chat_capable: parsed.chat_capability(), - }; - tracing::debug!( - target: "local_ai::ollama_admin", - model, - context_length = ?show.context_length, - chat_capable = ?show.chat_capable, - "[local_ai:ollama_admin] fetch_model_show: resolved" - ); - show - } - - async fn lm_studio_diagnostics(&self, config: &Config) -> Result { - let base_url = lm_studio_base_url(config); - let models_result = self.list_lm_studio_models(config).await; - let (models, models_error, healthy) = match models_result { - Ok(models) => (models, None, true), - Err(err) => { - let reachable = !lm_studio_models_error_means_unreachable(&err); - (vec![], Some(err), reachable) - } - }; - - let expected_chat = model_ids::effective_chat_model_id(config); - let model_names: Vec = models.iter().map(|m| 
m.name.to_ascii_lowercase()).collect(); - let chat_found = model_names - .iter() - .any(|name| name == &expected_chat.to_ascii_lowercase()); - - let mut issues: Vec = Vec::new(); - let repair_actions: Vec = Vec::new(); - - if !healthy { - let detail = models_error - .as_deref() - .map(|err| format!(": {err}")) - .unwrap_or_default(); - issues.push(format!( - "LM Studio server is not running or not reachable at {}{}", - base_url, detail - )); - } - if healthy && models_error.is_none() && models.is_empty() { - issues.push("LM Studio is reachable but no models are loaded".to_string()); - } else if healthy && models_error.is_none() && !chat_found { - issues.push(format!( - "Chat model `{}` is not loaded in LM Studio", - expected_chat - )); - } - if healthy { - if let Some(ref err) = models_error { - issues.push(format!("Failed to list LM Studio models: {err}")); - } - } - - tracing::debug!( - provider = "lm_studio", - %base_url, - healthy, - models = models.len(), - issues = issues.len(), - "[local_ai] diagnostics" - ); - - Ok(serde_json::json!({ - "provider": "lm_studio", - "lm_studio_running": healthy, - "lm_studio_base_url": base_url, - "ollama_running": false, - "ollama_base_url": serde_json::Value::Null, - "ollama_binary_path": serde_json::Value::Null, - "installed_models": models, - "vision_mode": "disabled", - "expected": { - "chat_model": expected_chat, - "chat_found": chat_found, - "embedding_model": model_ids::effective_embedding_model_id(config), - "embedding_found": false, - "vision_model": model_ids::effective_vision_model_id(config), - "vision_found": false, - }, - "issues": issues, - "repair_actions": repair_actions, - "ok": issues.is_empty(), - })) - } - - fn resolve_binary_path(&self, config: &Config) -> Option { - // 1. Explicit user-configured path in Settings. 
- if let Some(ref custom) = config.local_ai.ollama_binary_path { - let p = PathBuf::from(custom); - if p.is_file() { - log::debug!( - "[local_ai] resolve_binary_path: using configured path {}", - p.display() - ); - return Some(custom.clone()); - } - } - - // 2. OLLAMA_BIN env var (mirrors bootstrap detection). - if let Some(from_env) = std::env::var("OLLAMA_BIN") - .ok() - .filter(|v| !v.trim().is_empty()) - { - let p = PathBuf::from(&from_env); - if p.is_file() { - log::debug!( - "[local_ai] resolve_binary_path: using OLLAMA_BIN {}", - p.display() - ); - return Some(from_env); - } - } - - // 3. Workspace-managed binary installed by the app. - let workspace_bin = workspace_ollama_binary(config); - if workspace_bin.is_file() { - log::debug!( - "[local_ai] resolve_binary_path: using workspace binary {}", - workspace_bin.display() - ); - return Some(workspace_bin.display().to_string()); - } - - // 4. Bare `ollama` on PATH — same as bootstrap's `which ollama` step. - let binary_name = if cfg!(windows) { - "ollama.exe" - } else { - "ollama" - }; - if let Some(path_var) = std::env::var_os("PATH") { - for dir in std::env::split_paths(&path_var) { - let candidate = dir.join(binary_name); - if candidate.is_file() { - log::debug!( - "[local_ai] resolve_binary_path: found on PATH at {}", - candidate.display() - ); - return Some(candidate.display().to_string()); - } - } - } - - // 5. Platform-specific well-known locations (macOS bundles, Windows, Linux). - crate::openhuman::inference::local::install::find_system_ollama_binary() - .map(|p| p.display().to_string()) - } - - /// Quick check that the Ollama runner can actually exec models against the given URL. - async fn ollama_runner_ok_at(&self, base_url: &str) -> bool { - let resp = self - .http - .get(format!("{base_url}/api/tags")) - .timeout(std::time::Duration::from_secs(3)) - .send() - .await; - match resp { - Ok(r) if r.status().is_success() => { - // Tags endpoint works — but the runner error only shows up on model exec. 
- // Do a lightweight pull-status check (won't download, just checks). - let check = self - .http - .post(format!("{base_url}/api/show")) - .json(&serde_json::json!({"name": "___nonexistent_probe___"})) - .timeout(std::time::Duration::from_secs(3)) - .send() - .await; - match check { - Ok(r) => { - let status = r.status().as_u16(); - let body = r.text().await.unwrap_or_default(); - // 404 = model not found — runner is fine. 500 with fork/exec = broken. - if status == 500 && body.contains("fork/exec") { - log::warn!("[local_ai] ollama runner broken: {body}"); - return false; - } - true - } - Err(_) => true, // network error, assume ok - } - } - _ => false, - } - } - - /// Kill any running Ollama server process so we can restart with the correct binary. - /// Kill the `ollama serve` daemon openhuman itself spawned, if any. - /// - /// **No-op when openhuman never spawned a daemon** (i.e. it adopted an - /// externally-managed one via the `ollama_healthy()` fast-path, or no - /// daemon was started at all). This avoids the friendly-fire bug from - /// the previous blanket `taskkill /IM ollama.exe` / `pkill -f` which - /// would terminate any Ollama on the host — including ones started by - /// the user's CLI, tray app, or other tooling. - /// - /// External daemons can be replaced/restarted by the user; killing - /// them out from under their owner is never the right move from inside - /// a desktop app. - async fn kill_ollama_server(&self) { - let maybe_child = self.owned_ollama.lock().take(); - let Some(mut child) = maybe_child else { - log::debug!( - "[local_ai] kill_ollama_server: no openhuman-owned daemon; \ - leaving any external Ollama on :11434 untouched" - ); - return; - }; - let pid = child.id().unwrap_or(0); - match child.kill().await { - Ok(()) => { - log::info!("[local_ai] killed openhuman-owned ollama serve (pid={pid})"); - // Reap so the OS doesn't keep the zombie around on Unix. 
- let _ = child.wait().await; - } - Err(err) => { - log::warn!("[local_ai] kill of owned ollama serve pid={pid} failed: {err}"); - } - } - // Give the kernel a moment to release :11434 before any imminent - // respawn races for the same port. - tokio::time::sleep(std::time::Duration::from_millis(500)).await; - } - - /// Public shutdown hook for the Tauri exit lifecycle. - /// - /// Kills the openhuman-owned `ollama serve` (if any) and clears the - /// spawn marker so the next launch doesn't try to reclaim a daemon - /// that's already dead. Idempotent — safe to call from both - /// `RunEvent::ExitRequested` and window-close paths. - pub async fn shutdown_owned_ollama(&self, config: &Config) { - self.kill_ollama_server().await; - spawn_marker::clear_marker(config); - } - - pub(in crate::openhuman::inference::local::service) async fn has_model( - &self, - model: &str, - ) -> Result { - self.has_model_at(&ollama_base_url(), model).await - } - - pub(in crate::openhuman::inference::local::service) async fn has_model_for_config( - &self, - config: &Config, - model: &str, - ) -> Result { - self.has_model_at(&ollama_base_url_from_config(config), model) - .await - } - - async fn has_model_at(&self, base_url: &str, model: &str) -> Result { - // Issue the /api/tags GET directly. We previously short-circuited via - // ollama_healthy(), but that doubled the number of /api/tags round-trips - // on healthy polls (one probe + one tags fetch). With three has_model() - // calls per assets_status poll (chat, vision, embedding) that was 6 - // network calls instead of 3. The 500ms connect_timeout on the shared - // reqwest client (set in bootstrap.rs) bounds the cost when the server - // is down — the connect failure surfaces as Err, same as ollama_healthy() - // would have surfaced as `false`. 
- log::debug!("[local_ai] has_model_at: checking for model `{model}` at {base_url}"); - let response = self - .http - .get(format!("{base_url}/api/tags")) - // Per-request timeout matches list_models (5s). The shared client's - // connect_timeout only bounds the TCP handshake; without this a - // hung server (accepted connection, no response body) would block - // assets_status polls indefinitely. - .timeout(std::time::Duration::from_secs(5)) - .send() - .await - .map_err(|e| format!("ollama tags request failed: {e}"))?; - if !response.status().is_success() { - let status = response.status(); - let body = response.text().await.unwrap_or_default(); - let detail = body.trim(); - return Err(format!( - "ollama tags failed with status {}{}", - status, - if detail.is_empty() { - String::new() - } else { - format!(": {detail}") - } - )); - } - let payload: OllamaTagsResponse = response - .json() - .await - .map_err(|e| format!("ollama tags parse failed: {e}"))?; - - let target = model.to_ascii_lowercase(); - Ok(payload.models.iter().any(|m| { - let name = m.name.to_ascii_lowercase(); - name == target || name.starts_with(&(target.clone() + ":")) - })) - } -} - -/// Test connectivity to a user-supplied Ollama URL. -/// -/// Validates the URL via [`validate_ollama_url`], then issues a GET to -/// `{normalized_url}/api/tags` with a 3-second timeout. -/// Returns a JSON object with `reachable`, optional `error`, and -/// `models_count` when reachable. 
-pub(crate) async fn test_ollama_connection(url: &str) -> Result { - let normalized = validate_ollama_url(url)?; - log::debug!("[local_ai] test_ollama_connection: testing url={normalized}"); - - let client = reqwest::Client::builder() - .timeout(std::time::Duration::from_secs(3)) - .build() - .map_err(|e| format!("failed to build HTTP client: {e}"))?; - - match client.get(format!("{normalized}/api/tags")).send().await { - Ok(resp) if resp.status().is_success() => { - let models_count = resp - .json::() - .await - .map(|t| t.models.len()) - .unwrap_or(0); - log::debug!( - "[local_ai] test_ollama_connection: reachable url={normalized} models={models_count}" - ); - Ok(serde_json::json!({ - "reachable": true, - "error": null, - "models_count": models_count, - })) - } - Ok(resp) => { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - let err = format!("server responded with status {status}: {}", body.trim()); - log::debug!( - "[local_ai] test_ollama_connection: unreachable url={normalized} err={err}" - ); - Ok(serde_json::json!({ - "reachable": false, - "error": err, - "models_count": null, - })) - } - Err(e) => { - let err = e.to_string(); - log::debug!( - "[local_ai] test_ollama_connection: connection failed url={normalized} err={err}" - ); - Ok(serde_json::json!({ - "reachable": false, - "error": err, - "models_count": null, - })) - } - } -} - -fn interrupted_pull_settle_window_secs(observed_bytes: bool, settle_window_secs: u64) -> u64 { - if observed_bytes { - settle_window_secs.max(1) - } else { - 0 - } -} - -/// Kill a process by PID using `sysinfo`'s cross-platform `Process::kill`. -/// -/// Used by `reclaim_orphan_if_ours` where we no longer have the original -/// `tokio::process::Child` handle (the spawning openhuman crashed) but -/// recorded the PID in the spawn marker. 
-fn kill_pid_by_id(pid: u32) { - use sysinfo::{Pid, ProcessesToUpdate, System}; - let target = Pid::from_u32(pid); - let mut sys = System::new(); - sys.refresh_processes(ProcessesToUpdate::Some(&[target]), true); - match sys.process(target) { - Some(proc) => { - if proc.kill() { - log::info!("[local_ai] killed reclaimed ollama orphan pid={pid}"); - } else { - // sysinfo's kill returns false if the platform refused - // (permissions, race with exit). The next ollama_healthy() - // check will reveal whether the daemon is actually gone. - log::warn!("[local_ai] sysinfo Process::kill returned false for pid={pid}"); - } - } - None => { - log::debug!("[local_ai] kill_pid_by_id: pid={pid} no longer present"); - } - } -} - -#[cfg(test)] -#[path = "ollama_admin_tests.rs"] -mod tests; diff --git a/src/openhuman/inference/local/service/ollama_admin/binary.rs b/src/openhuman/inference/local/service/ollama_admin/binary.rs new file mode 100644 index 0000000000..1cee6e8467 --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/binary.rs @@ -0,0 +1,241 @@ +use std::path::{Path, PathBuf}; + +use crate::openhuman::config::Config; +use crate::openhuman::inference::local::install::{ + find_system_ollama_binary, run_ollama_install_script, +}; +use crate::openhuman::inference::local::process_util::apply_no_window; +use crate::openhuman::inference::paths::find_workspace_ollama_binary; + +use super::super::LocalAiService; + +impl LocalAiService { + pub(in crate::openhuman::inference::local::service) async fn resolve_or_install_ollama_binary( + &self, + config: &Config, + ) -> Result { + // 1. Check user-configured ollama_binary_path from Settings. 
+ if let Some(ref custom_path) = config.local_ai.ollama_binary_path { + let path = PathBuf::from(custom_path); + if path.is_file() { + log::debug!( + "[local_ai] using configured ollama_binary_path: {}", + path.display() + ); + return Ok(path); + } + log::warn!( + "[local_ai] configured ollama_binary_path does not exist: {}, falling through", + path.display() + ); + } + + // 2. OLLAMA_BIN env var. + if let Some(from_env) = std::env::var("OLLAMA_BIN") + .ok() + .filter(|v| !v.trim().is_empty()) + { + let path = PathBuf::from(from_env); + if path.exists() { + return Ok(path); + } + } + + if let Some(workspace_bin) = find_workspace_ollama_binary(config) { + if self.command_works(&workspace_bin).await { + log::debug!( + "[local_ai] using workspace-managed ollama binary: {}", + workspace_bin.display() + ); + return Ok(workspace_bin); + } + log::warn!( + "[local_ai] workspace-managed ollama binary is present but not executable, reinstalling: {}", + workspace_bin.display() + ); + } + + if self.command_works(Path::new("ollama")).await { + return Ok(PathBuf::from("ollama")); + } + + self.download_and_install_ollama(config).await?; + if let Some(installed) = find_workspace_ollama_binary(config) { + Ok(installed) + } else if let Some(system_bin) = find_system_ollama_binary() { + log::debug!( + "[local_ai] workspace binary not found after install, using system binary: {}", + system_bin.display() + ); + Ok(system_bin) + } else { + Err("Ollama download completed but executable is missing. \ + The installer may have placed it in an unexpected location. \ + Set OLLAMA_BIN or configure the path in Settings > Local Model." 
+ .to_string()) + } + } + + pub(in crate::openhuman::inference::local::service) async fn command_works( + &self, + command: &Path, + ) -> bool { + let mut cmd = tokio::process::Command::new(command); + cmd.arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()); + apply_no_window(&mut cmd); + cmd.status().await.map(|s| s.success()).unwrap_or(false) + } + + pub(in crate::openhuman::inference::local::service) async fn download_and_install_ollama( + &self, + config: &Config, + ) -> Result<(), String> { + let install_dir = crate::openhuman::inference::paths::workspace_ollama_dir(config); + tokio::fs::create_dir_all(&install_dir) + .await + .map_err(|e| format!("failed to create Ollama install directory: {e}"))?; + + // Crash-resume guard: Inno Setup's installer is spawned via + // PowerShell's `Start-Process`, which creates a top-level process. + // It outlives OpenHuman crashing, the user closing the app, or + // the bootstrap task being cancelled. If a prior launch left an + // OllamaSetup.exe running, wait for it instead of starting a + // second one — two concurrent installers race on the same dir + // and corrupt the install. + if crate::openhuman::inference::local::install::is_ollama_installer_running() { + log::info!( + "[local_ai] detected in-flight OllamaSetup.exe — \ + waiting for it to finish before deciding whether to install" + ); + { + let mut status = self.status.lock(); + status.state = "installing".to_string(); + status.warning = Some("Resuming Ollama install from a previous launch".to_string()); + status.error_detail = None; + status.error_category = None; + } + // Bounded wait: a stuck OllamaSetup.exe (e.g. Inno Setup dialog + // waiting on user input) must not block app startup forever. Five + // minutes covers a slow download + UAC prompt; past that we mark + // the install as failed-but-recoverable and let the caller decide. 
+ let wait_start = std::time::Instant::now(); + const INSTALLER_WAIT_TIMEOUT: std::time::Duration = + std::time::Duration::from_secs(5 * 60); + let mut timed_out = false; + while crate::openhuman::inference::local::install::is_ollama_installer_running() { + if wait_start.elapsed() >= INSTALLER_WAIT_TIMEOUT { + timed_out = true; + break; + } + tokio::time::sleep(std::time::Duration::from_secs(2)).await; + } + if timed_out { + log::warn!( + "[local_ai] OllamaSetup.exe still running after {}s — giving up the wait", + INSTALLER_WAIT_TIMEOUT.as_secs() + ); + let mut status = self.status.lock(); + status.state = "install_failed".to_string(); + status.warning = None; + status.error_category = Some("install_stuck".to_string()); + status.error_detail = Some(format!( + "Previous OllamaSetup.exe install was still running after {}s. \ + Cancel the installer (System tray / Task Manager) and retry.", + INSTALLER_WAIT_TIMEOUT.as_secs() + )); + return Err("Previous Ollama installer is stuck. Cancel it and retry.".to_string()); + } + // The prior installer is gone. If it succeeded, our regular + // discovery paths will find the binary and we can short-circuit + // the install entirely. If it failed, fall through and run a + // fresh install below. 
+ if find_workspace_ollama_binary(config).is_some() + || find_system_ollama_binary().is_some() + { + log::info!("[local_ai] resumed prior install completed successfully"); + return Ok(()); + } + log::warn!( + "[local_ai] prior installer exited but binary not found — running fresh install" + ); + } + + { + let mut status = self.status.lock(); + status.state = "installing".to_string(); + status.warning = Some("Installing Ollama runtime (first run)".to_string()); + status.download_progress = None; + status.downloaded_bytes = None; + status.total_bytes = None; + status.download_speed_bps = None; + status.eta_seconds = None; + status.error_detail = None; + status.error_category = None; + } + + let result = run_ollama_install_script(&install_dir).await?; + if !result.exit_status.success() { + let stderr_tail: String = result + .stderr + .lines() + .rev() + .take(20) + .collect::>() + .into_iter() + .rev() + .collect::>() + .join("\n"); + log::warn!( + "[local_ai] Ollama install script failed (exit={})\nstdout: {}\nstderr: {}", + result.exit_status, + result.stdout, + result.stderr, + ); + { + let mut status = self.status.lock(); + status.error_detail = Some(if stderr_tail.is_empty() { + result + .stdout + .lines() + .rev() + .take(20) + .collect::>() + .into_iter() + .rev() + .collect::>() + .join("\n") + } else { + stderr_tail + }); + status.error_category = Some("install".to_string()); + } + return Err(format!( + "Ollama install script failed (exit code {}). 
\ + Install Ollama manually from https://ollama.com or set its path in Settings > Local Model.", + result.exit_status.code().unwrap_or(-1) + )); + } + + log::debug!( + "[local_ai] Ollama install script succeeded, stdout: {}", + result.stdout.chars().take(500).collect::(), + ); + + let installed = find_workspace_ollama_binary(config) + .or_else(find_system_ollama_binary) + .ok_or_else(|| "Ollama installer finished but binary was not found".to_string())?; + log::debug!( + "[local_ai] Ollama install finished with binary at {}", + installed.display() + ); + + { + let mut status = self.status.lock(); + status.warning = Some("Ollama runtime installed".to_string()); + status.download_progress = Some(1.0); + } + Ok(()) + } +} diff --git a/src/openhuman/inference/local/service/ollama_admin/diagnostics.rs b/src/openhuman/inference/local/service/ollama_admin/diagnostics.rs new file mode 100644 index 0000000000..cb3bbafc29 --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/diagnostics.rs @@ -0,0 +1,456 @@ +use crate::openhuman::config::Config; +use crate::openhuman::inference::local::lm_studio::lm_studio_base_url; +use crate::openhuman::inference::local::model_requirements::{ + evaluate_context, ContextEligibility, MIN_CONTEXT_TOKENS, +}; +use crate::openhuman::inference::local::ollama::{ + ollama_base_url_from_config, OllamaModelShow, OllamaModelTag, OllamaShowRequest, + OllamaShowResponse, OllamaTagsResponse, +}; +use crate::openhuman::inference::local::provider::{provider_from_config, LocalAiProvider}; +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::presets::{self, VisionMode}; + +use super::super::LocalAiService; +use super::util::lm_studio_models_error_means_unreachable; + +impl LocalAiService { + /// Run full diagnostics: check Ollama server health, list installed models, + /// and verify expected models are present. Returns a JSON-serializable report. 
+ pub async fn diagnostics(&self, config: &Config) -> Result { + if provider_from_config(config) == LocalAiProvider::LmStudio { + return self.lm_studio_diagnostics(config).await; + } + + let base_url = ollama_base_url_from_config(config); + let healthy = self.ollama_healthy_at(&base_url).await; + let runner_ok = if healthy { + self.ollama_runner_ok_at(&base_url).await + } else { + false + }; + + log::debug!( + "[local_ai] diagnostics: entry base_url={} healthy={}", + base_url, + healthy + ); + + let (models, tags_error) = if healthy { + match self.list_models_at(&base_url).await { + Ok(models) => (models, None), + Err(e) => (vec![], Some(e)), + } + } else { + (vec![], None) + }; + + let expected_chat = model_ids::effective_chat_model_id(config); + let expected_embedding = model_ids::effective_embedding_model_id(config); + let expected_vision = model_ids::effective_vision_model_id(config); + + let model_names: Vec = models.iter().map(|m| m.name.to_ascii_lowercase()).collect(); + let has = |target: &str| -> bool { + let t = target.to_ascii_lowercase(); + model_names + .iter() + .any(|n| *n == t || n.starts_with(&(t.clone() + ":"))) + }; + + let chat_found = has(&expected_chat); + let embedding_found = has(&expected_embedding); + let vision_found = has(&expected_vision); + + // Per-model native context window (vs the memory-layer minimum) and + // chat-capability. `/api/show` is one bounded round-trip per installed + // model, fetched concurrently and only on this diagnostics path; the + // single call yields both signals. 
+ let model_shows: Vec = if healthy { + futures_util::future::join_all( + models + .iter() + .map(|m| self.fetch_model_show_at(&base_url, &m.name)), + ) + .await + } else { + Vec::new() + }; + let model_eligibilities: Vec = model_shows + .iter() + .map(|s| evaluate_context(s.context_length)) + .collect(); + + let installed_models: Vec = models + .iter() + .enumerate() + .map(|(i, m)| { + let eligibility = model_eligibilities.get(i).cloned(); + let context_length = match eligibility.as_ref() { + Some(ContextEligibility::Ok { context_length }) + | Some(ContextEligibility::BelowMinimum { context_length, .. }) => { + Some(*context_length) + } + _ => None, + }; + // `chat_capable: false` → embedding-only model the chat picker + // must hide; `null`/`true` → keep visible (fail-open). + // TAURI-RUST-4P6. + let chat_capable = model_shows.get(i).and_then(|s| s.chat_capable); + serde_json::json!({ + "name": m.name, + "size": m.size, + "modified_at": m.modified_at, + "context_length": context_length, + "eligibility": eligibility, + "chat_capable": chat_capable, + }) + }) + .collect(); + + // Resolve the eligibility of an expected (active) model by tag prefix. + let eligibility_for = |target: &str| -> Option { + let t = target.to_ascii_lowercase(); + models + .iter() + .zip(model_eligibilities.iter()) + .find(|(m, _)| { + let n = m.name.to_ascii_lowercase(); + n == t || n.starts_with(&(t.clone() + ":")) + }) + .map(|(_, e)| e.clone()) + }; + let chat_eligibility = eligibility_for(&expected_chat); + let embedding_eligibility = eligibility_for(&expected_embedding); + + let binary_path = self.resolve_binary_path(config); + + let mut issues: Vec = Vec::new(); + let repair_actions: Vec = Vec::new(); + + if !healthy { + issues.push(format!( + "Ollama server is not running or not reachable at {}", + base_url + )); + } + if healthy && !runner_ok { + issues.push( + "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." 
+ .to_string(), + ); + } + if healthy && !chat_found { + issues.push(format!("Chat model `{}` is not installed", expected_chat)); + } + if healthy && config.local_ai.preload_embedding_model && !embedding_found { + issues.push(format!( + "Embedding model `{}` is not installed", + expected_embedding + )); + } + if healthy + && matches!( + presets::vision_mode_for_config(&config.local_ai), + VisionMode::Bundled + ) + && !vision_found + { + issues.push(format!( + "Vision model `{}` is not installed", + expected_vision + )); + } + if let Some(ref e) = tags_error { + issues.push(format!("Failed to list models: {e}")); + } + // Reject installed-but-too-small active models: a context window + // below the memory-layer minimum silently truncates chunks / + // summaries and corrupts recall. + if let Some(ContextEligibility::BelowMinimum { + context_length, + required, + }) = embedding_eligibility.as_ref() + { + issues.push(format!( + "Embedding model `{}` has a {}-token context window; the memory layer \ + requires at least {}. Choose an embedding model with a larger context \ + (e.g. 
bge-m3).", + expected_embedding, context_length, required + )); + } + if let Some(ContextEligibility::BelowMinimum { + context_length, + required, + }) = chat_eligibility.as_ref() + { + issues.push(format!( + "Chat model `{}` has a {}-token context window; the memory layer \ + requires at least {}.", + expected_chat, context_length, required + )); + } + + log::debug!( + "[local_ai] diagnostics: healthy={} models={} issues={} repair_actions={}", + healthy, + models.len(), + issues.len(), + repair_actions.len(), + ); + + Ok(serde_json::json!({ + "ollama_running": healthy, + "ollama_runner_ok": runner_ok, + "ollama_base_url": base_url, + "ollama_binary_path": binary_path, + "installed_models": installed_models, + "context_requirement": { + "min_context_tokens": MIN_CONTEXT_TOKENS, + }, + "vision_mode": presets::vision_mode_for_config(&config.local_ai), + "expected": { + "chat_model": expected_chat, + "chat_found": chat_found, + "chat_eligibility": chat_eligibility, + "embedding_model": expected_embedding, + "embedding_found": embedding_found, + "embedding_eligibility": embedding_eligibility, + "vision_model": expected_vision, + "vision_found": vision_found, + }, + "issues": issues, + "repair_actions": repair_actions, + "ok": issues.is_empty(), + })) + } + + pub(in crate::openhuman::inference::local::service) async fn list_models_at( + &self, + base: &str, + ) -> Result, String> { + let url = format!("{base}/api/tags"); + tracing::debug!( + target: "local_ai::ollama_admin", + %base, + %url, + "[local_ai:ollama_admin] list_models: sending GET" + ); + + let response = self + .http + .get(&url) + .timeout(std::time::Duration::from_secs(5)) + .send() + .await + .map_err(|e| { + tracing::error!( + target: "local_ai::ollama_admin", + %url, + error = %e, + "[local_ai:ollama_admin] list_models: request send failed" + ); + format!("ollama tags request failed: {e}") + })?; + + let status = response.status(); + tracing::debug!( + target: "local_ai::ollama_admin", + %url, + 
%status, + "[local_ai:ollama_admin] list_models: received response" + ); + + if !status.is_success() { + let body = response.text().await.unwrap_or_default(); + tracing::error!( + target: "local_ai::ollama_admin", + %url, + %status, + body = %body, + "[local_ai:ollama_admin] list_models: non-success response" + ); + return Err(format!( + "ollama tags failed with status {}: {}", + status, + body.trim() + )); + } + + // Read the body as text first so we can log it if JSON parsing fails. + let body = response.text().await.map_err(|e| { + tracing::error!( + target: "local_ai::ollama_admin", + %url, + error = %e, + "[local_ai:ollama_admin] list_models: failed to read response body" + ); + format!("ollama tags body read failed: {e}") + })?; + + let payload: OllamaTagsResponse = serde_json::from_str(&body).map_err(|e| { + tracing::error!( + target: "local_ai::ollama_admin", + %url, + body = %body, + error = %e, + "[local_ai:ollama_admin] list_models: JSON parse failed" + ); + format!("ollama tags parse failed: {e}") + })?; + + tracing::debug!( + target: "local_ai::ollama_admin", + %url, + models = payload.models.len(), + "[local_ai:ollama_admin] list_models: parsed successfully" + ); + + Ok(payload.models) + } + + /// Fetch a model's native context window and chat-capability via Ollama + /// `POST /api/show`. + /// + /// Both fields default to `None` on any failure (unreachable, non-2xx, + /// parse error, or the metadata key is absent) — the caller maps a `None` + /// context to an `Unknown` eligibility verdict, and a `None` chat-capable + /// to "keep visible" (fail-open). One bounded HTTP round-trip per model; + /// only ever invoked from the diagnostics path. The single round-trip + /// yields both signals (context for the memory-layer gate, capability for + /// the chat-picker filter — TAURI-RUST-4P6). 
+ pub(in crate::openhuman::inference::local::service) async fn fetch_model_show_at( + &self, + base_url: &str, + model: &str, + ) -> OllamaModelShow { + let url = format!("{}/api/show", base_url.trim_end_matches('/')); + let resp = match self + .http + .post(&url) + .json(&OllamaShowRequest { + model: model.to_string(), + }) + .timeout(std::time::Duration::from_secs(5)) + .send() + .await + { + Ok(resp) => resp, + Err(e) => { + tracing::debug!( + target: "local_ai::ollama_admin", + %url, model, error = %e, + "[local_ai:ollama_admin] fetch_model_show: request failed" + ); + return OllamaModelShow::default(); + } + }; + let status = resp.status(); + if !status.is_success() { + tracing::debug!( + target: "local_ai::ollama_admin", + %url, model, %status, + "[local_ai:ollama_admin] fetch_model_show: non-success response" + ); + return OllamaModelShow::default(); + } + let parsed: OllamaShowResponse = match resp.json().await { + Ok(parsed) => parsed, + Err(e) => { + tracing::debug!( + target: "local_ai::ollama_admin", + %url, model, error = %e, + "[local_ai:ollama_admin] fetch_model_show: JSON parse failed" + ); + return OllamaModelShow::default(); + } + }; + let show = OllamaModelShow { + context_length: parsed.context_length(), + chat_capable: parsed.chat_capability(), + }; + tracing::debug!( + target: "local_ai::ollama_admin", + model, + context_length = ?show.context_length, + chat_capable = ?show.chat_capable, + "[local_ai:ollama_admin] fetch_model_show: resolved" + ); + show + } + + async fn lm_studio_diagnostics(&self, config: &Config) -> Result { + let base_url = lm_studio_base_url(config); + let models_result = self.list_lm_studio_models(config).await; + let (models, models_error, healthy) = match models_result { + Ok(models) => (models, None, true), + Err(err) => { + let reachable = !lm_studio_models_error_means_unreachable(&err); + (vec![], Some(err), reachable) + } + }; + + let expected_chat = model_ids::effective_chat_model_id(config); + let model_names: Vec 
= models.iter().map(|m| m.name.to_ascii_lowercase()).collect(); + let chat_found = model_names + .iter() + .any(|name| name == &expected_chat.to_ascii_lowercase()); + + let mut issues: Vec = Vec::new(); + let repair_actions: Vec = Vec::new(); + + if !healthy { + let detail = models_error + .as_deref() + .map(|err| format!(": {err}")) + .unwrap_or_default(); + issues.push(format!( + "LM Studio server is not running or not reachable at {}{}", + base_url, detail + )); + } + if healthy && models_error.is_none() && models.is_empty() { + issues.push("LM Studio is reachable but no models are loaded".to_string()); + } else if healthy && models_error.is_none() && !chat_found { + issues.push(format!( + "Chat model `{}` is not loaded in LM Studio", + expected_chat + )); + } + if healthy { + if let Some(ref err) = models_error { + issues.push(format!("Failed to list LM Studio models: {err}")); + } + } + + tracing::debug!( + provider = "lm_studio", + %base_url, + healthy, + models = models.len(), + issues = issues.len(), + "[local_ai] diagnostics" + ); + + Ok(serde_json::json!({ + "provider": "lm_studio", + "lm_studio_running": healthy, + "lm_studio_base_url": base_url, + "ollama_running": false, + "ollama_base_url": serde_json::Value::Null, + "ollama_binary_path": serde_json::Value::Null, + "installed_models": models, + "vision_mode": "disabled", + "expected": { + "chat_model": expected_chat, + "chat_found": chat_found, + "embedding_model": model_ids::effective_embedding_model_id(config), + "embedding_found": false, + "vision_model": model_ids::effective_vision_model_id(config), + "vision_found": false, + }, + "issues": issues, + "repair_actions": repair_actions, + "ok": issues.is_empty(), + })) + } +} diff --git a/src/openhuman/inference/local/service/ollama_admin/health.rs b/src/openhuman/inference/local/service/ollama_admin/health.rs new file mode 100644 index 0000000000..428569223f --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/health.rs @@ -0,0 
+1,288 @@ +use std::path::PathBuf; + +use crate::openhuman::config::Config; +use crate::openhuman::inference::local::install::find_system_ollama_binary; +use crate::openhuman::inference::local::ollama::{ollama_base_url, ollama_base_url_from_config}; +use crate::openhuman::inference::paths::{find_workspace_ollama_binary, workspace_ollama_binary}; + +use super::super::spawn_marker; +use super::super::LocalAiService; + +impl LocalAiService { + /// Check Ollama health against the given base URL. + pub(in crate::openhuman::inference::local::service) async fn ollama_healthy_at( + &self, + base_url: &str, + ) -> bool { + tracing::debug!( + target: "local_ai::ollama_admin", + %base_url, + "[local_ai:ollama_admin] ollama_healthy_at: checking" + ); + self.http + .get(format!("{base_url}/api/tags")) + .timeout(std::time::Duration::from_secs(2)) + .send() + .await + .map(|r| r.status().is_success()) + .unwrap_or(false) + } + + /// Backward-compat wrapper — resolves the URL from env vars only (no config). + /// Prefer [`ollama_healthy_at`] when a `Config` is available. + pub(in crate::openhuman::inference::local::service) async fn ollama_healthy(&self) -> bool { + self.ollama_healthy_at(&ollama_base_url()).await + } + + /// Filesystem-only precondition: is *any* Ollama binary discoverable? + /// + /// This is the cheapest possible check — no process spawns, no HTTP, no + /// timeouts. Callers that need to decide whether it's even worth talking + /// to `/api/tags` should consult this first. Returning `false` here means + /// the UI should drive the user to install Ollama instead of polling for + /// model state that can never appear. 
+ pub(in crate::openhuman::inference::local::service) fn ollama_binary_present( + &self, + config: &Config, + ) -> bool { + if let Some(ref custom) = config.local_ai.ollama_binary_path { + if PathBuf::from(custom).is_file() { + return true; + } + } + if let Some(env_path) = std::env::var("OLLAMA_BIN") + .ok() + .filter(|v| !v.trim().is_empty()) + { + if PathBuf::from(env_path).is_file() { + return true; + } + } + if find_workspace_ollama_binary(config).is_some() { + return true; + } + find_system_ollama_binary().is_some() + } + + /// Quick check that the Ollama runner can actually exec models against the given URL. + pub(in crate::openhuman::inference::local::service) async fn ollama_runner_ok_at( + &self, + base_url: &str, + ) -> bool { + let resp = self + .http + .get(format!("{base_url}/api/tags")) + .timeout(std::time::Duration::from_secs(3)) + .send() + .await; + match resp { + Ok(r) if r.status().is_success() => { + // Tags endpoint works — but the runner error only shows up on model exec. + // Do a lightweight pull-status check (won't download, just checks). + let check = self + .http + .post(format!("{base_url}/api/show")) + .json(&serde_json::json!({"name": "___nonexistent_probe___"})) + .timeout(std::time::Duration::from_secs(3)) + .send() + .await; + match check { + Ok(r) => { + let status = r.status().as_u16(); + let body = r.text().await.unwrap_or_default(); + // 404 = model not found — runner is fine. 500 with fork/exec = broken. + if status == 500 && body.contains("fork/exec") { + log::warn!("[local_ai] ollama runner broken: {body}"); + return false; + } + true + } + Err(_) => true, // network error, assume ok + } + } + _ => false, + } + } + + /// Kill any running Ollama server process so we can restart with the correct binary. + /// Kill the `ollama serve` daemon openhuman itself spawned, if any. + /// + /// **No-op when openhuman never spawned a daemon** (i.e. 
it adopted an + /// externally-managed one via the `ollama_healthy()` fast-path, or no + /// daemon was started at all). This avoids the friendly-fire bug from + /// the previous blanket `taskkill /IM ollama.exe` / `pkill -f` which + /// would terminate any Ollama on the host — including ones started by + /// the user's CLI, tray app, or other tooling. + /// + /// External daemons can be replaced/restarted by the user; killing + /// them out from under their owner is never the right move from inside + /// a desktop app. + pub(in crate::openhuman::inference::local::service) async fn kill_ollama_server(&self) { + let maybe_child = self.owned_ollama.lock().take(); + let Some(mut child) = maybe_child else { + log::debug!( + "[local_ai] kill_ollama_server: no openhuman-owned daemon; \ + leaving any external Ollama on :11434 untouched" + ); + return; + }; + let pid = child.id().unwrap_or(0); + match child.kill().await { + Ok(()) => { + log::info!("[local_ai] killed openhuman-owned ollama serve (pid={pid})"); + // Reap so the OS doesn't keep the zombie around on Unix. + let _ = child.wait().await; + } + Err(err) => { + log::warn!("[local_ai] kill of owned ollama serve pid={pid} failed: {err}"); + } + } + // Give the kernel a moment to release :11434 before any imminent + // respawn races for the same port. + tokio::time::sleep(std::time::Duration::from_millis(500)).await; + } + + /// Public shutdown hook for the Tauri exit lifecycle. + /// + /// Kills the openhuman-owned `ollama serve` (if any) and clears the + /// spawn marker so the next launch doesn't try to reclaim a daemon + /// that's already dead. Idempotent — safe to call from both + /// `RunEvent::ExitRequested` and window-close paths. + pub async fn shutdown_owned_ollama(&self, config: &Config) { + self.kill_ollama_server().await; + spawn_marker::clear_marker(config); + } + + pub(in crate::openhuman::inference::local::service) fn resolve_binary_path( + &self, + config: &Config, + ) -> Option { + // 1. 
Explicit user-configured path in Settings. + if let Some(ref custom) = config.local_ai.ollama_binary_path { + let p = PathBuf::from(custom); + if p.is_file() { + log::debug!( + "[local_ai] resolve_binary_path: using configured path {}", + p.display() + ); + return Some(custom.clone()); + } + } + + // 2. OLLAMA_BIN env var (mirrors bootstrap detection). + if let Some(from_env) = std::env::var("OLLAMA_BIN") + .ok() + .filter(|v| !v.trim().is_empty()) + { + let p = PathBuf::from(&from_env); + if p.is_file() { + log::debug!( + "[local_ai] resolve_binary_path: using OLLAMA_BIN {}", + p.display() + ); + return Some(from_env); + } + } + + // 3. Workspace-managed binary installed by the app. + let workspace_bin = workspace_ollama_binary(config); + if workspace_bin.is_file() { + log::debug!( + "[local_ai] resolve_binary_path: using workspace binary {}", + workspace_bin.display() + ); + return Some(workspace_bin.display().to_string()); + } + + // 4. Bare `ollama` on PATH — same as bootstrap's `which ollama` step. + let binary_name = if cfg!(windows) { + "ollama.exe" + } else { + "ollama" + }; + if let Some(path_var) = std::env::var_os("PATH") { + for dir in std::env::split_paths(&path_var) { + let candidate = dir.join(binary_name); + if candidate.is_file() { + log::debug!( + "[local_ai] resolve_binary_path: found on PATH at {}", + candidate.display() + ); + return Some(candidate.display().to_string()); + } + } + } + + // 5. Platform-specific well-known locations (macOS bundles, Windows, Linux). 
+ crate::openhuman::inference::local::install::find_system_ollama_binary() + .map(|p| p.display().to_string()) + } + + pub(in crate::openhuman::inference::local::service) async fn has_model( + &self, + model: &str, + ) -> Result { + self.has_model_at(&ollama_base_url(), model).await + } + + pub(in crate::openhuman::inference::local::service) async fn has_model_for_config( + &self, + config: &Config, + model: &str, + ) -> Result { + self.has_model_at(&ollama_base_url_from_config(config), model) + .await + } + + pub(in crate::openhuman::inference::local::service) async fn has_model_at( + &self, + base_url: &str, + model: &str, + ) -> Result { + use crate::openhuman::inference::local::ollama::OllamaTagsResponse; + // Issue the /api/tags GET directly. We previously short-circuited via + // ollama_healthy(), but that doubled the number of /api/tags round-trips + // on healthy polls (one probe + one tags fetch). With three has_model() + // calls per assets_status poll (chat, vision, embedding) that was 6 + // network calls instead of 3. The 500ms connect_timeout on the shared + // reqwest client (set in bootstrap.rs) bounds the cost when the server + // is down — the connect failure surfaces as Err, same as ollama_healthy() + // would have surfaced as `false`. + log::debug!("[local_ai] has_model_at: checking for model `{model}` at {base_url}"); + let response = self + .http + .get(format!("{base_url}/api/tags")) + // Per-request timeout matches list_models (5s). The shared client's + // connect_timeout only bounds the TCP handshake; without this a + // hung server (accepted connection, no response body) would block + // assets_status polls indefinitely. 
+ .timeout(std::time::Duration::from_secs(5)) + .send() + .await + .map_err(|e| format!("ollama tags request failed: {e}"))?; + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + let detail = body.trim(); + return Err(format!( + "ollama tags failed with status {}{}", + status, + if detail.is_empty() { + String::new() + } else { + format!(": {detail}") + } + )); + } + let payload: OllamaTagsResponse = response + .json() + .await + .map_err(|e| format!("ollama tags parse failed: {e}"))?; + + let target = model.to_ascii_lowercase(); + Ok(payload.models.iter().any(|m| { + let name = m.name.to_ascii_lowercase(); + name == target || name.starts_with(&(target.clone() + ":")) + })) + } +} diff --git a/src/openhuman/inference/local/service/ollama_admin/mod.rs b/src/openhuman/inference/local/service/ollama_admin/mod.rs new file mode 100644 index 0000000000..69ebd0ff3e --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/mod.rs @@ -0,0 +1,16 @@ +// Sub-modules split by concern from the original ollama_admin.rs (1586 lines). +mod binary; +mod diagnostics; +mod health; +mod model_pull; +mod server; +mod util; + +// Re-export free functions that form the public/crate API of this module. 
+pub(crate) use util::interrupted_pull_settle_window_secs; +pub(crate) use util::kill_pid_by_id; +pub(crate) use util::test_ollama_connection; + +#[cfg(test)] +#[path = "../ollama_admin_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/local/service/ollama_admin/model_pull.rs b/src/openhuman/inference/local/service/ollama_admin/model_pull.rs new file mode 100644 index 0000000000..b9761227cd --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/model_pull.rs @@ -0,0 +1,321 @@ +use futures_util::StreamExt; + +use crate::openhuman::config::Config; +use crate::openhuman::inference::local::ollama::{ + ollama_base_url_from_config, OllamaPullEvent, OllamaPullProgress, OllamaPullRequest, +}; +use crate::openhuman::inference::model_ids; +use crate::openhuman::inference::presets::{self, VisionMode}; + +use super::super::LocalAiService; +use super::util::interrupted_pull_settle_window_secs; + +impl LocalAiService { + pub(in crate::openhuman::inference::local::service) async fn ensure_models_available( + &self, + config: &Config, + ) -> Result<(), String> { + let chat_model = model_ids::effective_chat_model_id(config); + self.ensure_ollama_model_available(config, &chat_model, "chat") + .await?; + + match presets::vision_mode_for_config(&config.local_ai) { + VisionMode::Disabled => { + self.status.lock().vision_state = "disabled".to_string(); + } + VisionMode::Ondemand => { + self.status.lock().vision_state = "idle".to_string(); + } + VisionMode::Bundled => { + let vision_model = model_ids::effective_vision_model_id(config); + self.ensure_ollama_model_available(config, &vision_model, "vision") + .await?; + self.status.lock().vision_state = "ready".to_string(); + } + } + + let embedding_model = model_ids::effective_embedding_model_id(config); + if config.local_ai.preload_embedding_model { + self.ensure_ollama_model_available(config, &embedding_model, "embedding") + .await?; + self.status.lock().embedding_state = "ready".to_string(); + } + + if 
config.local_ai.preload_stt_model { + self.ensure_stt_asset_available(config).await?; + } + + if config.local_ai.preload_tts_voice { + self.ensure_tts_asset_available(config).await?; + } + + Ok(()) + } + + pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_model_available( + &self, + config: &Config, + model_id: &str, + label: &str, + ) -> Result<(), String> { + let base_url = ollama_base_url_from_config(config); + if self.has_model_at(&base_url, model_id).await? { + return Ok(()); + } + + { + let mut status = self.status.lock(); + status.state = "downloading".to_string(); + status.warning = Some(format!( + "Pulling {} model `{}` from Ollama library", + label, model_id + )); + match label { + "vision" => status.vision_state = "downloading".to_string(), + "embedding" => status.embedding_state = "downloading".to_string(), + _ => {} + } + status.download_progress = Some(0.0); + status.downloaded_bytes = Some(0); + status.total_bytes = None; + status.download_speed_bps = Some(0); + status.eta_seconds = None; + } + + const MAX_PULL_RETRIES: usize = 3; + const PULL_RETRY_BACKOFF_MS: u64 = 1_500; + const PULL_INTERRUPT_SETTLE_SECS: u64 = 20; + let mut last_error: Option = None; + + for attempt in 1..=MAX_PULL_RETRIES { + if attempt > 1 { + let retry_msg = format!( + "Ollama pull stream interrupted. 
Retrying {}/{}...", + attempt, MAX_PULL_RETRIES + ); + { + let mut status = self.status.lock(); + status.state = "downloading".to_string(); + status.warning = Some(retry_msg.clone()); + } + log::warn!( + "[local_ai] pull retry {}/{} for model `{}` after interruption", + attempt, + MAX_PULL_RETRIES, + model_id + ); + tokio::time::sleep(std::time::Duration::from_millis( + PULL_RETRY_BACKOFF_MS * attempt as u64, + )) + .await; + } + + let response = match self + .http + .post(format!("{base_url}/api/pull")) + .json(&OllamaPullRequest { + name: model_id.to_string(), + stream: true, + }) + // Model pulls are long-running streaming responses; the default 30s + // client timeout can interrupt healthy downloads mid-stream. + .timeout(std::time::Duration::from_secs(30 * 60)) + .send() + .await + { + Ok(response) => response, + Err(e) => { + let err = format!("ollama pull request failed: {e}"); + last_error = Some(err.clone()); + if attempt < MAX_PULL_RETRIES { + continue; + } + return Err(format!("{err} after {MAX_PULL_RETRIES} attempts")); + } + }; + if !response.status().is_success() { + let status = response.status(); + let body = response.text().await.unwrap_or_default(); + let detail = body.trim(); + return Err(format!( + "ollama pull failed with status {}{}", + status, + if detail.is_empty() { + String::new() + } else { + format!(": {detail}") + } + )); + } + + let mut stream = response.bytes_stream(); + let mut pending = String::new(); + let mut stream_error: Option = None; + let started_at = std::time::Instant::now(); + let mut progress = OllamaPullProgress::default(); + let mut observed_bytes = false; + while let Some(item) = stream.next().await { + let chunk = match item { + Ok(value) => value, + Err(e) => { + stream_error = Some(format!("ollama pull stream error: {e}")); + break; + } + }; + pending.push_str(&String::from_utf8_lossy(&chunk)); + while let Some(pos) = pending.find('\n') { + let line = pending[..pos].trim().to_string(); + pending = pending[pos + 
1..].to_string(); + if line.is_empty() { + continue; + } + let event: OllamaPullEvent = match serde_json::from_str(&line) { + Ok(v) => v, + Err(_) => continue, + }; + if let Some(err) = event.error { + return Err(format!("ollama pull error: {err}")); + } + + progress.observe(&event); + let completed = progress.aggregate_downloaded(); + let total = progress.aggregate_total(); + let elapsed = started_at.elapsed().as_secs_f64().max(0.001); + let speed_bps = (completed as f64 / elapsed).round().max(0.0) as u64; + let eta_seconds = total.and_then(|t| { + if completed >= t || speed_bps == 0 { + None + } else { + Some((t.saturating_sub(completed)) / speed_bps.max(1)) + } + }); + observed_bytes |= completed > 0; + + let mut status = self.status.lock(); + if let Some(status_text) = event.status.as_deref() { + status.warning = Some(format!("Ollama pull: {status_text}")); + if status_text.eq_ignore_ascii_case("success") { + status.download_progress = Some(1.0); + } + } + status.downloaded_bytes = Some(completed); + status.total_bytes = total; + status.download_speed_bps = Some(speed_bps); + status.eta_seconds = eta_seconds; + status.download_progress = total + .map(|t| (completed as f32 / t as f32).clamp(0.0, 1.0)) + .or(Some(0.0)); + } + } + + if let Some(err) = stream_error { + last_error = Some(err.clone()); + let resumed = self + .wait_for_model_after_pull_interruption( + &base_url, + model_id, + attempt, + MAX_PULL_RETRIES, + observed_bytes, + PULL_INTERRUPT_SETTLE_SECS, + ) + .await?; + if resumed { + break; + } + if attempt < MAX_PULL_RETRIES { + continue; + } + return Err(format!("{err} after {MAX_PULL_RETRIES} attempts")); + } + + if self.has_model_at(&base_url, model_id).await? 
{ + break; + } + + last_error = Some(format!( + "ollama pull finished but model `{}` was not found", + model_id + )); + let resumed = self + .wait_for_model_after_pull_interruption( + &base_url, + model_id, + attempt, + MAX_PULL_RETRIES, + observed_bytes, + PULL_INTERRUPT_SETTLE_SECS, + ) + .await?; + if resumed { + break; + } + if attempt < MAX_PULL_RETRIES { + continue; + } + } + + if !self.has_model_at(&base_url, model_id).await? { + return Err(last_error.unwrap_or_else(|| { + format!( + "ollama pull finished but model `{}` was not found", + model_id + ) + })); + } + + match label { + "vision" => self.status.lock().vision_state = "ready".to_string(), + "embedding" => self.status.lock().embedding_state = "ready".to_string(), + _ => {} + } + + Ok(()) + } + + async fn wait_for_model_after_pull_interruption( + &self, + base_url: &str, + model_id: &str, + attempt: usize, + max_attempts: usize, + observed_bytes: bool, + settle_window_secs: u64, + ) -> Result { + let wait_secs = interrupted_pull_settle_window_secs(observed_bytes, settle_window_secs); + if wait_secs == 0 { + return Ok(false); + } + + { + let mut status = self.status.lock(); + status.state = "downloading".to_string(); + status.warning = Some(format!( + "Ollama pull stream disconnected. Waiting up to {wait_secs}s for ongoing download to resume before retry {}/{}.", + attempt + 1, + max_attempts + )); + } + log::warn!( + "[local_ai] pull stream interrupted for model `{}`; waiting up to {}s before retry {}/{}", + model_id, + wait_secs, + attempt + 1, + max_attempts + ); + + let deadline = std::time::Instant::now() + std::time::Duration::from_secs(wait_secs); + while std::time::Instant::now() < deadline { + if self.has_model_at(base_url, model_id).await? 
{ + log::info!( + "[local_ai] model `{}` became available after interrupted pull stream", + model_id + ); + return Ok(true); + } + tokio::time::sleep(std::time::Duration::from_secs(1)).await; + } + + Ok(false) + } +} diff --git a/src/openhuman/inference/local/service/ollama_admin/server.rs b/src/openhuman/inference/local/service/ollama_admin/server.rs new file mode 100644 index 0000000000..a02f0060f8 --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/server.rs @@ -0,0 +1,246 @@ +use std::path::Path; + +use crate::openhuman::config::Config; +use crate::openhuman::inference::local::ollama::ollama_base_url_from_config; +use crate::openhuman::inference::local::process_util::apply_no_window; + +use super::super::spawn_marker::{self, OllamaSpawnMarker}; +use super::super::LocalAiService; +use super::util::kill_pid_by_id; + +impl LocalAiService { + pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_server( + &self, + config: &Config, + ) -> Result<(), String> { + let base_url = ollama_base_url_from_config(config); + if self.ollama_healthy_at(&base_url).await { + if self.ollama_runner_ok_at(&base_url).await { + return Ok(()); + } + log::warn!("[local_ai] Ollama server responds but runner is broken"); + return Err( + "Configured Ollama runtime is reachable but cannot execute models. Restart the external runtime and retry." + .to_string(), + ); + } + Err(format!( + "OpenHuman no longer starts or installs Ollama automatically. Start your inference runtime yourself and make sure it is reachable at {base_url}." + )) + } + + /// Alias of `ensure_ollama_server` in external-runtime mode. + /// OpenHuman no longer installs or starts Ollama automatically; the + /// "fresh" retry path is a no-op that defers to the standard check. 
+ pub(in crate::openhuman::inference::local::service) async fn ensure_ollama_server_fresh( + &self, + config: &Config, + ) -> Result<(), String> { + self.ensure_ollama_server(config).await + } + + /// Check if a healthy daemon on `:11434` is actually openhuman's own + /// orphan from a prior session (i.e. we crashed before the graceful + /// shutdown hook fired). If so, kill it so the upcoming spawn can + /// resume owned-child tracking. External daemons are never touched. + pub(in crate::openhuman::inference::local::service) async fn reclaim_orphan_if_ours( + &self, + config: &Config, + ) { + let Some(marker) = spawn_marker::read_marker(config) else { + return; + }; + if !spawn_marker::pid_is_alive(marker.pid) { + log::debug!( + "[local_ai] stale ollama spawn marker (pid={} no longer alive); clearing", + marker.pid + ); + spawn_marker::clear_marker(config); + return; + } + let base_url = ollama_base_url_from_config(config); + if !self.ollama_healthy_at(&base_url).await { + // PID is alive but :11434 isn't healthy — either Ollama is + // mid-boot or the recorded PID was reused for an unrelated + // process. Leave the marker; either the daemon will come up + // and the next call will reclaim it, or `start_and_wait_for_server` + // will overwrite it on a fresh spawn. + log::debug!( + "[local_ai] ollama spawn marker pid={} alive but :11434 not healthy yet; \ + deferring reclaim", + marker.pid + ); + return; + } + log::info!( + "[local_ai] reclaiming openhuman-owned ollama orphan from prior session \ + (pid={}, binary={})", + marker.pid, + marker.binary_path + ); + kill_pid_by_id(marker.pid); + spawn_marker::clear_marker(config); + // Brief settle so the listener releases :11434 before we respawn. 
+ tokio::time::sleep(std::time::Duration::from_millis(500)).await; + } + + pub(in crate::openhuman::inference::local::service) async fn start_and_wait_for_server( + &self, + config: &Config, + ollama_cmd: &Path, + ) -> Result<(), String> { + let base_url = ollama_base_url_from_config(config); + if self.ollama_healthy_at(&base_url).await { + // A daemon is already up — adopt it. We did NOT spawn it (or any + // prior spawn was already reclaimed in `reclaim_orphan_if_ours`), + // so `owned_ollama` stays `None` and the daemon survives openhuman + // exit. This is the contract: external/adopted daemons are never + // killed; only our own children die with us. + return Ok(()); + } + + // Defensive: if a previous spawn attempt left a stale `Child` in + // `owned_ollama` (e.g. ensure_ollama_server_fresh after a failed + // first pass), clear it before respawning. Without this, the new + // child would replace the field and the old one would be leaked. + self.kill_ollama_server().await; + spawn_marker::clear_marker(config); + + let mut version_cmd = tokio::process::Command::new(ollama_cmd); + version_cmd + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()); + apply_no_window(&mut version_cmd); + if let Err(err) = version_cmd.status().await { + return Err(format!( + "Ollama binary not available ({}; error: {err}).", + ollama_cmd.display() + )); + } + + let mut serve_cmd = tokio::process::Command::new(ollama_cmd); + serve_cmd + .arg("serve") + .stdout(std::process::Stdio::null()) + // Pipe stderr so we can detect specific failure modes — most + // importantly Windows Controlled Folder Access blocks, which + // surface as "Access is denied" / "operation was blocked" / + // 0x80070005 in Ollama's own stderr when CFA refuses writes + // to the model cache or even prevents the binary from running. 
+ .stderr(std::process::Stdio::piped()); + apply_no_window(&mut serve_cmd); + let mut serve_child = match serve_cmd.spawn() { + Ok(child) => { + log::debug!( + "[local_ai] spawned `ollama serve` from {}", + ollama_cmd.display() + ); + child + } + Err(err) => { + log::warn!( + "[local_ai] failed to spawn `ollama serve` from {}: {err}", + ollama_cmd.display() + ); + return Err(format!( + "Failed to start Ollama server ({}): {err}", + ollama_cmd.display() + )); + } + }; + + // Drain stderr into a bounded buffer in the background. We keep + // the last ~16KB so we can quote it back to the user / Sentry on + // failure but don't grow unbounded if Ollama logs heavily. + let stderr_buffer = std::sync::Arc::new(parking_lot::Mutex::new(String::new())); + if let Some(stderr) = serve_child.stderr.take() { + let buf = std::sync::Arc::clone(&stderr_buffer); + tokio::spawn(async move { + use tokio::io::{AsyncBufReadExt, BufReader}; + let mut reader = BufReader::new(stderr); + let mut line = String::new(); + while reader + .read_line(&mut line) + .await + .map(|n| n > 0) + .unwrap_or(false) + { + let mut b = buf.lock(); + let new_len = b.len() + line.len(); + if new_len > 16 * 1024 { + let drop_n = new_len - 16 * 1024; + let drop_n = std::cmp::min(drop_n, b.len()); + b.drain(0..drop_n); + } + b.push_str(&line); + line.clear(); + } + }); + } + + for _ in 0..20 { + if self.ollama_healthy_at(&base_url).await { + // Daemon is up. Take ownership so we can kill it on exit and + // write the spawn marker so a crashed openhuman can reclaim + // this PID on next launch instead of orphaning it forever. 
+ let pid = serve_child.id().unwrap_or(0); + if pid == 0 { + log::warn!( + "[local_ai] spawned ollama child has no PID — owned-child kill \ + will be a no-op but daemon is healthy, continuing" + ); + } else { + let marker = OllamaSpawnMarker::new(pid, ollama_cmd); + if let Err(e) = spawn_marker::write_marker(config, &marker) { + // Marker write failure is non-fatal — graceful shutdown + // still kills via the in-memory `Child` handle. Only + // crash-recovery on next launch is degraded. + log::warn!( + "[local_ai] failed to write ollama spawn marker (pid={pid}): {e}" + ); + } + } + *self.owned_ollama.lock() = Some(serve_child); + return Ok(()); + } + tokio::time::sleep(std::time::Duration::from_millis(300)).await; + } + + // Health probe timed out. The serve child is unhealthy and may be + // holding the Ollama port — kill it before returning so the next + // bootstrap attempt isn't blocked by a zombie listener. + if let Err(err) = serve_child.kill().await { + log::warn!("[local_ai] failed to kill unhealthy `ollama serve` child: {err}"); + } + + // Classify the failure from captured stderr. + let stderr_snapshot = stderr_buffer.lock().clone(); + let lowered = stderr_snapshot.to_ascii_lowercase(); + // Match only explicit Controlled Folder Access markers. Generic + // strings like "access is denied" or "is not recognized as a trusted" + // appear in many unrelated Windows errors and previously caused us + // to surface a misleading CFA remediation message. + let cfa_signatures = ["controlled folder access", "operation was blocked"]; + let cfa_hit = cfa_signatures.iter().any(|sig| lowered.contains(sig)); + if cfa_hit { + log::warn!( + "[local_ai] Ollama failed to start — Controlled Folder Access blocked it. \ + stderr tail: {stderr_snapshot}" + ); + self.status.lock().error_detail = Some(stderr_snapshot); + return Err(format!( + "Ollama was blocked by Windows Controlled Folder Access. 
\ + Open Windows Security → Ransomware protection → Allow an app \ + through Controlled folder access, and add `{}`.", + ollama_cmd.display() + )); + } + // Non-CFA timeout — surface the stderr tail anyway for diagnosis. + if !stderr_snapshot.is_empty() { + log::warn!("[local_ai] Ollama not reachable. stderr tail: {stderr_snapshot}"); + self.status.lock().error_detail = Some(stderr_snapshot); + } + Err("Ollama runtime is not reachable after fresh install. Start `ollama serve` manually and retry.".to_string()) + } +} diff --git a/src/openhuman/inference/local/service/ollama_admin/util.rs b/src/openhuman/inference/local/service/ollama_admin/util.rs new file mode 100644 index 0000000000..175fde3e4d --- /dev/null +++ b/src/openhuman/inference/local/service/ollama_admin/util.rs @@ -0,0 +1,101 @@ +use crate::openhuman::inference::local::ollama::{validate_ollama_url, OllamaTagsResponse}; + +pub(super) fn lm_studio_models_error_means_unreachable(error: &str) -> bool { + error.starts_with("lm studio models request failed:") +} + +pub(crate) fn interrupted_pull_settle_window_secs( + observed_bytes: bool, + settle_window_secs: u64, +) -> u64 { + if observed_bytes { + settle_window_secs.max(1) + } else { + 0 + } +} + +/// Kill a process by PID using `sysinfo`'s cross-platform `Process::kill`. +/// +/// Used by `reclaim_orphan_if_ours` where we no longer have the original +/// `tokio::process::Child` handle (the spawning openhuman crashed) but +/// recorded the PID in the spawn marker. +pub(crate) fn kill_pid_by_id(pid: u32) { + use sysinfo::{Pid, ProcessesToUpdate, System}; + let target = Pid::from_u32(pid); + let mut sys = System::new(); + sys.refresh_processes(ProcessesToUpdate::Some(&[target]), true); + match sys.process(target) { + Some(proc) => { + if proc.kill() { + log::info!("[local_ai] killed reclaimed ollama orphan pid={pid}"); + } else { + // sysinfo's kill returns false if the platform refused + // (permissions, race with exit). 
The next ollama_healthy() + // check will reveal whether the daemon is actually gone. + log::warn!("[local_ai] sysinfo Process::kill returned false for pid={pid}"); + } + } + None => { + log::debug!("[local_ai] kill_pid_by_id: pid={pid} no longer present"); + } + } +} + +/// Test connectivity to a user-supplied Ollama URL. +/// +/// Validates the URL via [`validate_ollama_url`], then issues a GET to +/// `{normalized_url}/api/tags` with a 3-second timeout. +/// Returns a JSON object with `reachable`, optional `error`, and +/// `models_count` when reachable. +pub(crate) async fn test_ollama_connection(url: &str) -> Result { + let normalized = validate_ollama_url(url)?; + log::debug!("[local_ai] test_ollama_connection: testing url={normalized}"); + + let client = reqwest::Client::builder() + .timeout(std::time::Duration::from_secs(3)) + .build() + .map_err(|e| format!("failed to build HTTP client: {e}"))?; + + match client.get(format!("{normalized}/api/tags")).send().await { + Ok(resp) if resp.status().is_success() => { + let models_count = resp + .json::() + .await + .map(|t| t.models.len()) + .unwrap_or(0); + log::debug!( + "[local_ai] test_ollama_connection: reachable url={normalized} models={models_count}" + ); + Ok(serde_json::json!({ + "reachable": true, + "error": null, + "models_count": models_count, + })) + } + Ok(resp) => { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + let err = format!("server responded with status {status}: {}", body.trim()); + log::debug!( + "[local_ai] test_ollama_connection: unreachable url={normalized} err={err}" + ); + Ok(serde_json::json!({ + "reachable": false, + "error": err, + "models_count": null, + })) + } + Err(e) => { + let err = e.to_string(); + log::debug!( + "[local_ai] test_ollama_connection: connection failed url={normalized} err={err}" + ); + Ok(serde_json::json!({ + "reachable": false, + "error": err, + "models_count": null, + })) + } + } +} diff --git 
a/src/openhuman/inference/provider/compatible.rs b/src/openhuman/inference/provider/compatible.rs index ac647787ab..7f5aebc250 100644 --- a/src/openhuman/inference/provider/compatible.rs +++ b/src/openhuman/inference/provider/compatible.rs @@ -4,122 +4,43 @@ #[path = "compatible_dump.rs"] mod compatible_dump; +#[path = "compatible_helpers.rs"] +mod compatible_helpers; #[path = "compatible_parse.rs"] mod compatible_parse; +#[path = "compatible_provider_impl.rs"] +mod compatible_provider_impl; +#[path = "compatible_repeat.rs"] +mod compatible_repeat; #[path = "compatible_request.rs"] mod compatible_request; #[path = "compatible_stream.rs"] mod compatible_stream; +#[path = "compatible_stream_native.rs"] +mod compatible_stream_native; #[path = "compatible_types.rs"] mod compatible_types; +#[cfg(test)] +pub(crate) use super::traits::{ChatMessage, ConversationMessage, Provider}; +#[cfg(test)] +pub(crate) use compatible_parse::normalize_function_arguments; #[cfg(test)] pub(crate) use compatible_parse::{ - parse_provider_tool_call_from_value, parse_sse_line, strip_think_tags, + build_responses_prompt, extract_responses_text, parse_chat_response_body, + parse_provider_tool_call_from_value, parse_responses_response_body, parse_sse_line, + strip_think_tags, }; #[cfg(test)] -pub(crate) use compatible_types::ResponsesResponse; - -use crate::openhuman::inference::provider::traits::{ - ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, - Provider, StreamChunk, StreamError, StreamOptions, StreamResult, ToolCall as ProviderToolCall, - UsageInfo as ProviderUsageInfo, -}; -use async_trait::async_trait; -use futures_util::{stream, StreamExt}; - -use compatible_dump::{dump_prompt_if_enabled, dump_response_if_enabled, reserve_dump_seq}; -use compatible_parse::{ - aggregate_responses_sse_body, build_responses_prompt, extract_responses_text, - normalize_function_arguments, parse_chat_response_body, parse_responses_response_body, - 
parse_tool_calls_from_content_json, -}; -use compatible_stream::sse_bytes_to_chunks; -use compatible_types::{ - ApiChatRequest, ApiChatResponse, ApiUsage, Choice, Function, Message, MessageContent, - NativeChatRequest, NativeMessage, OpenAiStreamOptions, OpenHumanMeta, ResponseMessage, - ResponsesRequest, StreamChunkResponse, StreamingToolCall, ToolCall, +pub(crate) use compatible_repeat::{StreamRepeatDetector, STREAM_REPEAT_THRESHOLD}; +#[cfg(test)] +pub(crate) use compatible_types::StreamChunkResponse; +#[cfg(test)] +pub(crate) use compatible_types::{ + ApiChatRequest, ApiChatResponse, Choice, Function, Message, MessageContent, NativeChatRequest, + NativeMessage, ResponseMessage, ResponsesResponse, ToolCall, }; -/// `frequency_penalty` applied to streaming chat-completions requests. -/// -/// Autoregressive models have a self-reinforcing bias toward repeating spans -/// already in their context; with no penalty a momentary repeat can spiral into -/// the same line emitted until the output-token cap (degenerate decoding). A -/// small positive penalty damps that loop without harming coherence. Carried on -/// the streaming path (where those loops occur — long autonomous turns) and -/// retried without it if a strict provider rejects it; the buffered -/// non-streaming fallback omits it for maximum compatibility. Skipped in -/// serialisation when `None` so providers that don't accept the field are -/// unaffected. -const CHAT_FREQUENCY_PENALTY: f64 = 0.3; - -/// Consecutive identical substantial lines that trip the in-generation repeat -/// cutoff. Autoregressive models can latch onto a line and emit it verbatim -/// until the token cap (observed: 234× the same sentence in one response). -/// `frequency_penalty` / stronger model tiers only lower the odds — they don't -/// prevent it — so this is the deterministic, model-agnostic stop. Set well -/// above any legitimate repetition. 
-const STREAM_REPEAT_THRESHOLD: u32 = 6; -/// Minimum trimmed length for a line to count toward [`STREAM_REPEAT_THRESHOLD`]. -/// Keeps short, legitimately-repeated lines (`}`, blank-ish code) from tripping -/// it; degenerate spirals are long sentences well over this. -const MIN_REPEAT_LINE_CHARS: usize = 16; - -/// Detects in-generation repetition degeneration on the streaming path so the -/// reader can abort the stream and truncate the blob. Trips after -/// [`STREAM_REPEAT_THRESHOLD`] consecutive identical substantial lines; blank -/// separator lines are ignored, so `"sentence\n\nsentence\n\n…"` still trips. -#[derive(Default)] -struct StreamRepeatDetector { - current_line: String, - last_line: Option, - consecutive: u32, -} - -impl StreamRepeatDetector { - fn new() -> Self { - Self::default() - } - - /// Feed one streamed text delta. Returns `true` once the same substantial - /// line has repeated [`STREAM_REPEAT_THRESHOLD`] times back-to-back. - fn observe(&mut self, delta: &str) -> bool { - for ch in delta.chars() { - if ch == '\n' { - if self.finalize_line() { - return true; - } - } else { - self.current_line.push(ch); - } - } - false - } - - fn finalize_line(&mut self) -> bool { - let line = self.current_line.trim().to_string(); - self.current_line.clear(); - if line.is_empty() { - // Blank separator between repeats — ignore, don't reset the run. - return false; - } - if line.chars().count() < MIN_REPEAT_LINE_CHARS { - // Short line — not a degenerate-sentence repeat; reset the run. - self.last_line = Some(line); - self.consecutive = 1; - return false; - } - if self.last_line.as_deref() == Some(line.as_str()) { - self.consecutive += 1; - } else { - self.last_line = Some(line); - self.consecutive = 1; - } - self.consecutive >= STREAM_REPEAT_THRESHOLD - } -} - /// A provider that speaks the OpenAI-compatible chat completions API. 
/// Used by: Venice, Vercel AI Gateway, Cloudflare AI Gateway, Moonshot, /// Synthetic, `OpenCode` Zen, `Z.AI`, `GLM`, `MiniMax`, Bedrock, Qianfan, Groq, Mistral, `xAI`, etc. @@ -161,12 +82,10 @@ pub struct OpenAiCompatibleProvider { /// `temperature_unsupported_models` glob filter still applies after. pub(crate) temperature_override: Option, /// Value reported by `capabilities().native_tool_calling`. Defaults to - /// `true` because most OpenAI-compatible providers (OpenAI, Anthropic - /// adapters, GLM, Groq, Mistral, OpenHuman backend, …) implement the + /// `true` because most OpenAI-compatible providers implement the /// `tools` parameter correctly. The factory flips this to `false` for - /// Ollama (sub-issue 3 of #3098), whose OpenAI-compat endpoint returns - /// HTTP 400 on `tools` for many models — making prompt-guided text - /// tool specs the only path that works across the Ollama model zoo. + /// Ollama, whose OpenAI-compat endpoint returns HTTP 400 on `tools` + /// for many models. native_tool_calling: bool, /// Ollama-specific `options.num_ctx` override. When set, every request /// to this provider includes `"options": {"num_ctx": }` in the @@ -214,110 +133,6 @@ impl OpenAiCompatibleProvider { Self::new_with_options(name, base_url, credential, auth_style, false, None, false) } - fn enrich_404_message(&self, base: String, status: reqwest::StatusCode) -> String { - if status == reqwest::StatusCode::NOT_FOUND && !self.supports_responses_fallback { - format!( - "{base}; check that your endpoint URL is correct \ - and the model name exists on your provider" - ) - } else { - base - } - } - - /// Build an actionable error for a completion-only model that was routed - /// to `/v1/chat/completions`. 
OpenHuman only speaks the chat-completions - /// API (with an optional `/v1/responses` fallback) — a completion-only / - /// base model 404s here and the responses fallback cannot rescue it, so we - /// surface the model name and concrete remediation instead of an opaque - /// "responses fallback failed" chain. See issue #3193. - fn completion_only_model_message(&self, model: &str, sanitized: &str) -> String { - format!( - "{name} API error (404): model '{model}' does not support the \ - chat-completions API that OpenHuman uses — it appears to be a \ - completion-only / base model. Assign a chat-capable model to this \ - provider (e.g. in Settings → AI), or pick a different model. \ - Provider detail: {sanitized}", - name = self.name, - ) - } - - /// Guard shared by every chat-completions 404 handler: if the body shows a - /// completion-only model, return the actionable error so the caller can - /// fail fast instead of attempting the futile `/v1/responses` fallback. - /// `None` means "not this case — proceed with normal fallback/enrich". - /// See issue #3193. - fn completion_only_404_guard( - &self, - status: reqwest::StatusCode, - sanitized: &str, - model: &str, - ) -> Option { - if Self::is_completion_only_model_404(status, sanitized) { - Some(anyhow::anyhow!( - self.completion_only_model_message(model, sanitized) - )) - } else { - None - } - } - - /// Build an actionable error for a model that lacks the chat capability — - /// e.g. an *embedding* model (Ollama `bge-m3`) selected as the chat model. - /// Ollama returns `400 "" does not support chat`; we replace the - /// opaque upstream JSON with concrete remediation. See Sentry - /// TAURI-RUST-4P6. - /// - /// The phrase `does not support chat` is preserved verbatim so the - /// re-reported error still matches - /// [`super::config_rejection::is_provider_config_rejection_message`] and - /// stays demoted from Sentry. 
- fn not_chat_capable_model_message(&self, model: &str, sanitized: &str) -> String { - format!( - "{name} API error: model '{model}' does not support chat — it \ - appears to be an embedding or non-chat model. Assign a \ - chat-capable model to this provider (e.g. in Settings → AI), or \ - pick a different model. Provider detail: {sanitized}", - name = self.name, - ) - } - - /// Detect a model rejected because it has no chat capability. Unlike the - /// completion-only base model (which 404s), an embedding model picked as - /// the chat model is rejected by Ollama with a **400/422** carrying - /// `"" does not support chat`, so it bypasses - /// [`is_completion_only_model_404`]. Match is tight (the exact phrase) so - /// ordinary 400s keep their normal handling. See Sentry TAURI-RUST-4P6. - fn is_not_chat_capable_model(status: reqwest::StatusCode, error: &str) -> bool { - if !matches!( - status, - reqwest::StatusCode::BAD_REQUEST | reqwest::StatusCode::UNPROCESSABLE_ENTITY - ) { - return false; - } - error.to_lowercase().contains("does not support chat") - } - - /// Guard shared by every chat-completions error handler: if the body shows - /// a non-chat-capable model (embedding model picked as chat), return the - /// actionable error so the caller fails fast with concrete remediation - /// instead of surfacing the opaque upstream JSON. `None` means "not this - /// case — proceed with normal fallback/enrich". See Sentry TAURI-RUST-4P6. - fn not_chat_capable_guard( - &self, - status: reqwest::StatusCode, - sanitized: &str, - model: &str, - ) -> Option { - if Self::is_not_chat_capable_model(status, sanitized) { - Some(anyhow::anyhow!( - self.not_chat_capable_model_message(model, sanitized) - )) - } else { - None - } - } - /// Create a provider with a custom User-Agent header. 
/// /// Some providers (for example Kimi Code) require a specific User-Agent @@ -393,30 +208,26 @@ impl OpenAiCompatibleProvider { /// Toggle whether this provider advertises native (OpenAI-style) tool /// calling to the agent harness. The default is `true`; set to `false` /// for providers whose `/v1/chat/completions` endpoint rejects the - /// `tools` parameter — the harness will then embed tool specs in the - /// system prompt and parse calls out of the response text instead. + /// `tools` parameter. pub fn with_native_tool_calling(mut self, enabled: bool) -> Self { self.native_tool_calling = enabled; self } /// Set the list of model glob patterns for which temperature must be - /// omitted from request bodies. Called by the provider factory to - /// propagate `config.temperature_unsupported_models`. + /// omitted from request bodies. pub fn with_temperature_unsupported_models(mut self, patterns: Vec) -> Self { self.temperature_unsupported_models = patterns; self } /// Pin a per-workload temperature, overriding whatever the caller passes. - /// Set by the factory when the provider string carries an `@` suffix. pub fn with_temperature_override(mut self, temperature: Option) -> Self { self.temperature_override = temperature; self } - /// Set the Ollama `options.num_ctx` override. When set, the provider - /// includes `"options": {"num_ctx": }` in every request body. + /// Set the Ollama `options.num_ctx` override. 
pub fn with_ollama_num_ctx(mut self, num_ctx: Option) -> Self { self.ollama_num_ctx = num_ctx; self @@ -468,2277 +279,6 @@ impl OpenAiCompatibleProvider { } self } - - async fn chat_via_responses( - &self, - credential: Option<&str>, - messages: &[ChatMessage], - model: &str, - ) -> anyhow::Result { - let (instructions, input) = build_responses_prompt(messages); - if input.is_empty() { - anyhow::bail!( - "{} Responses API fallback requires at least one non-system message", - self.name - ); - } - - // #3201: the Codex/ChatGPT OAuth Responses endpoint - // (`https://chatgpt.com/backend-api/codex/responses`) rejects - // `stream: false` outright with `{"detail":"Stream must be set to - // true"}`. PR #3192 fixed the sibling `store: false` requirement; - // this branch lifts the same constraint for the stream flag and - // parses the resulting SSE body inline so the existing non-streaming - // call signature is preserved. Other Responses-API providers (real - // OpenAI, custom OpenAI-compatible) keep the single-envelope path — - // they accept `stream: false` and the SSE branch would be wasted - // work for them. - // - // Detection is keyed on the `/backend-api/codex` path segment, not - // the `chatgpt.com` host: the same path segment is what - // `OpenAiCodexRouting` substitutes when a user is signed in via - // OAuth (see `OPENAI_CODEX_BACKEND_BASE_URL`), and it's specific - // enough that no other OpenAI-compatible provider URL uses it. - // - // Parse the URL and inspect path segments rather than scanning the - // whole `base_url` so a proxy URL whose query string or fragment - // contains the literal `/backend-api/codex` (e.g. - // `.../v1?upstream=/backend-api/codex`) doesn't get falsely - // promoted into the SSE branch. 
- let is_codex_oauth_responses = reqwest::Url::parse(&self.base_url) - .ok() - .and_then(|url| { - let segments: Vec<&str> = url.path_segments()?.collect(); - Some( - segments - .windows(2) - .any(|window| window == ["backend-api", "codex"]), - ) - }) - .unwrap_or(false); - - let request = ResponsesRequest { - model: model.to_string(), - input, - instructions, - stream: Some(is_codex_oauth_responses), - store: Some(false), - }; - - let url = self.responses_url(); - - let response = self - .apply_auth_header(self.http_client().post(&url).json(&request), credential) - .send() - .await?; - - if !response.status().is_success() { - let status = response.status(); - let status_str = status.as_u16().to_string(); - let error = response.text().await?; - let sanitized = super::sanitize_api_error(&error); - let message = format!("{} Responses API error: {sanitized}", self.name); - if super::is_budget_exhausted_http_400(status, &error) { - super::log_budget_exhausted_http_400( - "responses_api", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_custom_openai_upstream_bad_request_http_400( - self.name.as_str(), - status, - &error, - ) { - super::log_custom_openai_upstream_bad_request_http_400( - "responses_api", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_provider_access_policy_denied_http_403(status, &error) { - super::log_provider_access_policy_denied_http_403( - "responses_api", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_provider_config_rejection_http(status, self.name.as_str(), &error) { - super::log_provider_config_rejection( - "responses_api", - self.name.as_str(), - Some(model), - status, - ); - } else if super::should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "responses_api", - &[ - ("provider", self.name.as_str()), - ("model", model), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - 
} - anyhow::bail!(message); - } - - let body = response.text().await?; - if is_codex_oauth_responses { - // SSE branch — `stream: true` always produces a Server-Sent - // Event body, even on the non-streaming wrapper. Aggregate it - // back into the same `String` shape the caller expects. - return aggregate_responses_sse_body(&self.name, &body); - } - let responses = parse_responses_response_body(&self.name, &body)?; - - extract_responses_text(responses) - .ok_or_else(|| anyhow::anyhow!("No response from {} Responses API", self.name)) - } - - fn convert_tool_specs( - tools: Option<&[crate::openhuman::tools::ToolSpec]>, - ) -> Option> { - tools.map(|items| { - let mut seen: std::collections::HashSet<&str> = - std::collections::HashSet::with_capacity(items.len()); - let mut dropped: Vec<&str> = Vec::new(); - let mut out: Vec = Vec::with_capacity(items.len()); - for tool in items { - if !seen.insert(tool.name.as_str()) { - dropped.push(tool.name.as_str()); - continue; - } - out.push(serde_json::json!({ - "type": "function", - "function": { - "name": tool.name, - "description": tool.description, - "parameters": tool.parameters, - } - })); - } - if !dropped.is_empty() { - log::warn!( - "[providers][compatible] dropped {} duplicate tool spec(s) at wire \ - boundary (TAURI-RUST-2E): {:?}", - dropped.len(), - dropped - ); - } - out - }) - } - - fn convert_messages_for_native(messages: &[ChatMessage]) -> Vec { - let converted: Vec = - messages - .iter() - .map(|message| { - // Extract reasoning_content stored in extra_metadata by the - // agent harness after each assistant turn. Thinking models - // (DeepSeek-R1, Qwen3, GLM-4) require this to be echoed back - // verbatim in subsequent requests, or the API returns HTTP 400. 
- let reasoning_content = if message.role == "assistant" { - message - .extra_metadata - .as_ref() - .and_then(|m| m.get("reasoning_content")) - .and_then(serde_json::Value::as_str) - .map(ToString::to_string) - } else { - None - }; - - if message.role == "assistant" { - if let Ok(value) = - serde_json::from_str::(&message.content) - { - if let Some(tool_calls_value) = value.get("tool_calls") { - if let Ok(parsed_calls) = - serde_json::from_value::>( - tool_calls_value.clone(), - ) - { - let tool_calls = parsed_calls - .into_iter() - .map(|tc| ToolCall { - id: Some(tc.id), - kind: Some("function".to_string()), - function: Some(Function { - name: Some(tc.name), - arguments: Some(serde_json::Value::String( - tc.arguments, - )), - }), - }) - .collect::>(); - - // Default to empty string (not None) for - // tool-call assistant messages so the wire - // emits `"content":""` rather than omitting - // the key — some providers reject a missing - // content alongside reasoning_content. - let content = Some(MessageContent::Text( - value - .get("content") - .and_then(serde_json::Value::as_str) - .unwrap_or("") - .to_string(), - )); - - // Replay the assistant's reasoning so - // DeepSeek thinking mode accepts the - // tool-call turn on the follow-up request - // (Sentry TAURI-RUST-4KB). Prefer the value - // embedded in the JSON content (written by - // `build_native_assistant_history` in the - // tool-loop path); fall back to the value - // stored in `extra_metadata` (written by the - // main session-turn path). 
- let reasoning_content = value - .get("reasoning_content") - .and_then(serde_json::Value::as_str) - .filter(|s| !s.trim().is_empty()) - .map(ToString::to_string) - .or_else(|| reasoning_content.clone()); - - return NativeMessage { - role: "assistant".to_string(), - content, - tool_call_id: None, - tool_calls: Some(tool_calls), - reasoning_content, - }; - } - } - } - } - - if message.role == "tool" { - if let Ok(value) = - serde_json::from_str::(&message.content) - { - let tool_call_id = value - .get("tool_call_id") - .and_then(serde_json::Value::as_str) - .map(ToString::to_string); - let content = value - .get("content") - .and_then(serde_json::Value::as_str) - .map(ToString::to_string) - .or_else(|| Some(message.content.clone())) - .map(MessageContent::Text); - - return NativeMessage { - role: "tool".to_string(), - content, - tool_call_id, - tool_calls: None, - reasoning_content: None, - }; - } - } - - NativeMessage { - role: message.role.clone(), - // User-authored content may carry `[IMAGE:]` - // markers from chat attachments — promote them to - // structured `image_url` parts here. Markerless text - // (every system/assistant/tool turn) is returned as the - // plain-string arm, unchanged on the wire. - content: Some(MessageContent::from_chat_text(&message.content)), - tool_call_id: None, - tool_calls: None, - reasoning_content, - } - }) - .collect(); - - Self::enforce_tool_message_invariants(converted) - } - - /// Enforce the OpenAI-compatible tool-message ordering invariants on the - /// fully-serialized wire array, immediately before it goes on the wire. - /// - /// Several upstream defects can leave the array malformed and trip a 400 - /// (`messages with role 'tool' must be a response to a preceding message - /// with 'tool_calls'`). 
That 400 streams back as an empty completion, which - /// the agent loop collapses to "The model returned an empty response" and - /// the chat surface shows as a generic "Something went wrong": - /// - /// * **(A)** History tail-trimming (`session::turn::trim_history` / - /// `bound_cached_transcript_messages`) cuts *between* an - /// `assistant(tool_calls)` and its `tool` result, dropping the assistant - /// and orphaning the result at the head of the window. - /// * **(B)** A persisted assistant tool-call message whose `content` no - /// longer deserializes as `tool_calls` (format drift) falls through the - /// parser above and is emitted as plain text with its `tool_calls` - /// stripped — again orphaning the following `tool` result. - /// * **(C)** An `assistant(tool_calls)` whose results never arrived (an - /// aborted / max-iteration turn, or a partially-answered multi-call - /// cycle) leaves dangling tool-call ids with no matching `tool` response. - /// - /// This pass makes the contract hold *by construction* regardless of which - /// path produced the array. It is **position-aware**: each - /// `assistant(tool_calls)` is paired with the *contiguous run of `tool` - /// messages that immediately follows it* (the only place valid responses can - /// live in the OpenAI wire format), then: - /// - /// * `tool_calls` entries with no matching response *in that run* are pruned - /// (C); if none survive, the field is dropped so the message serializes as - /// plain assistant text rather than an empty tool-call block. - /// * `tool` messages that are **not** part of such a run — a leading orphan - /// from trimming (A), or one stranded after an assistant whose `tool_calls` - /// were stripped (B) — are dropped. 
- /// - /// Pairing by adjacency (rather than a global "is this id answered anywhere" - /// set) is what keeps **sequential** cycles (`asst(A)→tool(A)`, - /// `asst(B)→tool(B)`, …) and **parallel** calls (one `asst([X,Y,Z])` answered - /// by `tool(X) tool(Y) tool(Z)`) correct, and makes the result well-formed - /// even if responses are reordered or a cycle is bisected mid-sequence — no - /// causal-ordering assumption required. - fn enforce_tool_message_invariants(messages: Vec) -> Vec { - use std::collections::HashSet; - - let mut out: Vec = Vec::with_capacity(messages.len()); - let mut dropped_orphans = 0usize; - let mut pruned_calls = 0usize; - - let mut iter = messages.into_iter().peekable(); - while let Some(mut msg) = iter.next() { - if msg.role == "assistant" && msg.tool_calls.is_some() { - // Gather the contiguous run of `tool` messages that answer this - // block (responses must immediately follow, in any order). - let mut run: Vec = Vec::new(); - while iter.peek().is_some_and(|m| m.role == "tool") { - run.push(iter.next().expect("peeked tool message")); - } - let responded: HashSet = - run.iter().filter_map(|t| t.tool_call_id.clone()).collect(); - - // (C) keep only tool_calls answered within this run. - let calls = msg.tool_calls.take().unwrap_or_default(); - let before = calls.len(); - let kept: Vec = calls - .into_iter() - .filter(|c| c.id.as_deref().is_some_and(|id| responded.contains(id))) - .collect(); - pruned_calls += before - kept.len(); - let kept_ids: HashSet = kept.iter().filter_map(|c| c.id.clone()).collect(); - msg.tool_calls = if kept.is_empty() { None } else { Some(kept) }; - // Strip reasoning_content when the message collapses to plain - // text (no surviving tool_calls). Thinking-mode providers - // (DeepSeek) require reasoning only on tool-call assistant - // messages; a stale reasoning_content on a non-tool-call - // message is at best ignored and at worst a malformed shape. 
- if msg.tool_calls.is_none() { - msg.reasoning_content = None; - } - out.push(msg); - - // Emit the run's responses that map to a surviving call; drop the - // rest (e.g. a stray tool whose id wasn't in this block). - for tool_msg in run { - let kept = tool_msg - .tool_call_id - .as_deref() - .is_some_and(|id| kept_ids.contains(id)); - if kept { - out.push(tool_msg); - } else { - dropped_orphans += 1; - } - } - } else if msg.role == "tool" { - // (A, B) a `tool` not consumed by a preceding assistant block. - dropped_orphans += 1; - } else { - out.push(msg); - } - } - - if dropped_orphans > 0 || pruned_calls > 0 { - log::warn!( - "[provider] sanitized malformed tool-message ordering before send: \ - dropped {dropped_orphans} orphaned tool result(s), pruned {pruned_calls} \ - unanswered tool_call(s)" - ); - } - - out - } - - fn with_prompt_guided_tool_instructions( - messages: &[ChatMessage], - tools: Option<&[crate::openhuman::tools::ToolSpec]>, - ) -> Vec { - let Some(tools) = tools else { - return messages.to_vec(); - }; - - if tools.is_empty() { - return messages.to_vec(); - } - - let instructions = - crate::openhuman::inference::provider::traits::build_tool_instructions_text(tools); - let mut modified_messages = messages.to_vec(); - - if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system") { - if !system_message.content.is_empty() { - system_message.content.push_str("\n\n"); - } - system_message.content.push_str(&instructions); - } else { - modified_messages.insert(0, ChatMessage::system(instructions)); - } - - modified_messages - } - - fn parse_native_response( - api_response: ApiChatResponse, - provider_name: &str, - ) -> anyhow::Result { - let usage = Self::extract_usage(&api_response); - - let message = api_response - .choices - .into_iter() - .next() - .map(|c| c.message) - .ok_or_else(|| anyhow::anyhow!("No choices in response from {}", provider_name))?; - - let mut text = message.effective_content_optional(); - // Capture 
reasoning_content before the message fields are moved into - // the tool-call extractors below. This must be passed back verbatim on - // the next turn for thinking models (e.g. DeepSeek-R1, Qwen3) whose APIs - // return HTTP 400 ("reasoning_content in thinking mode must be passed back") - // when the field is omitted from subsequent assistant messages. - let reasoning_content = message - .reasoning_content - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_owned); - let mut tool_calls = message - .tool_calls - .unwrap_or_default() - .into_iter() - .filter_map(|tc| { - let function = tc.function?; - let name = function.name?; - let arguments = normalize_function_arguments(function.arguments); - Some(ProviderToolCall { - id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()), - name, - arguments, - }) - }) - .collect::>(); - - if tool_calls.is_empty() { - if let Some(function) = message.function_call.as_ref() { - if let Some(name) = function - .name - .as_ref() - .filter(|name| !name.trim().is_empty()) - { - tool_calls.push(ProviderToolCall { - id: uuid::Uuid::new_v4().to_string(), - name: name.clone(), - arguments: normalize_function_arguments(function.arguments.clone()), - }); - } - } - } - - // Some providers return OpenAI-style tool_calls encoded as a JSON string - // inside message.content. Recover those here so native tool-calling still works. 
- if let Some(content) = message.content.as_deref() { - if let Some((json_text, json_tool_calls)) = parse_tool_calls_from_content_json(content) - { - if !json_tool_calls.is_empty() { - tool_calls = json_tool_calls; - text = json_text.or(text); - } - } - } - - tracing::debug!( - has_reasoning_content = reasoning_content.is_some(), - reasoning_content_chars = reasoning_content.as_ref().map_or(0, |r| r.chars().count()), - "[provider:parse_native_response] reasoning_content capture" - ); - - Ok(ProviderChatResponse { - text, - tool_calls, - usage, - reasoning_content, - }) - } - - /// Extract usage info from API response, preferring the OpenHuman - /// metadata block (which includes cache stats and billing) over the - /// standard OpenAI usage block. - fn extract_usage(resp: &ApiChatResponse) -> Option { - let oh = resp.openhuman.as_ref(); - let std_usage = resp.usage.as_ref(); - - // Need at least one source of token counts. - if oh.is_none() && std_usage.is_none() { - return None; - } - - let oh_usage = oh.and_then(|o| o.usage.as_ref()); - let oh_billing = oh.and_then(|o| o.billing.as_ref()); - - // Prefer OpenHuman metadata when the fields are actually present; - // fall back to the standard OpenAI usage block when they are None. 
- let input_tokens = oh_usage - .and_then(|u| u.input_tokens) - .or(std_usage.map(|u| u.prompt_tokens)) - .unwrap_or(0); - let output_tokens = oh_usage - .and_then(|u| u.output_tokens) - .or(std_usage.map(|u| u.completion_tokens)) - .unwrap_or(0); - let cached_input_tokens = oh_usage - .and_then(|u| u.cached_input_tokens) - .or(std_usage - .and_then(|u| u.prompt_tokens_details.as_ref()) - .map(|d| d.cached_tokens)) - .unwrap_or(0); - let charged_amount_usd = oh_billing.map(|b| b.charged_amount_usd).unwrap_or(0.0); - - let from_openhuman = oh_usage.is_some(); - let from_standard = std_usage.is_some() && !from_openhuman; - let has_billing = oh_billing.is_some(); - tracing::debug!( - from_openhuman, - from_standard, - has_billing, - input_tokens, - output_tokens, - cached_input_tokens, - charged_amount_usd, - "[provider:usage] extract_usage resolved token counts" - ); - - Some(ProviderUsageInfo { - input_tokens, - output_tokens, - context_window: 0, - cached_input_tokens, - charged_amount_usd, - }) - } - - fn is_native_tool_schema_unsupported(status: reqwest::StatusCode, error: &str) -> bool { - if !matches!( - status, - reqwest::StatusCode::BAD_REQUEST | reqwest::StatusCode::UNPROCESSABLE_ENTITY - ) { - return false; - } - - let lower = error.to_lowercase(); - [ - "unknown parameter: tools", - "unsupported parameter: tools", - "unrecognized field `tools`", - "does not support tools", - "function calling is not supported", - "tool_choice", - ] - .iter() - .any(|hint| lower.contains(hint)) - } - - fn err_supports_no_tools_retry(error: &str) -> bool { - Self::is_native_tool_schema_unsupported(reqwest::StatusCode::BAD_REQUEST, error) - } - - /// Detect a provider rejecting the `frequency_penalty` sampling field. Some - /// strict OpenAI-compatible backends 400 on unknown params; when this fires - /// the caller retries once with the field omitted (mirrors the no-tools - /// retry). String-based because the streamed transport error surfaces the - /// API error body. 
- fn err_indicates_frequency_penalty_unsupported(error: &str) -> bool { - let lower = error.to_lowercase(); - lower.contains("frequency_penalty") - && (lower.contains("unsupported") - || lower.contains("unknown") - || lower.contains("unrecognized") - || lower.contains("not supported") - || lower.contains("does not support") - || lower.contains("invalid") - || lower.contains("unexpected")) - } - - /// Detect a 404 whose body says the model is completion-only and cannot be - /// served from `/v1/chat/completions` (OpenAI: "This is not a chat model - /// and thus not supported in the v1/chat/completions endpoint. Did you - /// mean to use v1/completions?"). When this fires, attempting the - /// `/v1/responses` fallback is futile, so callers should fail fast with an - /// actionable message via [`completion_only_model_message`]. The match is - /// deliberately tight so ordinary "model does not exist" 404s are NOT - /// caught (those should keep their existing fallback / enrich behaviour). - /// See issue #3193. - fn is_completion_only_model_404(status: reqwest::StatusCode, error: &str) -> bool { - if status != reqwest::StatusCode::NOT_FOUND { - return false; - } - let lower = error.to_lowercase(); - lower.contains("not a chat model") - || (lower.contains("v1/chat/completions") && lower.contains("v1/completions")) - } - - /// Streaming variant of the native-tools chat path. - /// - /// Sends the request with `stream: true`, consumes the upstream SSE - /// stream chunk by chunk, forwards fine-grained `ProviderDelta` - /// events to the caller-supplied sender, and returns the aggregated - /// [`ProviderChatResponse`] once the stream ends. Per-chunk parsing - /// uses [`StreamChunkResponse`] — a permissive subset of the - /// OpenAI/Fireworks streaming schema that tolerates unknown fields. 
- async fn stream_native_chat( - &self, - credential: Option<&str>, - native_request: &NativeChatRequest, - delta_tx: &tokio::sync::mpsc::Sender, - dump_seq: u64, - ) -> anyhow::Result { - use futures_util::StreamExt; - - let url = self.chat_completions_url(); - log::info!( - "[stream] {} POST {} (stream=true, tools={})", - self.name, - url, - native_request.tools.as_ref().map_or(0, |t| t.len()), - ); - - let response = self - .apply_auth_header( - self.http_client() - .post(&url) - .header("Accept", "text/event-stream") - .json(native_request), - credential, - ) - .send() - .await?; - - if !response.status().is_success() { - let status = response.status(); - let status_str = status.as_u16().to_string(); - let body = response.text().await.unwrap_or_default(); - // Sanitize the upstream error body so we don't leak user - // prompts, tool arguments, or credentials the backend - // echoed back into the anyhow chain / logs. - let sanitized = super::sanitize_api_error(&body); - let message = format!( - "{} streaming API error ({}): {}", - self.name, status, sanitized - ); - if super::is_budget_exhausted_http_400(status, &body) { - super::log_budget_exhausted_http_400( - "streaming_chat", - self.name.as_str(), - Some(native_request.model.as_str()), - status, - ); - } else if super::is_custom_openai_upstream_bad_request_http_400( - self.name.as_str(), - status, - &body, - ) { - super::log_custom_openai_upstream_bad_request_http_400( - "streaming_chat", - self.name.as_str(), - Some(native_request.model.as_str()), - status, - ); - } else if super::is_provider_access_policy_denied_http_403(status, &body) { - super::log_provider_access_policy_denied_http_403( - "streaming_chat", - self.name.as_str(), - Some(native_request.model.as_str()), - status, - ); - } else if super::is_provider_config_rejection_http(status, self.name.as_str(), &body) { - super::log_provider_config_rejection( - "streaming_chat", - self.name.as_str(), - Some(native_request.model.as_str()), - status, - ); 
- } else if Self::is_native_tool_schema_unsupported(status, &body) { - // Model rejects tool definitions (e.g. Ollama "does not support tools"). - // The caller's retry loop already handles this by re-issuing without - // tools — suppress the Sentry event so noise doesn't accumulate for - // every model that lacks tool-calling support (TAURI-RUST-4K7). - log::info!( - "[stream] {} model rejected tool schema (status={}) — caller will retry without tools", - self.name, - status, - ); - } else if super::should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "streaming_chat", - &[ - ("provider", self.name.as_str()), - ("model", native_request.model.as_str()), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } - anyhow::bail!(message); - } - - // Some OpenAI-compatible backends (and our e2e mock) accept - // `stream: true` in the request but reply with a regular - // `application/json` body rather than SSE. Detect this and - // fall back to the non-streaming parse path so the caller - // still gets an aggregated response. No deltas are emitted in - // this case (there's nothing to stream). - let is_sse = response - .headers() - .get(reqwest::header::CONTENT_TYPE) - .and_then(|v| v.to_str().ok()) - .map(|ct| ct.to_ascii_lowercase().contains("text/event-stream")) - .unwrap_or(false); - if !is_sse { - log::warn!( - "[stream] {} upstream replied with non-SSE content-type; falling back to JSON parse \ - (no token deltas reach the UI)", - self.name, - ); - let response_bytes = response.bytes().await?; - dump_response_if_enabled(&self.name, &native_request.model, dump_seq, &response_bytes); - let api_resp: ApiChatResponse = serde_json::from_slice(&response_bytes) - .map_err(|err| anyhow::anyhow!("{} response parse error: {err}", self.name))?; - return Self::parse_native_response(api_resp, &self.name); - } - - // Accumulators for the final aggregated response. 
Tool-call - // state is keyed by the upstream `index` so interleaved chunks - // for multiple tool calls in the same turn don't clobber each - // other. - let mut text_accum = String::new(); - let mut thinking_accum = String::new(); - let mut tool_accum: std::collections::BTreeMap = - std::collections::BTreeMap::new(); - let mut last_usage: Option = None; - let mut last_openhuman: Option = None; - - let mut bytes_stream = response.bytes_stream(); - let mut buffer = String::new(); - let mut repeat_detector = StreamRepeatDetector::new(); - let mut degenerate_repeat = false; - - 'stream: while let Some(item) = bytes_stream.next().await { - let bytes = item?; - buffer.push_str(&String::from_utf8_lossy(&bytes)); - - // SSE events are separated by "\n\n"; lines within an event - // are "\n"-terminated. We accumulate partial events across - // socket reads and only pop complete ones. - while let Some(sep_idx) = buffer.find("\n\n") { - let event = buffer[..sep_idx].to_string(); - buffer.drain(..sep_idx + 2); - for line in event.lines() { - let line = line.trim(); - if line.is_empty() || line.starts_with(':') { - continue; - } - let Some(data) = line.strip_prefix("data:") else { - continue; - }; - let data = data.trim(); - if data == "[DONE]" { - continue; - } - - let chunk: StreamChunkResponse = match serde_json::from_str(data) { - Ok(v) => v, - Err(e) => { - log::debug!( - "[stream] {} skipping unparseable chunk: {} — data={}", - self.name, - e, - data, - ); - continue; - } - }; - - if let Some(usage) = chunk.usage { - last_usage = Some(usage); - } - if let Some(meta) = chunk.openhuman { - last_openhuman = Some(meta); - } - - for choice in chunk.choices { - // Visible text delta. 
- if let Some(content) = choice.delta.content.as_ref() { - if !content.is_empty() { - text_accum.push_str(content); - let _ = delta_tx - .send(crate::openhuman::inference::provider::ProviderDelta::TextDelta { - delta: content.clone(), - }) - .await; - // Deterministic in-generation repeat cutoff: a - // model spiraling on one line can't be stopped by - // prompt or penalty, so abort the stream once the - // same substantial line repeats too many times. - if repeat_detector.observe(content) { - log::warn!( - "[stream] {} degenerate repetition detected (≥{} identical lines) — aborting generation, truncating (text_chars={})", - self.name, - STREAM_REPEAT_THRESHOLD, - text_accum.chars().count(), - ); - degenerate_repeat = true; - break 'stream; - } - } - } - // Reasoning / thinking delta. - if let Some(reasoning) = choice.delta.reasoning_content.as_ref() { - if !reasoning.is_empty() { - thinking_accum.push_str(reasoning); - let _ = delta_tx - .send( - crate::openhuman::inference::provider::ProviderDelta::ThinkingDelta { - delta: reasoning.clone(), - }, - ) - .await; - } - } - // Tool-call fragments. - // - // Ordering invariant emitted downstream: - // ToolCallStart (once, when id+name both known) - // → ToolCallArgsDelta* (buffered then streamed) - // - // Args fragments that arrive *before* we know the - // canonical id are buffered into `entry.arguments` - // but NOT emitted — emitting them with a synthetic - // id would break client-side reconciliation against - // the eventual tool_call / tool_result events that - // carry the real id. Once start fires we flush the - // buffered prefix in a single delta, then stream - // subsequent fragments as they arrive. 
- if let Some(tc_list) = choice.delta.tool_calls.as_ref() { - for tc in tc_list { - let idx = tc.index.unwrap_or(0); - let entry = tool_accum.entry(idx).or_default(); - - if let Some(id) = tc.id.as_ref() { - if entry.id.is_none() { - log::debug!( - "[stream] {} tool_call[{}] id resolved: {}", - self.name, - idx, - id, - ); - } - entry.id = Some(id.clone()); - } - if let Some(func) = tc.function.as_ref() { - if let Some(name) = func.name.as_ref() { - if !name.is_empty() && entry.name.is_none() { - log::debug!( - "[stream] {} tool_call[{}] name resolved: {}", - self.name, - idx, - name, - ); - } - if !name.is_empty() { - entry.name = Some(name.clone()); - } - } - if let Some(args) = func.arguments.as_ref() { - if !args.is_empty() { - entry.arguments.push_str(args); - if !entry.emitted_start { - log::debug!( - "[stream] {} tool_call[{}] buffering args ({} chars total) — waiting for id/name", - self.name, - idx, - entry.arguments.len(), - ); - } - } - } - } - - // Fire start + flush buffered args once - // both id and name have been observed. - if !entry.emitted_start { - if let (Some(id), Some(name)) = - (entry.id.as_ref(), entry.name.as_ref()) - { - log::debug!( - "[stream] {} tool_call[{}] emitting ToolCallStart id={} name={}", - self.name, - idx, - id, - name, - ); - let _ = delta_tx - .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallStart { - call_id: id.clone(), - tool_name: name.clone(), - }) - .await; - entry.emitted_start = true; - // Flush any args that were - // buffered before the start id - // was known. 
- if !entry.arguments.is_empty() { - log::debug!( - "[stream] {} tool_call[{}] flushing buffered args ({} chars)", - self.name, - idx, - entry.arguments.len(), - ); - let buffered = entry.arguments.clone(); - let _ = delta_tx - .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallArgsDelta { - call_id: id.clone(), - delta: buffered, - }) - .await; - entry.emitted_chars = entry.arguments.len(); - } - } - } else if entry.arguments.len() > entry.emitted_chars { - // Start already fired — stream the - // newly appended fragment with the - // canonical id. - if let Some(ref id) = entry.id { - let fresh = - entry.arguments[entry.emitted_chars..].to_string(); - let _ = delta_tx - .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallArgsDelta { - call_id: id.clone(), - delta: fresh, - }) - .await; - entry.emitted_chars = entry.arguments.len(); - } - } - } - } - } - } - } - } - - if degenerate_repeat { - // Mark the truncated output so downstream (and the user) see why it - // was cut off rather than a silently shortened response. - text_accum.push_str( - "\n\n[Output stopped: detected repeated/looping generation (model degeneration).]", - ); - } - - let tool_call_count = tool_accum.len(); - log::info!( - "[stream] {} aggregated text_chars={} thinking_chars={} tool_calls={}", - self.name, - text_accum.chars().count(), - thinking_accum.chars().count(), - tool_call_count, - ); - - // Aggregate the collected tool calls into the unified response - // shape. We reuse `parse_native_response` by building an - // `ApiChatResponse` from the accumulators so downstream code - // sees the same shape as the non-streaming path. 
- let tool_calls_for_api: Vec = tool_accum - .into_values() - .map(|c| ToolCall { - id: c.id, - kind: Some("function".to_string()), - function: Some(Function { - name: c.name, - arguments: if c.arguments.is_empty() { - None - } else { - // Try to parse as JSON first so downstream - // `normalize_function_arguments` can take the - // usual Value (object) path; fall back to a - // JSON-string value for partially-assembled or - // permanently malformed fragments. - // `normalize_function_arguments` validates and - // discards malformed strings (OPENHUMAN-TAURI-6F). - Some( - serde_json::from_str(&c.arguments) - .unwrap_or(serde_json::Value::String(c.arguments)), - ) - }, - }), - }) - .collect(); - - let api_resp = ApiChatResponse { - choices: vec![Choice { - message: ResponseMessage { - content: if text_accum.is_empty() { - None - } else { - Some(text_accum) - }, - reasoning_content: if thinking_accum.is_empty() { - None - } else { - Some(thinking_accum) - }, - tool_calls: if tool_calls_for_api.is_empty() { - None - } else { - Some(tool_calls_for_api) - }, - function_call: None, - }, - }], - usage: last_usage, - openhuman: last_openhuman, - }; - - // Dump the aggregated final response (structured, diff-friendly, - // carries usage + openhuman cache meta from the last chunks). - // Hand-build a Value here because `ApiChatResponse` is - // Deserialize-only. 
- if std::env::var("OPENHUMAN_PROMPT_DUMP_DIR").is_ok() { - let msg = &api_resp.choices[0].message; - let aggregated = serde_json::json!({ - "content": msg.content, - "reasoning_content": msg.reasoning_content, - "tool_calls": msg.tool_calls.as_ref().map(|calls| { - calls.iter().map(|c| serde_json::json!({ - "id": c.id, - "type": c.kind, - "function": c.function.as_ref().map(|f| serde_json::json!({ - "name": f.name, - "arguments": f.arguments, - })), - })).collect::>() - }), - "usage": api_resp.usage.as_ref().map(|u| serde_json::json!({ - "prompt_tokens": u.prompt_tokens, - "completion_tokens": u.completion_tokens, - "total_tokens": u.total_tokens, - "prompt_cached_tokens": u.prompt_tokens_details - .as_ref().map(|d| d.cached_tokens), - })), - "openhuman": api_resp.openhuman.as_ref().map(|m| serde_json::json!({ - "usage": m.usage.as_ref().map(|u| serde_json::json!({ - "input_tokens": u.input_tokens, - "output_tokens": u.output_tokens, - "cached_input_tokens": u.cached_input_tokens, - })), - "billing": m.billing.as_ref().map(|b| serde_json::json!({ - "charged_amount_usd": b.charged_amount_usd, - })), - })), - }); - if let Ok(bytes) = serde_json::to_vec(&aggregated) { - dump_response_if_enabled(&self.name, &native_request.model, dump_seq, &bytes); - } - } - - Self::parse_native_response(api_resp, &self.name) - } -} - -#[async_trait] -impl Provider for OpenAiCompatibleProvider { - fn capabilities(&self) -> crate::openhuman::inference::provider::traits::ProviderCapabilities { - crate::openhuman::inference::provider::traits::ProviderCapabilities { - native_tool_calling: self.native_tool_calling, - // Kept `false` for now. The provider already serializes images as - // `image_url` content parts on the chat-completions path (#3205), but - // vision is a per-*model* property the provider can't know here — and - // the Responses-API path (`chat_via_responses`) is still text-only. 
- // Claiming vision provider-wide would let image turns through the - // gate to a possibly-non-vision model. The capability stays off until - // it can be driven per-model (e.g. from `model_registry.vision`). - vision: false, - } - } - - async fn chat_with_system( - &self, - system_prompt: Option<&str>, - message: &str, - model: &str, - temperature: f64, - ) -> anyhow::Result { - let credential = self.credential_for_request()?; - - let mut messages = Vec::new(); - - if self.merge_system_into_user { - let content = match system_prompt { - Some(sys) => format!("{sys}\n\n{message}"), - None => message.to_string(), - }; - messages.push(Message { - role: "user".to_string(), - content: MessageContent::from_chat_text(&content), - }); - } else { - if let Some(sys) = system_prompt { - messages.push(Message { - role: "system".to_string(), - content: sys.into(), - }); - } - messages.push(Message { - role: "user".to_string(), - content: MessageContent::from_chat_text(message), - }); - } - - let request = ApiChatRequest { - model: model.to_string(), - messages, - temperature: self.effective_temperature(model, temperature), - stream: Some(false), - tools: None, - tool_choice: None, - }; - - let url = self.chat_completions_url(); - - let mut fallback_messages = Vec::new(); - if let Some(system_prompt) = system_prompt { - fallback_messages.push(ChatMessage::system(system_prompt)); - } - fallback_messages.push(ChatMessage::user(message)); - let fallback_messages = if self.merge_system_into_user { - Self::flatten_system_messages(&fallback_messages) - } else { - fallback_messages - }; - - if self.responses_api_primary { - return self - .chat_via_responses(credential, &fallback_messages, model) - .await; - } - - let response = match self - .apply_auth_header(self.http_client().post(&url).json(&request), credential) - .send() - .await - { - Ok(response) => response, - Err(chat_error) => { - if self.supports_responses_fallback { - let detail = super::format_error_chain(&chat_error); - 
return self - .chat_via_responses(credential, &fallback_messages, model) - .await - .map_err(|responses_err| { - let fb = super::format_anyhow_chain(&responses_err); - anyhow::anyhow!( - "{} chat completions transport error: {detail} (responses fallback failed: {fb})", - self.name - ) - }); - } - - return Err(chat_error.into()); - } - }; - - if !response.status().is_success() { - let status = response.status(); - let error = response.text().await?; - let sanitized = super::sanitize_api_error(&error); - - // A completion-only model 404s here and the /v1/responses fallback - // cannot rescue it — fail fast with actionable guidance (#3193). - if let Some(err) = self.completion_only_404_guard(status, &sanitized, model) { - return Err(err); - } - - // An embedding / non-chat model rejected with 400 "does not - // support chat" (e.g. Ollama bge-m3 picked as the chat model) — - // fail fast with actionable guidance. See Sentry TAURI-RUST-4P6. - if let Some(err) = self.not_chat_capable_guard(status, &sanitized, model) { - return Err(err); - } - - if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback { - return self - .chat_via_responses(credential, &fallback_messages, model) - .await - .map_err(|responses_err| { - let fb = super::format_anyhow_chain(&responses_err); - anyhow::anyhow!( - "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {fb})", - self.name - ) - }); - } - - let status_str = status.as_u16().to_string(); - let message = self.enrich_404_message( - format!("{} API error ({status}): {sanitized}", self.name), - status, - ); - if super::is_backend_auth_failure(self.name.as_str(), status) { - // Backend rejected the app session JWT (401/403): expected - // session-expiry (token expired/revoked/rotated), not a code - // bug. Publish SessionExpired so the credentials subscriber - // drives reauth and the scheduler-gate halts downstream LLM - // work, and skip the Sentry report (TAURI-RUST-N). 
Mirrors the - // `is_backend_auth_failure` arm in `super::api_error`. - super::publish_backend_session_expired( - "chat_completions", - self.name.as_str(), - status, - &message, - ); - } else if super::is_budget_exhausted_http_400(status, &error) { - super::log_budget_exhausted_http_400( - "chat_completions", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_custom_openai_upstream_bad_request_http_400( - self.name.as_str(), - status, - &error, - ) { - super::log_custom_openai_upstream_bad_request_http_400( - "chat_completions", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_provider_access_policy_denied_http_403(status, &error) { - super::log_provider_access_policy_denied_http_403( - "chat_completions", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_provider_config_rejection_http(status, self.name.as_str(), &error) { - super::log_provider_config_rejection( - "chat_completions", - self.name.as_str(), - Some(model), - status, - ); - } else if super::should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "chat_completions", - &[ - ("provider", self.name.as_str()), - ("model", model), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } - anyhow::bail!(message); - } - - let body = response.text().await?; - let chat_response = parse_chat_response_body(&self.name, &body)?; - - chat_response - .choices - .into_iter() - .next() - .map(|c| { - // If tool_calls are present, serialize the full message as JSON - // so parse_tool_calls can handle the OpenAI-style format - if c.message.tool_calls.is_some() - && c.message.tool_calls.as_ref().is_some_and(|t| !t.is_empty()) - { - serde_json::to_string(&c.message) - .unwrap_or_else(|_| c.message.effective_content()) - } else { - // No tool calls, return content (with reasoning_content fallback) - c.message.effective_content() - } - }) - .ok_or_else(|| 
anyhow::anyhow!("No response from {}", self.name)) - } - - async fn chat_with_history( - &self, - messages: &[ChatMessage], - model: &str, - temperature: f64, - ) -> anyhow::Result { - let credential = self.credential_for_request()?; - - let effective_messages = if self.merge_system_into_user { - Self::flatten_system_messages(messages) - } else { - messages.to_vec() - }; - let api_messages: Vec = effective_messages - .iter() - .map(|m| Message { - role: m.role.clone(), - content: MessageContent::from_chat_text(&m.content), - }) - .collect(); - - let request = ApiChatRequest { - model: model.to_string(), - messages: api_messages, - temperature: self.effective_temperature(model, temperature), - stream: Some(false), - tools: None, - tool_choice: None, - }; - - let url = self.chat_completions_url(); - if self.responses_api_primary { - return self - .chat_via_responses(credential, &effective_messages, model) - .await; - } - - let response = match self - .apply_auth_header(self.http_client().post(&url).json(&request), credential) - .send() - .await - { - Ok(response) => response, - Err(chat_error) => { - if self.supports_responses_fallback { - let detail = super::format_error_chain(&chat_error); - return self - .chat_via_responses(credential, &effective_messages, model) - .await - .map_err(|responses_err| { - let fb = super::format_anyhow_chain(&responses_err); - anyhow::anyhow!( - "{} chat completions transport error: {detail} (responses fallback failed: {fb})", - self.name - ) - }); - } - - return Err(chat_error.into()); - } - }; - - if !response.status().is_success() { - let status = response.status(); - - // A 404 may mean this provider uses the Responses API, OR that the - // model is completion-only. Read the body once so we can tell the - // two apart (#3193) — only the 404 branch needs it; the response is - // not used again here, so `api_error` below still owns the rest. 
- if status == reqwest::StatusCode::NOT_FOUND { - let error = response.text().await?; - let sanitized = super::sanitize_api_error(&error); - - // Completion-only model: the responses fallback can't help — - // fail fast with actionable guidance. - if let Some(err) = self.completion_only_404_guard(status, &sanitized, model) { - return Err(err); - } - - if self.supports_responses_fallback { - return self - .chat_via_responses(credential, &effective_messages, model) - .await - .map_err(|responses_err| { - let fb = super::format_anyhow_chain(&responses_err); - anyhow::anyhow!( - "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {fb})", - self.name - ) - }); - } - - let enriched = self.enrich_404_message( - format!("{} API error ({status}): {sanitized}", self.name), - status, - ); - return Err(anyhow::anyhow!("{enriched}")); - } - - // `api_error` reads the body and runs the shared classification - // (SessionExpired publish, config-rejection demotion, Sentry-report - // decision). For a non-chat-capable model (embedding model picked - // as chat → 400 "does not support chat") it already demotes the - // event, but its message is the opaque upstream JSON. Upgrade that - // to the actionable "assign a chat-capable model" copy — which - // still carries the phrase, so it stays demoted on any re-report. - // See Sentry TAURI-RUST-4P6. 
- let err = super::api_error(&self.name, response).await; - let err_str = err.to_string(); - if Self::is_not_chat_capable_model(status, &err_str) { - return Err(anyhow::anyhow!( - self.not_chat_capable_model_message(model, &err_str) - )); - } - let enriched = self.enrich_404_message(format!("{err:#}"), status); - return Err(anyhow::anyhow!("{enriched}")); - } - - let body = response.text().await?; - let chat_response = parse_chat_response_body(&self.name, &body)?; - - chat_response - .choices - .into_iter() - .next() - .map(|c| { - // If tool_calls are present, serialize the full message as JSON - // so parse_tool_calls can handle the OpenAI-style format - if c.message.tool_calls.is_some() - && c.message.tool_calls.as_ref().is_some_and(|t| !t.is_empty()) - { - serde_json::to_string(&c.message) - .unwrap_or_else(|_| c.message.effective_content()) - } else { - // No tool calls, return content (with reasoning_content fallback) - c.message.effective_content() - } - }) - .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name)) - } - - async fn chat_with_tools( - &self, - messages: &[ChatMessage], - tools: &[serde_json::Value], - model: &str, - temperature: f64, - ) -> anyhow::Result { - let credential = self.credential_for_request()?; - - let effective_messages = if self.merge_system_into_user { - Self::flatten_system_messages(messages) - } else { - messages.to_vec() - }; - let api_messages: Vec = effective_messages - .iter() - .map(|m| Message { - role: m.role.clone(), - content: MessageContent::from_chat_text(&m.content), - }) - .collect(); - - let request = ApiChatRequest { - model: model.to_string(), - messages: api_messages, - temperature: self.effective_temperature(model, temperature), - stream: Some(false), - tools: if tools.is_empty() { - None - } else { - Some(tools.to_vec()) - }, - tool_choice: if tools.is_empty() { - None - } else { - Some("auto".to_string()) - }, - }; - - let url = self.chat_completions_url(); - let response = match self - 
.apply_auth_header(self.http_client().post(&url).json(&request), credential) - .send() - .await - { - Ok(response) => response, - Err(error) => { - tracing::warn!( - "{} native tool call transport failed: {error}; falling back to history path", - self.name - ); - let text = self.chat_with_history(messages, model, temperature).await?; - return Ok(ProviderChatResponse { - text: Some(text), - tool_calls: vec![], - usage: None, - reasoning_content: None, - }); - } - }; - - if !response.status().is_success() { - return Err(super::api_error(&self.name, response).await); - } - - let body = response.text().await?; - let chat_response = parse_chat_response_body(&self.name, &body)?; - let usage = Self::extract_usage(&chat_response); - let choice = chat_response - .choices - .into_iter() - .next() - .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?; - - let text = choice.message.effective_content_optional(); - // See `parse_native_response`: replay reasoning on the follow-up - // request so DeepSeek thinking mode accepts the tool-call turn. 
- let reasoning_content = choice - .message - .reasoning_content - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(ToString::to_string); - let tool_calls = choice - .message - .tool_calls - .unwrap_or_default() - .into_iter() - .filter_map(|tc| { - let function = tc.function?; - let name = function.name?; - let arguments = normalize_function_arguments(function.arguments); - Some(ProviderToolCall { - id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()), - name, - arguments, - }) - }) - .collect::>(); - - tracing::debug!( - has_reasoning_content = reasoning_content.is_some(), - reasoning_content_chars = reasoning_content.as_ref().map_or(0, |r| r.chars().count()), - tool_calls = tool_calls.len(), - "[provider:chat] reasoning_content capture (non-streaming)" - ); - - Ok(ProviderChatResponse { - text, - tool_calls, - usage, - reasoning_content, - }) - } - - async fn chat( - &self, - request: ProviderChatRequest<'_>, - model: &str, - temperature: f64, - ) -> anyhow::Result { - let credential = self.credential_for_request()?; - - let tools = Self::convert_tool_specs(request.tools); - let effective_messages = if self.merge_system_into_user { - Self::flatten_system_messages(request.messages) - } else { - request.messages.to_vec() - }; - - if self.responses_api_primary { - let response_messages = if request.tools.is_some() { - Self::with_prompt_guided_tool_instructions(request.messages, request.tools) - } else { - effective_messages.clone() - }; - let text = self - .chat_via_responses(credential, &response_messages, model) - .await?; - if let Some(tx) = request.stream { - let _ = tx - .send( - crate::openhuman::inference::provider::ProviderDelta::TextDelta { - delta: text.clone(), - }, - ) - .await; - } - return Ok(ProviderChatResponse { - text: Some(text), - tool_calls: vec![], - usage: None, - reasoning_content: None, - }); - } - - // ── Streaming branch ───────────────────────────────────────── - // When the caller supplied a `ProviderDelta` 
sender, request - // SSE and forward fine-grained deltas while accumulating the - // final response. Fall back to non-streaming on non-200 errors - // so tool-schema rejections etc. still work. - if let Some(tx) = request.stream { - let native_request = NativeChatRequest { - model: model.to_string(), - messages: Self::convert_messages_for_native(&effective_messages), - temperature: self.effective_temperature(model, temperature), - stream: Some(true), - tool_choice: tools.as_ref().map(|_| "auto".to_string()), - tools: tools.clone(), - thread_id: self.outbound_thread_id(), - // Ask the server for a final usage chunk so token - // accounting (and `openhuman.billing.charged_amount_usd` - // for the OpenHuman backend) makes it back from - // streaming responses — orchestrator sessions otherwise - // lose the `- Charged: $…` line in their transcripts. - stream_options: Some(OpenAiStreamOptions { - include_usage: true, - }), - options: self.build_ollama_options(), - frequency_penalty: Some(CHAT_FREQUENCY_PENALTY), - }; - let stream_dump_seq = reserve_dump_seq(); - dump_prompt_if_enabled(&self.name, model, stream_dump_seq, &native_request); - match self - .stream_native_chat(credential, &native_request, tx, stream_dump_seq) - .await - { - Ok(resp) => return Ok(resp), - Err(err) => { - let err_str = err.to_string(); - // Some local-runtime models (e.g. Ollama serving - // gemma3, llama3.2:1b, …) reject the request with - // " does not support tools" when the - // ChatRequest carries a `tools` array. Retry the - // streaming call once with tools stripped so the - // user still gets a live token stream — without - // this we'd silently fall through to the buffered - // non-streaming path and the UI would render the - // reply all at once. 
- if tools.is_some() && Self::err_supports_no_tools_retry(&err_str) { - log::info!( - "[stream] {} model does not support tools — retrying streaming without tools", - self.name, - ); - let retry_request = NativeChatRequest { - tools: None, - tool_choice: None, - ..native_request.clone() - }; - match self - .stream_native_chat(credential, &retry_request, tx, stream_dump_seq) - .await - { - Ok(resp) => return Ok(resp), - Err(retry_err) => { - log::warn!( - "[stream] {} retry without tools also failed, falling back to non-streaming: {}", - self.name, - retry_err - ); - } - } - } else if Self::err_indicates_frequency_penalty_unsupported(&err_str) { - // Symmetric to the no-tools retry: a strict provider that - // 400s on `frequency_penalty` should degrade gracefully - // rather than fail the whole chat path. - log::info!( - "[stream] {} rejected frequency_penalty — retrying streaming without it", - self.name, - ); - let retry_request = NativeChatRequest { - frequency_penalty: None, - ..native_request.clone() - }; - match self - .stream_native_chat(credential, &retry_request, tx, stream_dump_seq) - .await - { - Ok(resp) => return Ok(resp), - Err(retry_err) => { - log::warn!( - "[stream] {} retry without frequency_penalty also failed, falling back to non-streaming: {}", - self.name, - retry_err - ); - } - } - } else { - log::warn!( - "[stream] {} streaming chat failed, falling back to non-streaming: {}", - self.name, - err - ); - } - // Fall through to the non-streaming path below. The - // non-streaming request below omits `frequency_penalty` so a - // provider that rejected it (streaming or not) still succeeds. 
- } - } - } - - let thread_id = self.outbound_thread_id(); - log::debug!( - "[provider:{}] chat() outbound thread_id={} model={}", - self.name, - thread_id.as_deref().unwrap_or(""), - model - ); - let native_request = NativeChatRequest { - model: model.to_string(), - messages: Self::convert_messages_for_native(&effective_messages), - temperature: self.effective_temperature(model, temperature), - stream: Some(false), - tool_choice: tools.as_ref().map(|_| "auto".to_string()), - tools, - thread_id, - stream_options: None, - options: self.build_ollama_options(), - // The buffered (non-streaming) path is the fallback / non-streaming - // provider path — omit `frequency_penalty` here for maximum - // compatibility (a provider that rejects it still succeeds). The - // streaming path above carries it (where degenerate repetition loops - // actually occur) and retries without it on rejection. - frequency_penalty: None, - }; - let dump_seq = reserve_dump_seq(); - dump_prompt_if_enabled(&self.name, model, dump_seq, &native_request); - - let url = self.chat_completions_url(); - let response = match self - .apply_auth_header( - self.http_client().post(&url).json(&native_request), - credential, - ) - .send() - .await - { - Ok(response) => response, - Err(chat_error) => { - if self.supports_responses_fallback { - let detail = super::format_error_chain(&chat_error); - return self - .chat_via_responses(credential, &effective_messages, model) - .await - .map(|text| ProviderChatResponse { - text: Some(text), - tool_calls: vec![], - usage: None, - reasoning_content: None, - }) - .map_err(|responses_err| { - let fb = super::format_anyhow_chain(&responses_err); - anyhow::anyhow!( - "{} native chat transport error: {detail} (responses fallback failed: {fb})", - self.name - ) - }); - } - - return Err(chat_error.into()); - } - }; - - if !response.status().is_success() { - let status = response.status(); - let error = response.text().await?; - let sanitized = 
super::sanitize_api_error(&error); - - if Self::is_native_tool_schema_unsupported(status, &sanitized) { - let fallback_messages = - Self::with_prompt_guided_tool_instructions(request.messages, request.tools); - let text = self - .chat_with_history(&fallback_messages, model, temperature) - .await?; - return Ok(ProviderChatResponse { - text: Some(text), - tool_calls: vec![], - usage: None, - reasoning_content: None, - }); - } - - // A completion-only model 404s here and the /v1/responses fallback - // cannot rescue it — fail fast with actionable guidance (#3193). - if let Some(err) = self.completion_only_404_guard(status, &sanitized, model) { - return Err(err); - } - - // An embedding / non-chat model rejected with 400 "does not - // support chat" (e.g. Ollama bge-m3 picked as the chat model) — - // fail fast with actionable guidance. See Sentry TAURI-RUST-4P6. - if let Some(err) = self.not_chat_capable_guard(status, &sanitized, model) { - return Err(err); - } - - if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback { - return self - .chat_via_responses(credential, &effective_messages, model) - .await - .map(|text| ProviderChatResponse { - text: Some(text), - tool_calls: vec![], - usage: None, - reasoning_content: None, - }) - .map_err(|responses_err| { - let fb = super::format_anyhow_chain(&responses_err); - anyhow::anyhow!( - "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {fb})", - self.name - ) - }); - } - - let status_str = status.as_u16().to_string(); - let message = self.enrich_404_message( - format!("{} API error ({status}): {sanitized}", self.name), - status, - ); - if super::is_budget_exhausted_http_400(status, &error) { - super::log_budget_exhausted_http_400( - "native_chat", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_custom_openai_upstream_bad_request_http_400( - self.name.as_str(), - status, - &error, - ) { - 
super::log_custom_openai_upstream_bad_request_http_400( - "native_chat", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_provider_access_policy_denied_http_403(status, &error) { - super::log_provider_access_policy_denied_http_403( - "native_chat", - self.name.as_str(), - Some(model), - status, - ); - } else if super::is_provider_config_rejection_http(status, self.name.as_str(), &error) { - super::log_provider_config_rejection( - "native_chat", - self.name.as_str(), - Some(model), - status, - ); - } else if super::should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "native_chat", - &[ - ("provider", self.name.as_str()), - ("model", model), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } - anyhow::bail!(message); - } - - let response_bytes = response.bytes().await?; - dump_response_if_enabled(&self.name, model, dump_seq, &response_bytes); - let native_response: ApiChatResponse = serde_json::from_slice(&response_bytes) - .map_err(|err| anyhow::anyhow!("{} response parse error: {err}", self.name))?; - Self::parse_native_response(native_response, &self.name) - } - - fn supports_native_tools(&self) -> bool { - // Must mirror `capabilities().native_tool_calling`. Both signals are - // read by the agent harness (`traits.rs:415`) to decide between an - // OpenAI-style `tools` array and the prompt-guided text fallback; - // letting them disagree would defeat `with_native_tool_calling(false)` - // for the Ollama branch of sub-issue 3 of #3098. 
- self.native_tool_calling - } - - fn supports_streaming(&self) -> bool { - true - } - - fn stream_chat_with_system( - &self, - system_prompt: Option<&str>, - message: &str, - model: &str, - temperature: f64, - options: StreamOptions, - ) -> stream::BoxStream<'static, StreamResult> { - let credential = match self.credential_for_request() { - Ok(value) => value.map(str::to_string), - Err(err) => { - return stream::once(async move { Err(StreamError::Provider(err.to_string())) }) - .boxed(); - } - }; - - let mut messages = Vec::new(); - if let Some(sys) = system_prompt { - messages.push(Message { - role: "system".to_string(), - content: sys.into(), - }); - } - messages.push(Message { - role: "user".to_string(), - content: MessageContent::from_chat_text(message), - }); - - let request = ApiChatRequest { - model: model.to_string(), - messages, - temperature: self.effective_temperature(model, temperature), - stream: Some(options.enabled), - tools: None, - tool_choice: None, - }; - - let url = self.chat_completions_url(); - let client = self.http_client(); - let auth_header = self.auth_header.clone(); - let extra_headers = self.extra_headers.clone(); - let openrouter_attribution_headers = self.openrouter_attribution_headers(); - let provider_name = self.name.clone(); - let model_owned = model.to_string(); - - // Use a channel to bridge the async HTTP response to the stream - let (tx, rx) = tokio::sync::mpsc::channel::>(100); - - tokio::spawn(async move { - // Build request with auth - let mut req_builder = client.post(&url).json(&request); - - // Apply auth header - req_builder = match (&auth_header, credential.as_deref()) { - (AuthStyle::None, _) | (_, None) => req_builder, - (AuthStyle::Bearer, Some(credential)) => { - req_builder.header("Authorization", format!("Bearer {credential}")) - } - (AuthStyle::XApiKey, Some(credential)) => { - req_builder.header("x-api-key", credential) - } - (AuthStyle::Anthropic, Some(credential)) => req_builder - .header("x-api-key", 
credential) - .header("anthropic-version", "2023-06-01"), - (AuthStyle::Custom(header), Some(credential)) => { - req_builder.header(header, credential) - } - }; - - for (name, value) in &extra_headers { - req_builder = req_builder.header(name.as_str(), value.as_str()); - } - if let Some((referer, title)) = openrouter_attribution_headers { - req_builder = req_builder - .header("HTTP-Referer", referer) - .header("X-OpenRouter-Title", title); - } - - // Set accept header for streaming - req_builder = req_builder.header("Accept", "text/event-stream"); - - // Send request - let response = match req_builder.send().await { - Ok(r) => r, - Err(e) => { - crate::core::observability::report_error( - e.to_string().as_str(), - "llm_provider", - "stream_chat", - &[ - ("provider", provider_name.as_str()), - ("model", model_owned.as_str()), - ("failure", "transport"), - ], - ); - let _ = tx.send(Err(StreamError::Http(e))).await; - return; - } - }; - - // Check status - if !response.status().is_success() { - let status = response.status(); - let status_str = status.as_u16().to_string(); - let raw_error = match response.text().await { - Ok(e) => e, - Err(_) => format!("HTTP error: {}", status), - }; - let sanitized_error = super::sanitize_api_error(&raw_error); - let message = format!("{}: {}", status, sanitized_error); - if super::is_budget_exhausted_http_400(status, &raw_error) { - super::log_budget_exhausted_http_400( - "stream_chat", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::is_custom_openai_upstream_bad_request_http_400( - provider_name.as_str(), - status, - &raw_error, - ) { - super::log_custom_openai_upstream_bad_request_http_400( - "stream_chat", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::is_provider_access_policy_denied_http_403(status, &raw_error) { - super::log_provider_access_policy_denied_http_403( - "stream_chat", - provider_name.as_str(), - Some(model_owned.as_str()), - 
status, - ); - } else if super::is_provider_config_rejection_http( - status, - provider_name.as_str(), - &raw_error, - ) { - super::log_provider_config_rejection( - "stream_chat", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "stream_chat", - &[ - ("provider", provider_name.as_str()), - ("model", model_owned.as_str()), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } - let _ = tx.send(Err(StreamError::Provider(message))).await; - return; - } - - // Convert to chunk stream and forward to channel - let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens); - while let Some(chunk) = chunk_stream.next().await { - if tx.send(chunk).await.is_err() { - break; // Receiver dropped - } - } - }); - - // Convert channel receiver to stream - stream::unfold(rx, |mut rx| async move { - rx.recv().await.map(|chunk| (chunk, rx)) - }) - .boxed() - } - - fn stream_chat_with_history( - &self, - messages: &[ChatMessage], - model: &str, - temperature: f64, - options: StreamOptions, - ) -> stream::BoxStream<'static, StreamResult> { - let credential = match self.credential_for_request() { - Ok(value) => value.map(str::to_string), - Err(err) => { - return stream::once(async move { Err(StreamError::Provider(err.to_string())) }) - .boxed(); - } - }; - - let effective_messages = if self.merge_system_into_user { - Self::flatten_system_messages(messages) - } else { - messages.to_vec() - }; - let api_messages = effective_messages - .into_iter() - .map(|message| Message { - role: message.role, - content: MessageContent::from_chat_text(&message.content), - }) - .collect(); - - let request = ApiChatRequest { - model: model.to_string(), - messages: api_messages, - temperature: self.effective_temperature(model, temperature), - stream: Some(options.enabled), - tools: None, - tool_choice: 
None, - }; - - let url = self.chat_completions_url(); - let client = self.http_client(); - let auth_header = self.auth_header.clone(); - let extra_headers = self.extra_headers.clone(); - let openrouter_attribution_headers = self.openrouter_attribution_headers(); - let provider_name = self.name.clone(); - let model_owned = model.to_string(); - - let (tx, rx) = tokio::sync::mpsc::channel::>(100); - - tokio::spawn(async move { - let mut req_builder = client.post(&url).json(&request); - req_builder = match (&auth_header, credential.as_deref()) { - (AuthStyle::None, _) | (_, None) => req_builder, - (AuthStyle::Bearer, Some(credential)) => { - req_builder.header("Authorization", format!("Bearer {credential}")) - } - (AuthStyle::XApiKey, Some(credential)) => { - req_builder.header("x-api-key", credential) - } - (AuthStyle::Anthropic, Some(credential)) => req_builder - .header("x-api-key", credential) - .header("anthropic-version", "2023-06-01"), - (AuthStyle::Custom(header), Some(credential)) => { - req_builder.header(header, credential) - } - }; - for (name, value) in &extra_headers { - req_builder = req_builder.header(name.as_str(), value.as_str()); - } - if let Some((referer, title)) = openrouter_attribution_headers { - req_builder = req_builder - .header("HTTP-Referer", referer) - .header("X-OpenRouter-Title", title); - } - req_builder = req_builder.header("Accept", "text/event-stream"); - - let response = match req_builder.send().await { - Ok(response) => response, - Err(error) => { - crate::core::observability::report_error( - error.to_string().as_str(), - "llm_provider", - "stream_chat_history", - &[ - ("provider", provider_name.as_str()), - ("model", model_owned.as_str()), - ("failure", "transport"), - ], - ); - let _ = tx.send(Err(StreamError::Http(error))).await; - return; - } - }; - - if !response.status().is_success() { - let status = response.status(); - let status_str = status.as_u16().to_string(); - let raw_error = match response.text().await { - Ok(error) 
=> error, - Err(_) => format!("HTTP error: {status}"), - }; - let sanitized_error = super::sanitize_api_error(&raw_error); - let message = format!("{status}: {sanitized_error}"); - if super::is_budget_exhausted_http_400(status, &raw_error) { - super::log_budget_exhausted_http_400( - "stream_chat_history", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::is_custom_openai_upstream_bad_request_http_400( - provider_name.as_str(), - status, - &raw_error, - ) { - super::log_custom_openai_upstream_bad_request_http_400( - "stream_chat_history", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::is_provider_access_policy_denied_http_403(status, &raw_error) { - super::log_provider_access_policy_denied_http_403( - "stream_chat_history", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::is_provider_config_rejection_http( - status, - provider_name.as_str(), - &raw_error, - ) { - super::log_provider_config_rejection( - "stream_chat_history", - provider_name.as_str(), - Some(model_owned.as_str()), - status, - ); - } else if super::should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "stream_chat_history", - &[ - ("provider", provider_name.as_str()), - ("model", model_owned.as_str()), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } - let _ = tx.send(Err(StreamError::Provider(message))).await; - return; - } - - let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens); - while let Some(chunk) = chunk_stream.next().await { - if tx.send(chunk).await.is_err() { - break; - } - } - }); - - stream::unfold(rx, |mut rx| async move { - rx.recv().await.map(|chunk| (chunk, rx)) - }) - .boxed() - } - - async fn warmup(&self) -> anyhow::Result<()> { - if let Some(credential) = self.credential.as_ref() { - // Hit the chat completions URL with a GET to 
establish the connection pool. - // The server will likely return 405 Method Not Allowed, which is fine - - // the goal is TLS handshake and HTTP/2 negotiation. - let url = self.chat_completions_url(); - let _ = self - .apply_auth_header(self.http_client().get(&url), Some(credential.as_str())) - .send() - .await?; - } - Ok(()) - } } #[cfg(test)] diff --git a/src/openhuman/inference/provider/compatible_helpers.rs b/src/openhuman/inference/provider/compatible_helpers.rs new file mode 100644 index 0000000000..adbaab4b05 --- /dev/null +++ b/src/openhuman/inference/provider/compatible_helpers.rs @@ -0,0 +1,634 @@ +use crate::openhuman::inference::provider::traits::{ + ChatMessage, ChatResponse as ProviderChatResponse, ToolCall as ProviderToolCall, + UsageInfo as ProviderUsageInfo, +}; + +use super::compatible_parse::{ + aggregate_responses_sse_body, build_responses_prompt, extract_responses_text, + normalize_function_arguments, parse_responses_response_body, + parse_tool_calls_from_content_json, +}; +use super::compatible_types::{ + ApiChatResponse, Message, MessageContent, NativeChatRequest, NativeMessage, ResponsesRequest, + ToolCall, +}; +use super::OpenAiCompatibleProvider; + +impl OpenAiCompatibleProvider { + pub(super) async fn chat_via_responses( + &self, + credential: Option<&str>, + messages: &[ChatMessage], + model: &str, + ) -> anyhow::Result { + let (instructions, input) = build_responses_prompt(messages); + if input.is_empty() { + anyhow::bail!( + "{} Responses API fallback requires at least one non-system message", + self.name + ); + } + + // #3201: the Codex/ChatGPT OAuth Responses endpoint rejects `stream: false` + // outright. This branch lifts the constraint for that endpoint specifically + // and parses the resulting SSE body so the existing non-streaming call + // signature is preserved. Other providers keep the single-envelope path. 
+ let is_codex_oauth_responses = reqwest::Url::parse(&self.base_url) + .ok() + .and_then(|url| { + let segments: Vec<&str> = url.path_segments()?.collect(); + Some( + segments + .windows(2) + .any(|window| window == ["backend-api", "codex"]), + ) + }) + .unwrap_or(false); + + let request = ResponsesRequest { + model: model.to_string(), + input, + instructions, + stream: Some(is_codex_oauth_responses), + store: Some(false), + }; + + let url = self.responses_url(); + + let response = self + .apply_auth_header(self.http_client().post(&url).json(&request), credential) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let status_str = status.as_u16().to_string(); + let error = response.text().await?; + let sanitized = super::super::sanitize_api_error(&error); + let message = format!("{} Responses API error: {sanitized}", self.name); + if super::super::is_budget_exhausted_http_400(status, &error) { + super::super::log_budget_exhausted_http_400( + "responses_api", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &error, + ) { + super::super::log_custom_openai_upstream_bad_request_http_400( + "responses_api", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_provider_access_policy_denied_http_403(status, &error) { + super::super::log_provider_access_policy_denied_http_403( + "responses_api", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_provider_config_rejection_http( + status, + self.name.as_str(), + &error, + ) { + super::super::log_provider_config_rejection( + "responses_api", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::should_report_provider_http_failure(status) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "responses_api", + &[ + ("provider", self.name.as_str()), + ("model", 
model), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + anyhow::bail!(message); + } + + let body = response.text().await?; + if is_codex_oauth_responses { + return aggregate_responses_sse_body(&self.name, &body); + } + let responses = parse_responses_response_body(&self.name, &body)?; + + extract_responses_text(responses) + .ok_or_else(|| anyhow::anyhow!("No response from {} Responses API", self.name)) + } + + pub(super) fn convert_tool_specs( + tools: Option<&[crate::openhuman::tools::ToolSpec]>, + ) -> Option> { + tools.map(|items| { + let mut seen: std::collections::HashSet<&str> = + std::collections::HashSet::with_capacity(items.len()); + let mut dropped: Vec<&str> = Vec::new(); + let mut out: Vec = Vec::with_capacity(items.len()); + for tool in items { + if !seen.insert(tool.name.as_str()) { + dropped.push(tool.name.as_str()); + continue; + } + out.push(serde_json::json!({ + "type": "function", + "function": { + "name": tool.name, + "description": tool.description, + "parameters": tool.parameters, + } + })); + } + if !dropped.is_empty() { + log::warn!( + "[providers][compatible] dropped {} duplicate tool spec(s) at wire \ + boundary (TAURI-RUST-2E): {:?}", + dropped.len(), + dropped + ); + } + out + }) + } + + pub(super) fn convert_messages_for_native(messages: &[ChatMessage]) -> Vec { + let converted: Vec = + messages + .iter() + .map(|message| { + let reasoning_content = if message.role == "assistant" { + message + .extra_metadata + .as_ref() + .and_then(|m| m.get("reasoning_content")) + .and_then(serde_json::Value::as_str) + .map(ToString::to_string) + } else { + None + }; + + if message.role == "assistant" { + if let Ok(value) = + serde_json::from_str::(&message.content) + { + if let Some(tool_calls_value) = value.get("tool_calls") { + if let Ok(parsed_calls) = + serde_json::from_value::>( + tool_calls_value.clone(), + ) + { + let tool_calls = parsed_calls + .into_iter() + .map(|tc| ToolCall { + id: Some(tc.id), + kind: 
Some("function".to_string()), + function: Some(super::compatible_types::Function { + name: Some(tc.name), + arguments: Some(serde_json::Value::String( + tc.arguments, + )), + }), + }) + .collect::>(); + + let content = Some(MessageContent::Text( + value + .get("content") + .and_then(serde_json::Value::as_str) + .unwrap_or("") + .to_string(), + )); + + let reasoning_content = value + .get("reasoning_content") + .and_then(serde_json::Value::as_str) + .filter(|s| !s.trim().is_empty()) + .map(ToString::to_string) + .or_else(|| reasoning_content.clone()); + + return NativeMessage { + role: "assistant".to_string(), + content, + tool_call_id: None, + tool_calls: Some(tool_calls), + reasoning_content, + }; + } + } + } + } + + if message.role == "tool" { + if let Ok(value) = + serde_json::from_str::(&message.content) + { + let tool_call_id = value + .get("tool_call_id") + .and_then(serde_json::Value::as_str) + .map(ToString::to_string); + let content = value + .get("content") + .and_then(serde_json::Value::as_str) + .map(ToString::to_string) + .or_else(|| Some(message.content.clone())) + .map(MessageContent::Text); + + return NativeMessage { + role: "tool".to_string(), + content, + tool_call_id, + tool_calls: None, + reasoning_content: None, + }; + } + } + + NativeMessage { + role: message.role.clone(), + content: Some(MessageContent::from_chat_text(&message.content)), + tool_call_id: None, + tool_calls: None, + reasoning_content, + } + }) + .collect(); + + Self::enforce_tool_message_invariants(converted) + } + + /// Enforce the OpenAI-compatible tool-message ordering invariants on the + /// fully-serialized wire array, immediately before it goes on the wire. + /// + /// Several upstream defects can leave the array malformed and trip a 400 + /// (`messages with role 'tool' must be a response to a preceding message + /// with 'tool_calls'`). 
That 400 streams back as an empty completion, which + /// the agent loop collapses to "The model returned an empty response" and + /// the chat surface shows as a generic "Something went wrong": + /// + /// * **(A)** History tail-trimming cuts *between* an `assistant(tool_calls)` + /// and its `tool` result, dropping the assistant and orphaning the result. + /// * **(B)** A persisted assistant tool-call message whose `content` no + /// longer deserializes as `tool_calls` (format drift) falls through and + /// is emitted as plain text with its `tool_calls` stripped. + /// * **(C)** An `assistant(tool_calls)` whose results never arrived leaves + /// dangling tool-call ids with no matching `tool` response. + pub(super) fn enforce_tool_message_invariants( + messages: Vec, + ) -> Vec { + use std::collections::HashSet; + + let mut out: Vec = Vec::with_capacity(messages.len()); + let mut dropped_orphans = 0usize; + let mut pruned_calls = 0usize; + + let mut iter = messages.into_iter().peekable(); + while let Some(mut msg) = iter.next() { + if msg.role == "assistant" && msg.tool_calls.is_some() { + let mut run: Vec = Vec::new(); + while iter.peek().is_some_and(|m| m.role == "tool") { + run.push(iter.next().expect("peeked tool message")); + } + let responded: HashSet = + run.iter().filter_map(|t| t.tool_call_id.clone()).collect(); + + let calls = msg.tool_calls.take().unwrap_or_default(); + let before = calls.len(); + let kept: Vec = calls + .into_iter() + .filter(|c| c.id.as_deref().is_some_and(|id| responded.contains(id))) + .collect(); + pruned_calls += before - kept.len(); + let kept_ids: HashSet = kept.iter().filter_map(|c| c.id.clone()).collect(); + msg.tool_calls = if kept.is_empty() { None } else { Some(kept) }; + if msg.tool_calls.is_none() { + msg.reasoning_content = None; + } + out.push(msg); + + for tool_msg in run { + let kept = tool_msg + .tool_call_id + .as_deref() + .is_some_and(|id| kept_ids.contains(id)); + if kept { + out.push(tool_msg); + } else { + 
dropped_orphans += 1; + } + } + } else if msg.role == "tool" { + dropped_orphans += 1; + } else { + out.push(msg); + } + } + + if dropped_orphans > 0 || pruned_calls > 0 { + log::warn!( + "[provider] sanitized malformed tool-message ordering before send: \ + dropped {dropped_orphans} orphaned tool result(s), pruned {pruned_calls} \ + unanswered tool_call(s)" + ); + } + + out + } + + pub(super) fn with_prompt_guided_tool_instructions( + messages: &[ChatMessage], + tools: Option<&[crate::openhuman::tools::ToolSpec]>, + ) -> Vec { + let Some(tools) = tools else { + return messages.to_vec(); + }; + + if tools.is_empty() { + return messages.to_vec(); + } + + let instructions = + crate::openhuman::inference::provider::traits::build_tool_instructions_text(tools); + let mut modified_messages = messages.to_vec(); + + if let Some(system_message) = modified_messages.iter_mut().find(|m| m.role == "system") { + if !system_message.content.is_empty() { + system_message.content.push_str("\n\n"); + } + system_message.content.push_str(&instructions); + } else { + modified_messages.insert(0, ChatMessage::system(instructions)); + } + + modified_messages + } + + pub(super) fn parse_native_response( + api_response: ApiChatResponse, + provider_name: &str, + ) -> anyhow::Result { + let usage = Self::extract_usage(&api_response); + + let message = api_response + .choices + .into_iter() + .next() + .map(|c| c.message) + .ok_or_else(|| anyhow::anyhow!("No choices in response from {}", provider_name))?; + + let mut text = message.effective_content_optional(); + let reasoning_content = message + .reasoning_content + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_owned); + let mut tool_calls = message + .tool_calls + .unwrap_or_default() + .into_iter() + .filter_map(|tc| { + let function = tc.function?; + let name = function.name?; + let arguments = normalize_function_arguments(function.arguments); + Some(ProviderToolCall { + id: tc.id.unwrap_or_else(|| 
uuid::Uuid::new_v4().to_string()), + name, + arguments, + }) + }) + .collect::>(); + + if tool_calls.is_empty() { + if let Some(function) = message.function_call.as_ref() { + if let Some(name) = function + .name + .as_ref() + .filter(|name| !name.trim().is_empty()) + { + tool_calls.push(ProviderToolCall { + id: uuid::Uuid::new_v4().to_string(), + name: name.clone(), + arguments: normalize_function_arguments(function.arguments.clone()), + }); + } + } + } + + if let Some(content) = message.content.as_deref() { + if let Some((json_text, json_tool_calls)) = parse_tool_calls_from_content_json(content) + { + if !json_tool_calls.is_empty() { + tool_calls = json_tool_calls; + text = json_text.or(text); + } + } + } + + tracing::debug!( + has_reasoning_content = reasoning_content.is_some(), + reasoning_content_chars = reasoning_content.as_ref().map_or(0, |r| r.chars().count()), + "[provider:parse_native_response] reasoning_content capture" + ); + + Ok(ProviderChatResponse { + text, + tool_calls, + usage, + reasoning_content, + }) + } + + /// Extract usage info from API response, preferring the OpenHuman + /// metadata block (which includes cache stats and billing) over the + /// standard OpenAI usage block. 
+ pub(super) fn extract_usage(resp: &ApiChatResponse) -> Option { + let oh = resp.openhuman.as_ref(); + let std_usage = resp.usage.as_ref(); + + if oh.is_none() && std_usage.is_none() { + return None; + } + + let oh_usage = oh.and_then(|o| o.usage.as_ref()); + let oh_billing = oh.and_then(|o| o.billing.as_ref()); + + let input_tokens = oh_usage + .and_then(|u| u.input_tokens) + .or(std_usage.map(|u| u.prompt_tokens)) + .unwrap_or(0); + let output_tokens = oh_usage + .and_then(|u| u.output_tokens) + .or(std_usage.map(|u| u.completion_tokens)) + .unwrap_or(0); + let cached_input_tokens = oh_usage + .and_then(|u| u.cached_input_tokens) + .or(std_usage + .and_then(|u| u.prompt_tokens_details.as_ref()) + .map(|d| d.cached_tokens)) + .unwrap_or(0); + let charged_amount_usd = oh_billing.map(|b| b.charged_amount_usd).unwrap_or(0.0); + + let from_openhuman = oh_usage.is_some(); + let from_standard = std_usage.is_some() && !from_openhuman; + let has_billing = oh_billing.is_some(); + tracing::debug!( + from_openhuman, + from_standard, + has_billing, + input_tokens, + output_tokens, + cached_input_tokens, + charged_amount_usd, + "[provider:usage] extract_usage resolved token counts" + ); + + Some(ProviderUsageInfo { + input_tokens, + output_tokens, + context_window: 0, + cached_input_tokens, + charged_amount_usd, + }) + } + + pub(super) fn is_native_tool_schema_unsupported( + status: reqwest::StatusCode, + error: &str, + ) -> bool { + if !matches!( + status, + reqwest::StatusCode::BAD_REQUEST | reqwest::StatusCode::UNPROCESSABLE_ENTITY + ) { + return false; + } + + let lower = error.to_lowercase(); + [ + "unknown parameter: tools", + "unsupported parameter: tools", + "unrecognized field `tools`", + "does not support tools", + "function calling is not supported", + "tool_choice", + ] + .iter() + .any(|hint| lower.contains(hint)) + } + + pub(super) fn err_supports_no_tools_retry(error: &str) -> bool { + Self::is_native_tool_schema_unsupported(reqwest::StatusCode::BAD_REQUEST, 
error) + } + + /// Detect a provider rejecting the `frequency_penalty` sampling field. Some + /// strict OpenAI-compatible backends 400 on unknown params; when this fires + /// the caller retries once with the field omitted (mirrors the no-tools + /// retry). String-based because the streamed transport error surfaces the + /// API error body. + pub(super) fn err_indicates_frequency_penalty_unsupported(error: &str) -> bool { + let lower = error.to_lowercase(); + lower.contains("frequency_penalty") + && (lower.contains("unsupported") + || lower.contains("unknown") + || lower.contains("unrecognized") + || lower.contains("not supported") + || lower.contains("does not support") + || lower.contains("invalid") + || lower.contains("unexpected")) + } + + /// Detect a 404 whose body says the model is completion-only. See issue #3193. + pub(super) fn is_completion_only_model_404(status: reqwest::StatusCode, error: &str) -> bool { + if status != reqwest::StatusCode::NOT_FOUND { + return false; + } + let lower = error.to_lowercase(); + lower.contains("not a chat model") + || (lower.contains("v1/chat/completions") && lower.contains("v1/completions")) + } + + /// Detect a model rejected because it has no chat capability. See Sentry TAURI-RUST-4P6. + pub(super) fn is_not_chat_capable_model(status: reqwest::StatusCode, error: &str) -> bool { + if !matches!( + status, + reqwest::StatusCode::BAD_REQUEST | reqwest::StatusCode::UNPROCESSABLE_ENTITY + ) { + return false; + } + error.to_lowercase().contains("does not support chat") + } + + pub(super) fn completion_only_model_message(&self, model: &str, sanitized: &str) -> String { + format!( + "{name} API error (404): model '{model}' does not support the \ + chat-completions API that OpenHuman uses — it appears to be a \ + completion-only / base model. Assign a chat-capable model to this \ + provider (e.g. in Settings → AI), or pick a different model. 
\ + Provider detail: {sanitized}", + name = self.name, + ) + } + + /// Guard shared by every chat-completions 404 handler. See issue #3193. + pub(super) fn completion_only_404_guard( + &self, + status: reqwest::StatusCode, + sanitized: &str, + model: &str, + ) -> Option { + if Self::is_completion_only_model_404(status, sanitized) { + Some(anyhow::anyhow!( + self.completion_only_model_message(model, sanitized) + )) + } else { + None + } + } + + pub(super) fn not_chat_capable_model_message(&self, model: &str, sanitized: &str) -> String { + format!( + "{name} API error: model '{model}' does not support chat — it \ + appears to be an embedding or non-chat model. Assign a \ + chat-capable model to this provider (e.g. in Settings → AI), or \ + pick a different model. Provider detail: {sanitized}", + name = self.name, + ) + } + + /// Guard shared by every chat-completions error handler. See Sentry TAURI-RUST-4P6. + pub(super) fn not_chat_capable_guard( + &self, + status: reqwest::StatusCode, + sanitized: &str, + model: &str, + ) -> Option { + if Self::is_not_chat_capable_model(status, sanitized) { + Some(anyhow::anyhow!( + self.not_chat_capable_model_message(model, sanitized) + )) + } else { + None + } + } + + pub(super) fn enrich_404_message(&self, base: String, status: reqwest::StatusCode) -> String { + if status == reqwest::StatusCode::NOT_FOUND && !self.supports_responses_fallback { + format!( + "{base}; check that your endpoint URL is correct \ + and the model name exists on your provider" + ) + } else { + base + } + } +} diff --git a/src/openhuman/inference/provider/compatible_provider_impl.rs b/src/openhuman/inference/provider/compatible_provider_impl.rs new file mode 100644 index 0000000000..a855332708 --- /dev/null +++ b/src/openhuman/inference/provider/compatible_provider_impl.rs @@ -0,0 +1,1127 @@ +use crate::openhuman::inference::provider::traits::{ + ChatMessage, ChatRequest as ProviderChatRequest, ChatResponse as ProviderChatResponse, + Provider, 
StreamChunk, StreamError, StreamOptions, StreamResult, ToolCall as ProviderToolCall, +}; +use async_trait::async_trait; +use futures_util::{stream, StreamExt}; + +use super::compatible_dump::{dump_prompt_if_enabled, dump_response_if_enabled, reserve_dump_seq}; +use super::compatible_parse::normalize_function_arguments; +use super::compatible_repeat::CHAT_FREQUENCY_PENALTY; +use super::compatible_stream::sse_bytes_to_chunks; +use super::compatible_types::{ + ApiChatRequest, ApiChatResponse, Message, MessageContent, NativeChatRequest, + OpenAiStreamOptions, +}; +use super::{AuthStyle, OpenAiCompatibleProvider}; + +#[async_trait] +impl Provider for OpenAiCompatibleProvider { + fn capabilities(&self) -> crate::openhuman::inference::provider::traits::ProviderCapabilities { + crate::openhuman::inference::provider::traits::ProviderCapabilities { + native_tool_calling: self.native_tool_calling, + // Kept `false` for now — vision is a per-*model* property the provider + // can't know here; the Responses-API path is still text-only. Stays off + // until it can be driven per-model (e.g. from `model_registry.vision`). 
+ vision: false, + } + } + + async fn chat_with_system( + &self, + system_prompt: Option<&str>, + message: &str, + model: &str, + temperature: f64, + ) -> anyhow::Result { + let credential = self.credential_for_request()?; + + let mut messages = Vec::new(); + + if self.merge_system_into_user { + let content = match system_prompt { + Some(sys) => format!("{sys}\n\n{message}"), + None => message.to_string(), + }; + messages.push(Message { + role: "user".to_string(), + content: MessageContent::from_chat_text(&content), + }); + } else { + if let Some(sys) = system_prompt { + messages.push(Message { + role: "system".to_string(), + content: sys.into(), + }); + } + messages.push(Message { + role: "user".to_string(), + content: MessageContent::from_chat_text(message), + }); + } + + let request = ApiChatRequest { + model: model.to_string(), + messages, + temperature: self.effective_temperature(model, temperature), + stream: Some(false), + tools: None, + tool_choice: None, + }; + + let url = self.chat_completions_url(); + + let mut fallback_messages = Vec::new(); + if let Some(system_prompt) = system_prompt { + fallback_messages.push(ChatMessage::system(system_prompt)); + } + fallback_messages.push(ChatMessage::user(message)); + let fallback_messages = if self.merge_system_into_user { + Self::flatten_system_messages(&fallback_messages) + } else { + fallback_messages + }; + + if self.responses_api_primary { + return self + .chat_via_responses(credential, &fallback_messages, model) + .await; + } + + let response = match self + .apply_auth_header(self.http_client().post(&url).json(&request), credential) + .send() + .await + { + Ok(response) => response, + Err(chat_error) => { + if self.supports_responses_fallback { + let detail = super::super::format_error_chain(&chat_error); + return self + .chat_via_responses(credential, &fallback_messages, model) + .await + .map_err(|responses_err| { + let fb = super::super::format_anyhow_chain(&responses_err); + anyhow::anyhow!( + "{} chat 
completions transport error: {detail} (responses fallback failed: {fb})", + self.name + ) + }); + } + + return Err(chat_error.into()); + } + }; + + if !response.status().is_success() { + let status = response.status(); + let error = response.text().await?; + let sanitized = super::super::sanitize_api_error(&error); + + if let Some(err) = self.completion_only_404_guard(status, &sanitized, model) { + return Err(err); + } + + if let Some(err) = self.not_chat_capable_guard(status, &sanitized, model) { + return Err(err); + } + + if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback { + return self + .chat_via_responses(credential, &fallback_messages, model) + .await + .map_err(|responses_err| { + let fb = super::super::format_anyhow_chain(&responses_err); + anyhow::anyhow!( + "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {fb})", + self.name + ) + }); + } + + let status_str = status.as_u16().to_string(); + let message = self.enrich_404_message( + format!("{} API error ({status}): {sanitized}", self.name), + status, + ); + if super::super::is_backend_auth_failure(self.name.as_str(), status) { + super::super::publish_backend_session_expired( + "chat_completions", + self.name.as_str(), + status, + &message, + ); + } else if super::super::is_budget_exhausted_http_400(status, &error) { + super::super::log_budget_exhausted_http_400( + "chat_completions", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &error, + ) { + super::super::log_custom_openai_upstream_bad_request_http_400( + "chat_completions", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_provider_access_policy_denied_http_403(status, &error) { + super::super::log_provider_access_policy_denied_http_403( + "chat_completions", + self.name.as_str(), + Some(model), + status, + ); + } else if 
super::super::is_provider_config_rejection_http( + status, + self.name.as_str(), + &error, + ) { + super::super::log_provider_config_rejection( + "chat_completions", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::should_report_provider_http_failure(status) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "chat_completions", + &[ + ("provider", self.name.as_str()), + ("model", model), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + anyhow::bail!(message); + } + + let body = response.text().await?; + let chat_response = super::compatible_parse::parse_chat_response_body(&self.name, &body)?; + + chat_response + .choices + .into_iter() + .next() + .map(|c| { + if c.message.tool_calls.is_some() + && c.message.tool_calls.as_ref().is_some_and(|t| !t.is_empty()) + { + serde_json::to_string(&c.message) + .unwrap_or_else(|_| c.message.effective_content()) + } else { + c.message.effective_content() + } + }) + .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name)) + } + + async fn chat_with_history( + &self, + messages: &[ChatMessage], + model: &str, + temperature: f64, + ) -> anyhow::Result { + let credential = self.credential_for_request()?; + + let effective_messages = if self.merge_system_into_user { + Self::flatten_system_messages(messages) + } else { + messages.to_vec() + }; + let api_messages: Vec = effective_messages + .iter() + .map(|m| Message { + role: m.role.clone(), + content: MessageContent::from_chat_text(&m.content), + }) + .collect(); + + let request = ApiChatRequest { + model: model.to_string(), + messages: api_messages, + temperature: self.effective_temperature(model, temperature), + stream: Some(false), + tools: None, + tool_choice: None, + }; + + let url = self.chat_completions_url(); + if self.responses_api_primary { + return self + .chat_via_responses(credential, &effective_messages, model) + .await; + } + + let response = match self + 
.apply_auth_header(self.http_client().post(&url).json(&request), credential) + .send() + .await + { + Ok(response) => response, + Err(chat_error) => { + if self.supports_responses_fallback { + let detail = super::super::format_error_chain(&chat_error); + return self + .chat_via_responses(credential, &effective_messages, model) + .await + .map_err(|responses_err| { + let fb = super::super::format_anyhow_chain(&responses_err); + anyhow::anyhow!( + "{} chat completions transport error: {detail} (responses fallback failed: {fb})", + self.name + ) + }); + } + + return Err(chat_error.into()); + } + }; + + if !response.status().is_success() { + let status = response.status(); + + if status == reqwest::StatusCode::NOT_FOUND { + let error = response.text().await?; + let sanitized = super::super::sanitize_api_error(&error); + + if let Some(err) = self.completion_only_404_guard(status, &sanitized, model) { + return Err(err); + } + + if self.supports_responses_fallback { + return self + .chat_via_responses(credential, &effective_messages, model) + .await + .map_err(|responses_err| { + let fb = super::super::format_anyhow_chain(&responses_err); + anyhow::anyhow!( + "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {fb})", + self.name + ) + }); + } + + let enriched = self.enrich_404_message( + format!("{} API error ({status}): {sanitized}", self.name), + status, + ); + return Err(anyhow::anyhow!("{enriched}")); + } + + let err = super::super::api_error(&self.name, response).await; + let err_str = err.to_string(); + if Self::is_not_chat_capable_model(status, &err_str) { + return Err(anyhow::anyhow!( + self.not_chat_capable_model_message(model, &err_str) + )); + } + let enriched = self.enrich_404_message(format!("{err:#}"), status); + return Err(anyhow::anyhow!("{enriched}")); + } + + let body = response.text().await?; + let chat_response = super::compatible_parse::parse_chat_response_body(&self.name, &body)?; + + chat_response + 
.choices + .into_iter() + .next() + .map(|c| { + if c.message.tool_calls.is_some() + && c.message.tool_calls.as_ref().is_some_and(|t| !t.is_empty()) + { + serde_json::to_string(&c.message) + .unwrap_or_else(|_| c.message.effective_content()) + } else { + c.message.effective_content() + } + }) + .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name)) + } + + async fn chat_with_tools( + &self, + messages: &[ChatMessage], + tools: &[serde_json::Value], + model: &str, + temperature: f64, + ) -> anyhow::Result { + let credential = self.credential_for_request()?; + + let effective_messages = if self.merge_system_into_user { + Self::flatten_system_messages(messages) + } else { + messages.to_vec() + }; + let api_messages: Vec = effective_messages + .iter() + .map(|m| Message { + role: m.role.clone(), + content: MessageContent::from_chat_text(&m.content), + }) + .collect(); + + let request = ApiChatRequest { + model: model.to_string(), + messages: api_messages, + temperature: self.effective_temperature(model, temperature), + stream: Some(false), + tools: if tools.is_empty() { + None + } else { + Some(tools.to_vec()) + }, + tool_choice: if tools.is_empty() { + None + } else { + Some("auto".to_string()) + }, + }; + + let url = self.chat_completions_url(); + let response = match self + .apply_auth_header(self.http_client().post(&url).json(&request), credential) + .send() + .await + { + Ok(response) => response, + Err(error) => { + tracing::warn!( + "{} native tool call transport failed: {error}; falling back to history path", + self.name + ); + let text = self.chat_with_history(messages, model, temperature).await?; + return Ok(ProviderChatResponse { + text: Some(text), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }); + } + }; + + if !response.status().is_success() { + return Err(super::super::api_error(&self.name, response).await); + } + + let body = response.text().await?; + let chat_response = 
super::compatible_parse::parse_chat_response_body(&self.name, &body)?; + let usage = Self::extract_usage(&chat_response); + let choice = chat_response + .choices + .into_iter() + .next() + .ok_or_else(|| anyhow::anyhow!("No response from {}", self.name))?; + + let text = choice.message.effective_content_optional(); + let reasoning_content = choice + .message + .reasoning_content + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + let tool_calls = choice + .message + .tool_calls + .unwrap_or_default() + .into_iter() + .filter_map(|tc| { + let function = tc.function?; + let name = function.name?; + let arguments = normalize_function_arguments(function.arguments); + Some(ProviderToolCall { + id: tc.id.unwrap_or_else(|| uuid::Uuid::new_v4().to_string()), + name, + arguments, + }) + }) + .collect::>(); + + tracing::debug!( + has_reasoning_content = reasoning_content.is_some(), + reasoning_content_chars = reasoning_content.as_ref().map_or(0, |r| r.chars().count()), + tool_calls = tool_calls.len(), + "[provider:chat] reasoning_content capture (non-streaming)" + ); + + Ok(ProviderChatResponse { + text, + tool_calls, + usage, + reasoning_content, + }) + } + + async fn chat( + &self, + request: ProviderChatRequest<'_>, + model: &str, + temperature: f64, + ) -> anyhow::Result { + let credential = self.credential_for_request()?; + + let tools = Self::convert_tool_specs(request.tools); + let effective_messages = if self.merge_system_into_user { + Self::flatten_system_messages(request.messages) + } else { + request.messages.to_vec() + }; + + if self.responses_api_primary { + let response_messages = if request.tools.is_some() { + Self::with_prompt_guided_tool_instructions(request.messages, request.tools) + } else { + effective_messages.clone() + }; + let text = self + .chat_via_responses(credential, &response_messages, model) + .await?; + if let Some(tx) = request.stream { + let _ = tx + .send( + 
crate::openhuman::inference::provider::ProviderDelta::TextDelta { + delta: text.clone(), + }, + ) + .await; + } + return Ok(ProviderChatResponse { + text: Some(text), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }); + } + + if let Some(tx) = request.stream { + let native_request = NativeChatRequest { + model: model.to_string(), + messages: Self::convert_messages_for_native(&effective_messages), + temperature: self.effective_temperature(model, temperature), + stream: Some(true), + tool_choice: tools.as_ref().map(|_| "auto".to_string()), + tools: tools.clone(), + thread_id: self.outbound_thread_id(), + stream_options: Some(OpenAiStreamOptions { + include_usage: true, + }), + options: self.build_ollama_options(), + frequency_penalty: Some(CHAT_FREQUENCY_PENALTY), + }; + let stream_dump_seq = reserve_dump_seq(); + dump_prompt_if_enabled(&self.name, model, stream_dump_seq, &native_request); + match self + .stream_native_chat(credential, &native_request, tx, stream_dump_seq) + .await + { + Ok(resp) => return Ok(resp), + Err(err) => { + let err_str = err.to_string(); + if tools.is_some() && Self::err_supports_no_tools_retry(&err_str) { + log::info!( + "[stream] {} model does not support tools — retrying streaming without tools", + self.name, + ); + let retry_request = NativeChatRequest { + tools: None, + tool_choice: None, + ..native_request.clone() + }; + match self + .stream_native_chat(credential, &retry_request, tx, stream_dump_seq) + .await + { + Ok(resp) => return Ok(resp), + Err(retry_err) => { + log::warn!( + "[stream] {} retry without tools also failed, falling back to non-streaming: {}", + self.name, + retry_err + ); + } + } + } else if Self::err_indicates_frequency_penalty_unsupported(&err_str) { + log::info!( + "[stream] {} rejected frequency_penalty — retrying streaming without it", + self.name, + ); + let retry_request = NativeChatRequest { + frequency_penalty: None, + ..native_request.clone() + }; + match self + 
.stream_native_chat(credential, &retry_request, tx, stream_dump_seq) + .await + { + Ok(resp) => return Ok(resp), + Err(retry_err) => { + log::warn!( + "[stream] {} retry without frequency_penalty also failed, falling back to non-streaming: {}", + self.name, + retry_err + ); + } + } + } else { + log::warn!( + "[stream] {} streaming chat failed, falling back to non-streaming: {}", + self.name, + err + ); + } + } + } + } + + let thread_id = self.outbound_thread_id(); + log::debug!( + "[provider:{}] chat() outbound thread_id={} model={}", + self.name, + thread_id.as_deref().unwrap_or(""), + model + ); + let native_request = NativeChatRequest { + model: model.to_string(), + messages: Self::convert_messages_for_native(&effective_messages), + temperature: self.effective_temperature(model, temperature), + stream: Some(false), + tool_choice: tools.as_ref().map(|_| "auto".to_string()), + tools, + thread_id, + stream_options: None, + options: self.build_ollama_options(), + // The buffered non-streaming path omits `frequency_penalty` for maximum + // compatibility. The streaming path carries it and retries without on rejection. 
+ frequency_penalty: None, + }; + let dump_seq = reserve_dump_seq(); + dump_prompt_if_enabled(&self.name, model, dump_seq, &native_request); + + let url = self.chat_completions_url(); + let response = match self + .apply_auth_header( + self.http_client().post(&url).json(&native_request), + credential, + ) + .send() + .await + { + Ok(response) => response, + Err(chat_error) => { + if self.supports_responses_fallback { + let detail = super::super::format_error_chain(&chat_error); + return self + .chat_via_responses(credential, &effective_messages, model) + .await + .map(|text| ProviderChatResponse { + text: Some(text), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }) + .map_err(|responses_err| { + let fb = super::super::format_anyhow_chain(&responses_err); + anyhow::anyhow!( + "{} native chat transport error: {detail} (responses fallback failed: {fb})", + self.name + ) + }); + } + + return Err(chat_error.into()); + } + }; + + if !response.status().is_success() { + let status = response.status(); + let error = response.text().await?; + let sanitized = super::super::sanitize_api_error(&error); + + if Self::is_native_tool_schema_unsupported(status, &sanitized) { + let fallback_messages = + Self::with_prompt_guided_tool_instructions(request.messages, request.tools); + let text = self + .chat_with_history(&fallback_messages, model, temperature) + .await?; + return Ok(ProviderChatResponse { + text: Some(text), + tool_calls: vec![], + usage: None, + reasoning_content: None, + }); + } + + if let Some(err) = self.completion_only_404_guard(status, &sanitized, model) { + return Err(err); + } + + if let Some(err) = self.not_chat_capable_guard(status, &sanitized, model) { + return Err(err); + } + + if status == reqwest::StatusCode::NOT_FOUND && self.supports_responses_fallback { + return self + .chat_via_responses(credential, &effective_messages, model) + .await + .map(|text| ProviderChatResponse { + text: Some(text), + tool_calls: vec![], + usage: None, + 
reasoning_content: None, + }) + .map_err(|responses_err| { + let fb = super::super::format_anyhow_chain(&responses_err); + anyhow::anyhow!( + "{} API error ({status}): {sanitized} (chat completions unavailable; responses fallback failed: {fb})", + self.name + ) + }); + } + + let status_str = status.as_u16().to_string(); + let message = self.enrich_404_message( + format!("{} API error ({status}): {sanitized}", self.name), + status, + ); + if super::super::is_budget_exhausted_http_400(status, &error) { + super::super::log_budget_exhausted_http_400( + "native_chat", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &error, + ) { + super::super::log_custom_openai_upstream_bad_request_http_400( + "native_chat", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_provider_access_policy_denied_http_403(status, &error) { + super::super::log_provider_access_policy_denied_http_403( + "native_chat", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::is_provider_config_rejection_http( + status, + self.name.as_str(), + &error, + ) { + super::super::log_provider_config_rejection( + "native_chat", + self.name.as_str(), + Some(model), + status, + ); + } else if super::super::should_report_provider_http_failure(status) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "native_chat", + &[ + ("provider", self.name.as_str()), + ("model", model), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + anyhow::bail!(message); + } + + let response_bytes = response.bytes().await?; + dump_response_if_enabled(&self.name, model, dump_seq, &response_bytes); + let native_response: ApiChatResponse = serde_json::from_slice(&response_bytes) + .map_err(|err| anyhow::anyhow!("{} response parse error: {err}", self.name))?; + Self::parse_native_response(native_response, &self.name) 
+ } + + fn supports_native_tools(&self) -> bool { + self.native_tool_calling + } + + fn supports_streaming(&self) -> bool { + true + } + + fn stream_chat_with_system( + &self, + system_prompt: Option<&str>, + message: &str, + model: &str, + temperature: f64, + options: StreamOptions, + ) -> stream::BoxStream<'static, StreamResult> { + let credential = match self.credential_for_request() { + Ok(value) => value.map(str::to_string), + Err(err) => { + return stream::once(async move { Err(StreamError::Provider(err.to_string())) }) + .boxed(); + } + }; + + let mut messages = Vec::new(); + if let Some(sys) = system_prompt { + messages.push(Message { + role: "system".to_string(), + content: sys.into(), + }); + } + messages.push(Message { + role: "user".to_string(), + content: MessageContent::from_chat_text(message), + }); + + let request = ApiChatRequest { + model: model.to_string(), + messages, + temperature: self.effective_temperature(model, temperature), + stream: Some(options.enabled), + tools: None, + tool_choice: None, + }; + + let url = self.chat_completions_url(); + let client = self.http_client(); + let auth_header = self.auth_header.clone(); + let extra_headers = self.extra_headers.clone(); + let openrouter_attribution_headers = self.openrouter_attribution_headers(); + let provider_name = self.name.clone(); + let model_owned = model.to_string(); + + let (tx, rx) = tokio::sync::mpsc::channel::>(100); + + tokio::spawn(async move { + let mut req_builder = client.post(&url).json(&request); + + req_builder = match (&auth_header, credential.as_deref()) { + (AuthStyle::None, _) | (_, None) => req_builder, + (AuthStyle::Bearer, Some(credential)) => { + req_builder.header("Authorization", format!("Bearer {credential}")) + } + (AuthStyle::XApiKey, Some(credential)) => { + req_builder.header("x-api-key", credential) + } + (AuthStyle::Anthropic, Some(credential)) => req_builder + .header("x-api-key", credential) + .header("anthropic-version", "2023-06-01"), + 
(AuthStyle::Custom(header), Some(credential)) => { + req_builder.header(header, credential) + } + }; + + for (name, value) in &extra_headers { + req_builder = req_builder.header(name.as_str(), value.as_str()); + } + if let Some((referer, title)) = openrouter_attribution_headers { + req_builder = req_builder + .header("HTTP-Referer", referer) + .header("X-OpenRouter-Title", title); + } + + req_builder = req_builder.header("Accept", "text/event-stream"); + + let response = match req_builder.send().await { + Ok(r) => r, + Err(e) => { + crate::core::observability::report_error( + e.to_string().as_str(), + "llm_provider", + "stream_chat", + &[ + ("provider", provider_name.as_str()), + ("model", model_owned.as_str()), + ("failure", "transport"), + ], + ); + let _ = tx.send(Err(StreamError::Http(e))).await; + return; + } + }; + + if !response.status().is_success() { + let status = response.status(); + let status_str = status.as_u16().to_string(); + let raw_error = match response.text().await { + Ok(e) => e, + Err(_) => format!("HTTP error: {}", status), + }; + let sanitized_error = + crate::openhuman::inference::provider::sanitize_api_error(&raw_error); + let message = format!("{}: {}", status, sanitized_error); + if crate::openhuman::inference::provider::is_budget_exhausted_http_400( + status, &raw_error, + ) { + crate::openhuman::inference::provider::log_budget_exhausted_http_400( + "stream_chat", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::is_custom_openai_upstream_bad_request_http_400( + provider_name.as_str(), + status, + &raw_error, + ) { + crate::openhuman::inference::provider::log_custom_openai_upstream_bad_request_http_400( + "stream_chat", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::is_provider_access_policy_denied_http_403( + status, + &raw_error, + ) { + 
crate::openhuman::inference::provider::log_provider_access_policy_denied_http_403( + "stream_chat", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::is_provider_config_rejection_http( + status, + provider_name.as_str(), + &raw_error, + ) { + crate::openhuman::inference::provider::log_provider_config_rejection( + "stream_chat", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::should_report_provider_http_failure( + status, + ) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "stream_chat", + &[ + ("provider", provider_name.as_str()), + ("model", model_owned.as_str()), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + let _ = tx.send(Err(StreamError::Provider(message))).await; + return; + } + + let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens); + while let Some(chunk) = chunk_stream.next().await { + if tx.send(chunk).await.is_err() { + break; + } + } + }); + + stream::unfold(rx, |mut rx| async move { + rx.recv().await.map(|chunk| (chunk, rx)) + }) + .boxed() + } + + fn stream_chat_with_history( + &self, + messages: &[ChatMessage], + model: &str, + temperature: f64, + options: StreamOptions, + ) -> stream::BoxStream<'static, StreamResult> { + let credential = match self.credential_for_request() { + Ok(value) => value.map(str::to_string), + Err(err) => { + return stream::once(async move { Err(StreamError::Provider(err.to_string())) }) + .boxed(); + } + }; + + let effective_messages = if self.merge_system_into_user { + Self::flatten_system_messages(messages) + } else { + messages.to_vec() + }; + let api_messages = effective_messages + .into_iter() + .map(|message| Message { + role: message.role, + content: MessageContent::from_chat_text(&message.content), + }) + .collect(); + + let request = ApiChatRequest { + model: model.to_string(), + 
messages: api_messages, + temperature: self.effective_temperature(model, temperature), + stream: Some(options.enabled), + tools: None, + tool_choice: None, + }; + + let url = self.chat_completions_url(); + let client = self.http_client(); + let auth_header = self.auth_header.clone(); + let extra_headers = self.extra_headers.clone(); + let openrouter_attribution_headers = self.openrouter_attribution_headers(); + let provider_name = self.name.clone(); + let model_owned = model.to_string(); + + let (tx, rx) = tokio::sync::mpsc::channel::>(100); + + tokio::spawn(async move { + let mut req_builder = client.post(&url).json(&request); + req_builder = match (&auth_header, credential.as_deref()) { + (AuthStyle::None, _) | (_, None) => req_builder, + (AuthStyle::Bearer, Some(credential)) => { + req_builder.header("Authorization", format!("Bearer {credential}")) + } + (AuthStyle::XApiKey, Some(credential)) => { + req_builder.header("x-api-key", credential) + } + (AuthStyle::Anthropic, Some(credential)) => req_builder + .header("x-api-key", credential) + .header("anthropic-version", "2023-06-01"), + (AuthStyle::Custom(header), Some(credential)) => { + req_builder.header(header, credential) + } + }; + for (name, value) in &extra_headers { + req_builder = req_builder.header(name.as_str(), value.as_str()); + } + if let Some((referer, title)) = openrouter_attribution_headers { + req_builder = req_builder + .header("HTTP-Referer", referer) + .header("X-OpenRouter-Title", title); + } + req_builder = req_builder.header("Accept", "text/event-stream"); + + let response = match req_builder.send().await { + Ok(response) => response, + Err(error) => { + crate::core::observability::report_error( + error.to_string().as_str(), + "llm_provider", + "stream_chat_history", + &[ + ("provider", provider_name.as_str()), + ("model", model_owned.as_str()), + ("failure", "transport"), + ], + ); + let _ = tx.send(Err(StreamError::Http(error))).await; + return; + } + }; + + if 
!response.status().is_success() { + let status = response.status(); + let status_str = status.as_u16().to_string(); + let raw_error = match response.text().await { + Ok(error) => error, + Err(_) => format!("HTTP error: {status}"), + }; + let sanitized_error = + crate::openhuman::inference::provider::sanitize_api_error(&raw_error); + let message = format!("{status}: {sanitized_error}"); + if crate::openhuman::inference::provider::is_budget_exhausted_http_400( + status, &raw_error, + ) { + crate::openhuman::inference::provider::log_budget_exhausted_http_400( + "stream_chat_history", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::is_custom_openai_upstream_bad_request_http_400( + provider_name.as_str(), + status, + &raw_error, + ) { + crate::openhuman::inference::provider::log_custom_openai_upstream_bad_request_http_400( + "stream_chat_history", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::is_provider_access_policy_denied_http_403( + status, + &raw_error, + ) { + crate::openhuman::inference::provider::log_provider_access_policy_denied_http_403( + "stream_chat_history", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::is_provider_config_rejection_http( + status, + provider_name.as_str(), + &raw_error, + ) { + crate::openhuman::inference::provider::log_provider_config_rejection( + "stream_chat_history", + provider_name.as_str(), + Some(model_owned.as_str()), + status, + ); + } else if crate::openhuman::inference::provider::should_report_provider_http_failure( + status, + ) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "stream_chat_history", + &[ + ("provider", provider_name.as_str()), + ("model", model_owned.as_str()), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + let _ = 
tx.send(Err(StreamError::Provider(message))).await; + return; + } + + let mut chunk_stream = sse_bytes_to_chunks(response, options.count_tokens); + while let Some(chunk) = chunk_stream.next().await { + if tx.send(chunk).await.is_err() { + break; + } + } + }); + + stream::unfold(rx, |mut rx| async move { + rx.recv().await.map(|chunk| (chunk, rx)) + }) + .boxed() + } + + async fn warmup(&self) -> anyhow::Result<()> { + if let Some(credential) = self.credential.as_ref() { + let url = self.chat_completions_url(); + let _ = self + .apply_auth_header(self.http_client().get(&url), Some(credential.as_str())) + .send() + .await?; + } + Ok(()) + } +} diff --git a/src/openhuman/inference/provider/compatible_repeat.rs b/src/openhuman/inference/provider/compatible_repeat.rs new file mode 100644 index 0000000000..e16eb51385 --- /dev/null +++ b/src/openhuman/inference/provider/compatible_repeat.rs @@ -0,0 +1,76 @@ +/// `frequency_penalty` applied to streaming chat-completions requests. +/// +/// Autoregressive models have a self-reinforcing bias toward repeating spans +/// already in their context; with no penalty a momentary repeat can spiral into +/// the same line emitted until the output-token cap (degenerate decoding). A +/// small positive penalty damps that loop without harming coherence. Carried on +/// the streaming path (where those loops occur — long autonomous turns) and +/// retried without it if a strict provider rejects it; the buffered +/// non-streaming fallback omits it for maximum compatibility. Skipped in +/// serialisation when `None` so providers that don't accept the field are +/// unaffected. +pub(super) const CHAT_FREQUENCY_PENALTY: f64 = 0.3; + +/// Consecutive identical substantial lines that trip the in-generation repeat +/// cutoff. Autoregressive models can latch onto a line and emit it verbatim +/// until the token cap (observed: 234× the same sentence in one response). 
+/// `frequency_penalty` / stronger model tiers only lower the odds — they don't +/// prevent it — so this is the deterministic, model-agnostic stop. Set well +/// above any legitimate repetition. +pub(crate) const STREAM_REPEAT_THRESHOLD: u32 = 6; +/// Minimum trimmed length for a line to count toward [`STREAM_REPEAT_THRESHOLD`]. +/// Keeps short, legitimately-repeated lines (`}`, blank-ish code) from tripping +/// it; degenerate spirals are long sentences well over this. +pub(super) const MIN_REPEAT_LINE_CHARS: usize = 16; + +/// Detects in-generation repetition degeneration on the streaming path so the +/// reader can abort the stream and truncate the blob. Trips after +/// [`STREAM_REPEAT_THRESHOLD`] consecutive identical substantial lines; blank +/// separator lines are ignored, so `"sentence\n\nsentence\n\n…"` still trips. +#[derive(Default)] +pub(crate) struct StreamRepeatDetector { + current_line: String, + last_line: Option, + consecutive: u32, +} + +impl StreamRepeatDetector { + pub(super) fn new() -> Self { + Self::default() + } + + /// Feed one streamed text delta. Returns `true` once the same substantial + /// line has repeated [`STREAM_REPEAT_THRESHOLD`] times back-to-back. 
+ pub(super) fn observe(&mut self, delta: &str) -> bool { + for ch in delta.chars() { + if ch == '\n' { + if self.finalize_line() { + return true; + } + } else { + self.current_line.push(ch); + } + } + false + } + + fn finalize_line(&mut self) -> bool { + let line = self.current_line.trim().to_string(); + self.current_line.clear(); + if line.is_empty() { + return false; + } + if line.chars().count() < MIN_REPEAT_LINE_CHARS { + self.last_line = Some(line); + self.consecutive = 1; + return false; + } + if self.last_line.as_deref() == Some(line.as_str()) { + self.consecutive += 1; + } else { + self.last_line = Some(line); + self.consecutive = 1; + } + self.consecutive >= STREAM_REPEAT_THRESHOLD + } +} diff --git a/src/openhuman/inference/provider/compatible_stream_native.rs b/src/openhuman/inference/provider/compatible_stream_native.rs new file mode 100644 index 0000000000..f8f721dcf4 --- /dev/null +++ b/src/openhuman/inference/provider/compatible_stream_native.rs @@ -0,0 +1,428 @@ +use crate::openhuman::inference::provider::traits::ChatResponse as ProviderChatResponse; + +use super::compatible_dump::dump_response_if_enabled; +use super::compatible_repeat::{StreamRepeatDetector, STREAM_REPEAT_THRESHOLD}; +use super::compatible_types::{ + ApiChatResponse, ApiUsage, Choice, Function, NativeChatRequest, OpenHumanMeta, ResponseMessage, + StreamChunkResponse, StreamingToolCall, ToolCall, +}; +use super::OpenAiCompatibleProvider; + +impl OpenAiCompatibleProvider { + /// Streaming variant of the native-tools chat path. + /// + /// Sends the request with `stream: true`, consumes the upstream SSE + /// stream chunk by chunk, forwards fine-grained `ProviderDelta` + /// events to the caller-supplied sender, and returns the aggregated + /// [`ProviderChatResponse`] once the stream ends. 
+ pub(super) async fn stream_native_chat( + &self, + credential: Option<&str>, + native_request: &NativeChatRequest, + delta_tx: &tokio::sync::mpsc::Sender, + dump_seq: u64, + ) -> anyhow::Result { + use futures_util::StreamExt; + + let url = self.chat_completions_url(); + log::info!( + "[stream] {} POST {} (stream=true, tools={})", + self.name, + url, + native_request.tools.as_ref().map_or(0, |t| t.len()), + ); + + let response = self + .apply_auth_header( + self.http_client() + .post(&url) + .header("Accept", "text/event-stream") + .json(native_request), + credential, + ) + .send() + .await?; + + if !response.status().is_success() { + let status = response.status(); + let status_str = status.as_u16().to_string(); + let body = response.text().await.unwrap_or_default(); + let sanitized = super::super::sanitize_api_error(&body); + let message = format!( + "{} streaming API error ({}): {}", + self.name, status, sanitized + ); + if super::super::is_budget_exhausted_http_400(status, &body) { + super::super::log_budget_exhausted_http_400( + "streaming_chat", + self.name.as_str(), + Some(native_request.model.as_str()), + status, + ); + } else if super::super::is_custom_openai_upstream_bad_request_http_400( + self.name.as_str(), + status, + &body, + ) { + super::super::log_custom_openai_upstream_bad_request_http_400( + "streaming_chat", + self.name.as_str(), + Some(native_request.model.as_str()), + status, + ); + } else if super::super::is_provider_access_policy_denied_http_403(status, &body) { + super::super::log_provider_access_policy_denied_http_403( + "streaming_chat", + self.name.as_str(), + Some(native_request.model.as_str()), + status, + ); + } else if super::super::is_provider_config_rejection_http( + status, + self.name.as_str(), + &body, + ) { + super::super::log_provider_config_rejection( + "streaming_chat", + self.name.as_str(), + Some(native_request.model.as_str()), + status, + ); + } else if Self::is_native_tool_schema_unsupported(status, &body) { + 
log::info!( + "[stream] {} model rejected tool schema (status={}) — caller will retry without tools", + self.name, + status, + ); + } else if super::super::should_report_provider_http_failure(status) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "streaming_chat", + &[ + ("provider", self.name.as_str()), + ("model", native_request.model.as_str()), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + anyhow::bail!(message); + } + + let is_sse = response + .headers() + .get(reqwest::header::CONTENT_TYPE) + .and_then(|v| v.to_str().ok()) + .map(|ct| ct.to_ascii_lowercase().contains("text/event-stream")) + .unwrap_or(false); + if !is_sse { + log::warn!( + "[stream] {} upstream replied with non-SSE content-type; falling back to JSON parse \ + (no token deltas reach the UI)", + self.name, + ); + let response_bytes = response.bytes().await?; + dump_response_if_enabled(&self.name, &native_request.model, dump_seq, &response_bytes); + let api_resp: ApiChatResponse = serde_json::from_slice(&response_bytes) + .map_err(|err| anyhow::anyhow!("{} response parse error: {err}", self.name))?; + return Self::parse_native_response(api_resp, &self.name); + } + + let mut text_accum = String::new(); + let mut thinking_accum = String::new(); + let mut tool_accum: std::collections::BTreeMap = + std::collections::BTreeMap::new(); + let mut last_usage: Option = None; + let mut last_openhuman: Option = None; + + let mut bytes_stream = response.bytes_stream(); + let mut buffer = String::new(); + let mut repeat_detector = StreamRepeatDetector::new(); + let mut degenerate_repeat = false; + + 'stream: while let Some(item) = bytes_stream.next().await { + let bytes = item?; + buffer.push_str(&String::from_utf8_lossy(&bytes)); + + while let Some(sep_idx) = buffer.find("\n\n") { + let event = buffer[..sep_idx].to_string(); + buffer.drain(..sep_idx + 2); + for line in event.lines() { + let line = line.trim(); + if line.is_empty() || 
line.starts_with(':') { + continue; + } + let Some(data) = line.strip_prefix("data:") else { + continue; + }; + let data = data.trim(); + if data == "[DONE]" { + continue; + } + + let chunk: StreamChunkResponse = match serde_json::from_str(data) { + Ok(v) => v, + Err(e) => { + log::debug!( + "[stream] {} skipping unparseable chunk: {} — data={}", + self.name, + e, + data, + ); + continue; + } + }; + + if let Some(usage) = chunk.usage { + last_usage = Some(usage); + } + if let Some(meta) = chunk.openhuman { + last_openhuman = Some(meta); + } + + for choice in chunk.choices { + if let Some(content) = choice.delta.content.as_ref() { + if !content.is_empty() { + text_accum.push_str(content); + let _ = delta_tx + .send(crate::openhuman::inference::provider::ProviderDelta::TextDelta { + delta: content.clone(), + }) + .await; + if repeat_detector.observe(content) { + log::warn!( + "[stream] {} degenerate repetition detected (≥{} identical lines) — aborting generation, truncating (text_chars={})", + self.name, + STREAM_REPEAT_THRESHOLD, + text_accum.chars().count(), + ); + degenerate_repeat = true; + break 'stream; + } + } + } + if let Some(reasoning) = choice.delta.reasoning_content.as_ref() { + if !reasoning.is_empty() { + thinking_accum.push_str(reasoning); + let _ = delta_tx + .send( + crate::openhuman::inference::provider::ProviderDelta::ThinkingDelta { + delta: reasoning.clone(), + }, + ) + .await; + } + } + // Tool-call fragments. + // + // Ordering invariant emitted downstream: + // ToolCallStart (once, when id+name both known) + // → ToolCallArgsDelta* (buffered then streamed) + // + // Args fragments that arrive *before* we know the + // canonical id are buffered but NOT emitted — emitting + // them with a synthetic id would break client-side + // reconciliation. Once start fires we flush the buffered + // prefix in a single delta, then stream subsequent + // fragments as they arrive. 
+ if let Some(tc_list) = choice.delta.tool_calls.as_ref() { + for tc in tc_list { + let idx = tc.index.unwrap_or(0); + let entry = tool_accum.entry(idx).or_default(); + + if let Some(id) = tc.id.as_ref() { + if entry.id.is_none() { + log::debug!( + "[stream] {} tool_call[{}] id resolved: {}", + self.name, + idx, + id, + ); + } + entry.id = Some(id.clone()); + } + if let Some(func) = tc.function.as_ref() { + if let Some(name) = func.name.as_ref() { + if !name.is_empty() && entry.name.is_none() { + log::debug!( + "[stream] {} tool_call[{}] name resolved: {}", + self.name, + idx, + name, + ); + } + if !name.is_empty() { + entry.name = Some(name.clone()); + } + } + if let Some(args) = func.arguments.as_ref() { + if !args.is_empty() { + entry.arguments.push_str(args); + if !entry.emitted_start { + log::debug!( + "[stream] {} tool_call[{}] buffering args ({} chars total) — waiting for id/name", + self.name, + idx, + entry.arguments.len(), + ); + } + } + } + } + + if !entry.emitted_start { + if let (Some(id), Some(name)) = + (entry.id.as_ref(), entry.name.as_ref()) + { + log::debug!( + "[stream] {} tool_call[{}] emitting ToolCallStart id={} name={}", + self.name, + idx, + id, + name, + ); + let _ = delta_tx + .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallStart { + call_id: id.clone(), + tool_name: name.clone(), + }) + .await; + entry.emitted_start = true; + if !entry.arguments.is_empty() { + log::debug!( + "[stream] {} tool_call[{}] flushing buffered args ({} chars)", + self.name, + idx, + entry.arguments.len(), + ); + let buffered = entry.arguments.clone(); + let _ = delta_tx + .send(crate::openhuman::inference::provider::ProviderDelta::ToolCallArgsDelta { + call_id: id.clone(), + delta: buffered, + }) + .await; + entry.emitted_chars = entry.arguments.len(); + } + } + } else if entry.arguments.len() > entry.emitted_chars { + if let Some(ref id) = entry.id { + let fresh = + entry.arguments[entry.emitted_chars..].to_string(); + let _ = delta_tx + 
.send(crate::openhuman::inference::provider::ProviderDelta::ToolCallArgsDelta { + call_id: id.clone(), + delta: fresh, + }) + .await; + entry.emitted_chars = entry.arguments.len(); + } + } + } + } + } + } + } + } + + if degenerate_repeat { + text_accum.push_str( + "\n\n[Output stopped: detected repeated/looping generation (model degeneration).]", + ); + } + + let tool_call_count = tool_accum.len(); + log::info!( + "[stream] {} aggregated text_chars={} thinking_chars={} tool_calls={}", + self.name, + text_accum.chars().count(), + thinking_accum.chars().count(), + tool_call_count, + ); + + let tool_calls_for_api: Vec = tool_accum + .into_values() + .map(|c| ToolCall { + id: c.id, + kind: Some("function".to_string()), + function: Some(super::compatible_types::Function { + name: c.name, + arguments: if c.arguments.is_empty() { + None + } else { + Some( + serde_json::from_str(&c.arguments) + .unwrap_or(serde_json::Value::String(c.arguments)), + ) + }, + }), + }) + .collect(); + + let api_resp = ApiChatResponse { + choices: vec![Choice { + message: ResponseMessage { + content: if text_accum.is_empty() { + None + } else { + Some(text_accum) + }, + reasoning_content: if thinking_accum.is_empty() { + None + } else { + Some(thinking_accum) + }, + tool_calls: if tool_calls_for_api.is_empty() { + None + } else { + Some(tool_calls_for_api) + }, + function_call: None, + }, + }], + usage: last_usage, + openhuman: last_openhuman, + }; + + if std::env::var("OPENHUMAN_PROMPT_DUMP_DIR").is_ok() { + let msg = &api_resp.choices[0].message; + let aggregated = serde_json::json!({ + "content": msg.content, + "reasoning_content": msg.reasoning_content, + "tool_calls": msg.tool_calls.as_ref().map(|calls| { + calls.iter().map(|c| serde_json::json!({ + "id": c.id, + "type": c.kind, + "function": c.function.as_ref().map(|f| serde_json::json!({ + "name": f.name, + "arguments": f.arguments, + })), + })).collect::>() + }), + "usage": api_resp.usage.as_ref().map(|u| serde_json::json!({ + 
"prompt_tokens": u.prompt_tokens, + "completion_tokens": u.completion_tokens, + "total_tokens": u.total_tokens, + "prompt_cached_tokens": u.prompt_tokens_details + .as_ref().map(|d| d.cached_tokens), + })), + "openhuman": api_resp.openhuman.as_ref().map(|m| serde_json::json!({ + "usage": m.usage.as_ref().map(|u| serde_json::json!({ + "input_tokens": u.input_tokens, + "output_tokens": u.output_tokens, + "cached_input_tokens": u.cached_input_tokens, + })), + "billing": m.billing.as_ref().map(|b| serde_json::json!({ + "charged_amount_usd": b.charged_amount_usd, + })), + })), + }); + if let Ok(bytes) = serde_json::to_vec(&aggregated) { + dump_response_if_enabled(&self.name, &native_request.model, dump_seq, &bytes); + } + } + + Self::parse_native_response(api_resp, &self.name) + } +} diff --git a/src/openhuman/inference/provider/ops.rs b/src/openhuman/inference/provider/ops.rs deleted file mode 100644 index aa11eeef13..0000000000 --- a/src/openhuman/inference/provider/ops.rs +++ /dev/null @@ -1,1290 +0,0 @@ -use super::*; - -use serde::Serialize; -use std::path::PathBuf; - -use super::openai_codex::{ - openai_codex_client_version, openai_codex_user_agent, resolve_openai_codex_routing, - OpenAiCodexRouting, OPENAI_CODEX_ACCOUNT_HEADER, OPENAI_CODEX_MODEL_HINTS, - OPENAI_CODEX_ORIGINATOR, OPENAI_CODEX_ORIGINATOR_HEADER, -}; - -const MAX_API_ERROR_CHARS: usize = 200; - -/// Fixed id for the single inference backend (OpenHuman API). 
-pub const INFERENCE_BACKEND_ID: &str = "openhuman"; - -#[derive(Debug, Clone)] -pub struct ProviderRuntimeOptions { - pub auth_profile_override: Option, - pub openhuman_dir: Option, - pub secrets_encrypt: bool, - pub reasoning_enabled: Option, -} - -#[derive(Debug, Serialize)] -pub struct ModelInfo { - pub id: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub owned_by: Option, - #[serde(skip_serializing_if = "Option::is_none")] - pub context_window: Option, -} - -pub async fn list_configured_models( - provider_id: &str, -) -> Result, String> { - let config = crate::openhuman::config::Config::load_or_init() - .await - .map_err(|e| e.to_string())?; - - list_configured_models_from_config(provider_id, &config).await -} - -async fn list_configured_models_from_config( - provider_id: &str, - config: &crate::openhuman::config::Config, -) -> Result, String> { - let provider_id = provider_id.trim().to_string(); - if provider_id.is_empty() { - return Err("provider_id must not be empty".to_string()); - } - - log::debug!("[providers][list_models] provider_id={}", provider_id); - - // Explicit `cloud_providers` entry wins (e.g. a user-pointed remote - // ollama box at https://ollama.example.com/v1). Falling back to the - // local-runtime synthesis below only happens when no entry matches. 
- let entry = config - .cloud_providers - .iter() - .find(|e| e.id == provider_id || e.slug == provider_id) - .cloned() - .or_else(|| synthesize_local_runtime_entry(&provider_id, config)) - .ok_or_else(|| format!("no cloud provider with id or slug '{}' found", provider_id))?; - - let api_key = - crate::openhuman::inference::provider::factory::lookup_key_for_slug(&entry.slug, config) - .unwrap_or_default(); - let api_key = api_key.trim().to_string(); - - let routing = resolve_openai_codex_routing(config, &entry.slug, &entry.endpoint, &api_key) - .unwrap_or_else(|err| { - log::warn!( - "[providers][list_models] openai codex routing unavailable; continuing with configured endpoint: {err}" - ); - OpenAiCodexRouting::standard(&entry.endpoint) - }); - - let mut models_url = format!("{}/models", routing.endpoint); - if routing.using_oauth { - models_url = - append_query_param(&models_url, "client_version", openai_codex_client_version()); - } - - log::debug!( - "[providers][list_models] fetching url={} slug={} codex_oauth={} account_id_header={}", - models_url, - entry.slug, - routing.using_oauth, - routing.account_id.is_some() - ); - - let client = crate::openhuman::config::build_runtime_proxy_client_with_timeouts( - "providers.list_models", - 30, - 10, - ); - - use crate::openhuman::config::schema::cloud_providers::AuthStyle; - if is_openrouter_provider(&entry) { - validate_openrouter_api_key(&client, &routing.endpoint, &api_key).await?; - } - - let mut request = client.get(&models_url); - if routing.using_oauth { - request = request - .header(reqwest::header::USER_AGENT, openai_codex_user_agent()) - .header(OPENAI_CODEX_ORIGINATOR_HEADER, OPENAI_CODEX_ORIGINATOR); - } - - request = match entry.auth_style { - AuthStyle::Bearer => { - if !api_key.is_empty() { - let mut r = request.header("Authorization", format!("Bearer {}", api_key)); - if let Some(account_id) = routing.account_id.as_deref() { - r = r.header(OPENAI_CODEX_ACCOUNT_HEADER, account_id); - } - r - } else { - 
request - } - } - AuthStyle::Anthropic => { - let mut r = request.header("anthropic-version", "2023-06-01"); - if !api_key.is_empty() { - r = r.header("x-api-key", &api_key); - } - r - } - AuthStyle::OpenhumanJwt => { - if !api_key.is_empty() { - request.header("Authorization", format!("Bearer {}", api_key)) - } else { - request - } - } - AuthStyle::None => request, - }; - - let response = request - .send() - .await - .map_err(|e| format!("[providers][list_models] HTTP request failed: {}", e))?; - - let status = response.status(); - if !status.is_success() { - let body = response.text().await.unwrap_or_default(); - let sanitized = sanitize_api_error(&body); - let truncated = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); - return Err(format!( - "provider returned {}: {}", - status.as_u16(), - truncated - )); - } - - // TAURI-RUST-12: `response.json()` discards the body when decoding fails, - // so Sentry just sees `error decoding response body` with no clue what the - // server actually sent. In practice the offending body is HTML from a - // captive portal / corporate proxy login page, an upstream load-balancer - // 502 served as HTML with a `200 OK`, or a JSON parser tripping on a - // wrong-path endpoint. Read the body as text first, then parse, and - // surface a sanitized + truncated snippet so the failure is diagnosable - // from the error string alone. 
- let raw_body = response.text().await.map_err(|e| { - format!( - "[providers][list_models] failed to read response body: {}", - e - ) - })?; - let body: serde_json::Value = serde_json::from_str(&raw_body).map_err(|e| { - let sanitized = sanitize_api_error(&raw_body); - let snippet = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); - format!( - "[providers][list_models] failed to parse JSON: {} (body: {})", - e, snippet - ) - })?; - - // OpenAI-compatible servers occasionally return HTTP 200 with an error - // payload instead of a 4xx (LM Studio does this for unknown paths like - // `/v11/models` — body `{"error":"Unexpected endpoint or method..."}`). - // Treat any top-level `error` field as a failure so the AI-panel probe - // doesn't silently accept a typo'd endpoint. - if let Some(err_field) = body.get("error") { - let msg = err_field - .as_str() - .map(|s| s.to_string()) - .or_else(|| { - err_field - .get("message") - .and_then(|m| m.as_str()) - .map(|s| s.to_string()) - }) - .unwrap_or_else(|| err_field.to_string()); - let sanitized = sanitize_api_error(&msg); - return Err(format!("provider returned error payload: {}", sanitized)); - } - - // Parse the OpenAI-compatible `/models` envelope into typed model - // entries. See `parse_models_response` for the distinct error shapes - // returned for "missing field" vs "field present but wrong type" - // (TAURI-RUST-4Y). The ChatGPT Codex backend uses a sibling `models` - // array keyed by `slug`, so that shape is accepted here too. 
- let mut models = parse_models_response(&body)?; - if routing.using_oauth { - merge_openai_codex_model_hints(&mut models); - } - - log::info!( - "[providers][list_models] slug={} fetched {} models", - entry.slug, - models.len() - ); - - Ok(crate::rpc::RpcOutcome::new( - serde_json::json!({ "models": models }), - vec![format!("fetched {} models", models.len())], - )) -} - -/// Parse the OpenAI-compatible `/models` response envelope, or the ChatGPT -/// Codex backend's sibling `models` envelope, into typed [`ModelInfo`] entries. -/// -/// Returns distinct errors for the three failure modes the wild has -/// produced in `inference_list_models` Sentry events: -/// -/// 1. **Missing `data`/`models` field** — endpoint isn't `/models`-compatible -/// (user typo'd the base URL, pointed at a vector-DB host, etc.). -/// 2. **`data`/`models` field present but wrong type** — provider returned -/// `{"object":"error","data":{…}}`, `{"data":null}`, or similar -/// non-array. The error names the actual JSON type so triage knows what -/// the provider sent. -/// 3. **Non-object top-level body** — provider returned a bare array, -/// string, etc. Caught explicitly so the parser doesn't silently -/// drop into the missing-data arm with a `` keys list. -/// -/// Per-entry parsing ignores entries that don't have a usable string id/slug -/// (lax on purpose — many OpenAI-compatible servers include malformed rows for -/// capabilities they don't fully implement). 
-fn parse_models_response(body: &serde_json::Value) -> Result, String> { - let obj = body.as_object().ok_or_else(|| { - format!( - "provider response is not a JSON object — endpoint is not OpenAI-compatible (got {} at top level)", - json_value_kind(body) - ) - })?; - - let (field_name, data_value) = obj - .get("data") - .map(|value| ("data", value)) - .or_else(|| obj.get("models").map(|value| ("models", value))) - .ok_or_else(|| { - let keys = obj.keys().cloned().collect::>().join(", "); - format!( - "provider response missing `data` or `models` field — endpoint is not OpenAI-compatible (got keys: {})", - keys - ) - })?; - - let data = data_value.as_array().ok_or_else(|| { - // Include the sibling `object` field if present — OpenAI-shaped - // servers set it to `"list"` on success and `"error"` (or omit) - // on failure, so its value is the fastest triage signal for - // future Sentry events on the wrong-type arm. - let object_field = obj - .get("object") - .map(|v| v.to_string()) - .unwrap_or_else(|| "".to_string()); - format!( - "provider response has `{}` field but it is {}, expected array — endpoint may be returning an error envelope (\"object\" = {})", - field_name, - json_value_kind(data_value), - object_field, - ) - })?; - - Ok(data - .iter() - .filter_map(model_info_from_catalog_item) - .collect()) -} - -/// Name the JSON value kind for use in `parse_models_response` error -/// messages. Mirrors `serde_json::Value::*` variants exactly so test -/// assertions on the rendered token (`object`/`string`/`null`/…) stay -/// in lock-step with the matcher. 
-fn json_value_kind(v: &serde_json::Value) -> &'static str { - match v { - serde_json::Value::Null => "null", - serde_json::Value::Bool(_) => "bool", - serde_json::Value::Number(_) => "number", - serde_json::Value::String(_) => "string", - serde_json::Value::Array(_) => "array", - serde_json::Value::Object(_) => "object", - } -} - -/// Synthesize a transient [`CloudProviderCreds`] entry for the well-known -/// local-runtime slugs (`ollama`, `lmstudio`) so [`list_configured_models`] -/// can probe their OpenAI-compatible `/v1/models` endpoint even when the -/// user has not registered a matching `cloud_providers` row. -/// -/// Background: the AI settings panel registers an `ollama` `cloud_providers` -/// entry when the user configures Ollama (see comment on -/// [`crate::openhuman::config::schema::cloud_providers::is_slug_reserved`]), -/// but in practice some users hit -/// `inference_list_models("ollama")` without that entry — config drift, -/// flush-vs-probe race, or upgrade from a build that only persisted -/// `config.local_ai.base_url`. Sentry TAURI-RUST-28Z captures this: -/// 24 events / 7d, all `domain=rpc, method=openhuman.inference_list_models, -/// operation=invoke_method`. Without this fallback, the dropdown surfaces -/// the bare `"no cloud provider with id or slug 'ollama' found"` error -/// (also visible in the Sentry breadcrumb) instead of returning models. -/// -/// Returns `None` for any slug that is not a recognized local-runtime -/// alias — callers continue down the normal "no cloud provider" error -/// path for `openai` / `anthropic` / opaque ids / typos. 
-fn synthesize_local_runtime_entry( - slug: &str, - config: &crate::openhuman::config::Config, -) -> Option { - use crate::openhuman::config::schema::cloud_providers::{AuthStyle, CloudProviderCreds}; - - let endpoint = match slug { - // Ollama's OpenAI-compatible surface at `/v1/models` returns - // the same `{"data": [...]}` shape the existing parser handles, so - // we route through that rather than the native `/api/tags`. - "ollama" => { - let base = crate::openhuman::inference::local::ollama_base_url_from_config(config); - format!("{}/v1", base.trim_end_matches('/')) - } - // `lm_studio_base_url` already ends in `/v1`. - "lmstudio" => crate::openhuman::inference::local::lm_studio::lm_studio_base_url(config), - _ => return None, - }; - - Some(CloudProviderCreds { - id: format!("synthetic_local_{slug}"), - slug: slug.to_string(), - label: slug.to_string(), - endpoint, - // Local runtimes accept unauthenticated requests on loopback. - // The probe at `/models` runs without an Authorization - // header — `lookup_key_for_slug` may still return a key, but - // `AuthStyle::None` ignores it (see auth-style match below). 
- auth_style: AuthStyle::None, - legacy_type: None, - default_model: None, - }) -} - -fn merge_openai_codex_model_hints(models: &mut Vec) { - let mut seen = models - .iter() - .map(|model| model.id.to_ascii_lowercase()) - .collect::>(); - - for id in OPENAI_CODEX_MODEL_HINTS { - if seen.insert(id.to_ascii_lowercase()) { - models.push(ModelInfo { - id: (*id).to_string(), - owned_by: Some("openai-codex".to_string()), - context_window: None, - }); - } - } -} - -fn is_openrouter_provider( - entry: &crate::openhuman::config::schema::cloud_providers::CloudProviderCreds, -) -> bool { - if entry.slug.eq_ignore_ascii_case("openrouter") { - return true; - } - - reqwest::Url::parse(&entry.endpoint) - .ok() - .and_then(|url| url.host_str().map(|host| host.to_ascii_lowercase())) - .is_some_and(|host| host == "openrouter.ai" || host.ends_with(".openrouter.ai")) -} - -fn append_query_param(url: &str, key: &str, value: &str) -> String { - if let Ok(mut parsed) = reqwest::Url::parse(url) { - parsed.query_pairs_mut().append_pair(key, value); - return parsed.to_string(); - } - - let separator = if url.contains('?') { '&' } else { '?' }; - format!("{url}{separator}{key}={value}") -} - -fn model_items_from_body(body: &serde_json::Value) -> Option> { - body.get("data") - .and_then(|d| d.as_array()) - .or_else(|| body.get("models").and_then(|d| d.as_array())) - .cloned() -} - -fn model_info_from_catalog_item(item: &serde_json::Value) -> Option { - if let Some(id) = item.as_str().map(str::trim).filter(|id| !id.is_empty()) { - return Some(ModelInfo { - id: id.to_string(), - owned_by: None, - context_window: None, - }); - } - - let id = item - .get("id") - .or_else(|| item.get("slug")) - .or_else(|| item.get("name")) - .and_then(|v| v.as_str()) - .map(str::trim) - .filter(|id| !id.is_empty())? 
- .to_string(); - let owned_by = item - .get("owned_by") - .or_else(|| item.get("owned_by_organization")) - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - let context_window = item - .get("context_length") - .or_else(|| item.get("context_window")) - .or_else(|| item.get("max_context_window")) - .and_then(|v| v.as_u64()); - Some(ModelInfo { - id, - owned_by, - context_window, - }) -} - -async fn validate_openrouter_api_key( - client: &reqwest::Client, - base: &str, - api_key: &str, -) -> Result<(), String> { - if api_key.is_empty() { - return Err("OpenRouter API key is required before enabling the provider".to_string()); - } - - let key_url = format!("{}/key", base); - log::debug!("[providers][list_models] validating OpenRouter API key"); - let response = client - .get(&key_url) - .header("Authorization", format!("Bearer {api_key}")) - .send() - .await - .map_err(|e| format!("[providers][list_models] OpenRouter key validation failed: {e}"))?; - - let status = response.status(); - let text = response.text().await.unwrap_or_default(); - if !status.is_success() { - let sanitized = sanitize_api_error(&text); - let truncated = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); - log::debug!( - "[providers][list_models] OpenRouter key validation failed status={} body={}", - status.as_u16(), - truncated - ); - return Err(format!( - "OpenRouter key validation returned {}: {}", - status.as_u16(), - truncated - )); - } - - if let Ok(body) = serde_json::from_str::(&text) { - if let Some(err_field) = body.get("error") { - let msg = err_field - .as_str() - .map(|s| s.to_string()) - .or_else(|| { - err_field - .get("message") - .and_then(|m| m.as_str()) - .map(|s| s.to_string()) - }) - .unwrap_or_else(|| err_field.to_string()); - let sanitized = sanitize_api_error(&msg); - log::debug!( - "[providers][list_models] OpenRouter key validation returned error payload={}", - sanitized - ); - return Err(format!( - "OpenRouter key validation returned error payload: 
{}", - sanitized - )); - } - } - - Ok(()) -} - -impl Default for ProviderRuntimeOptions { - fn default() -> Self { - Self { - auth_profile_override: None, - openhuman_dir: None, - secrets_encrypt: true, - reasoning_enabled: None, - } - } -} - -fn is_secret_char(c: char) -> bool { - c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | ':') -} - -fn token_end(input: &str, from: usize) -> usize { - let mut end = from; - for (i, c) in input[from..].char_indices() { - if is_secret_char(c) { - end = from + i + c.len_utf8(); - } else { - break; - } - } - end -} - -/// Scrub known secret-like token prefixes from provider error strings. -pub fn scrub_secret_patterns(input: &str) -> String { - const PREFIXES: [&str; 7] = [ - "sk-", - "xoxb-", - "xoxp-", - "ghp_", - "gho_", - "ghu_", - "github_pat_", - ]; - - let mut scrubbed = input.to_string(); - - for prefix in PREFIXES { - let mut search_from = 0; - loop { - let Some(rel) = scrubbed[search_from..].find(prefix) else { - break; - }; - - let start = search_from + rel; - let content_start = start + prefix.len(); - let end = token_end(&scrubbed, content_start); - - if end == content_start { - search_from = content_start; - continue; - } - - scrubbed.replace_range(start..end, "[REDACTED]"); - search_from = start + "[REDACTED]".len(); - } - } - - scrubbed -} - -/// Sanitize API error text by scrubbing secrets and truncating length. -pub fn sanitize_api_error(input: &str) -> String { - let scrubbed = scrub_secret_patterns(input); - crate::openhuman::util::truncate_with_ellipsis(&scrubbed, MAX_API_ERROR_CHARS) -} - -const TRANSPORT_ERROR_MAX_CHARS: usize = 1200; - -/// Full `source()` chain for connection / TLS failures (scrubbed, longer than API body snippets). 
-pub fn format_error_chain(err: &dyn std::error::Error) -> String { - let mut parts: Vec = vec![err.to_string()]; - let mut src = std::error::Error::source(err); - while let Some(e) = src { - parts.push(e.to_string()); - src = std::error::Error::source(e); - } - let joined = parts.join(" | "); - let scrubbed = scrub_secret_patterns(&joined); - crate::openhuman::util::truncate_with_suffix(&scrubbed, TRANSPORT_ERROR_MAX_CHARS, "…") -} - -/// Cause chain from [`anyhow::Error`] (e.g. responses fallback), scrubbed and length-limited. -pub fn format_anyhow_chain(err: &anyhow::Error) -> String { - let joined = err - .chain() - .map(|e| e.to_string()) - .collect::>() - .join(" | "); - let scrubbed = scrub_secret_patterns(&joined); - crate::openhuman::util::truncate_with_suffix(&scrubbed, TRANSPORT_ERROR_MAX_CHARS, "…") -} - -/// Whether a non-2xx provider response is worth reporting to Sentry. -/// -/// Transient upstream statuses — 429 Too Many Requests, 408 Request Timeout, -/// and 502/503/504 gateway-layer failures — are caller-side throttling or -/// upstream-capacity signals. The reliable-provider layer already retries -/// with backoff and falls back across providers/models, and the aggregate -/// "all providers exhausted" event still fires if every attempt fails. -/// Reporting each individual transient failure floods Sentry (see -/// OPENHUMAN-TAURI-6Y / 2E / 84 / T: thousands of events/day per user from -/// a single upstream rate-limit / outage window). Callers should still -/// propagate the error so retry and fallback logic runs unchanged; this -/// only gates the per-attempt Sentry report. -pub fn should_report_provider_http_failure(status: reqwest::StatusCode) -> bool { - !crate::core::observability::TRANSIENT_PROVIDER_HTTP_STATUSES.contains(&status.as_u16()) -} - -/// Whether a provider non-2xx response is a deterministic budget-exhausted -/// user-state error that should be demoted from Sentry to an info log. 
-pub(super) fn is_budget_exhausted_http_400(status: reqwest::StatusCode, body: &str) -> bool { - status == reqwest::StatusCode::BAD_REQUEST && super::is_budget_exhausted_message(body) -} - -/// Whether a custom OpenAI-compatible proxy returned the known generic -/// upstream 400 envelope: -/// `{"error":{"message":"Bad request to upstream provider","type":"upstream_error","status":400}}`. -/// -/// This shape is deterministic provider/user-state (endpoint-model mismatch, -/// unsupported schema, provider-side validation) and does not provide -/// actionable signal for OpenHuman Sentry triage. -pub(super) fn is_custom_openai_upstream_bad_request_http_400( - provider: &str, - status: reqwest::StatusCode, - body: &str, -) -> bool { - if provider != "custom_openai" || status != reqwest::StatusCode::BAD_REQUEST { - return false; - } - let lower = body.to_ascii_lowercase(); - lower.contains("bad request to upstream provider") && lower.contains("upstream_error") -} - -/// Whether a provider non-2xx response is a deterministic provider-policy -/// denial (not a product bug) that should be demoted from Sentry. -/// -/// Canonical example: Kimi's coding endpoint rejects non-agent clients with -/// HTTP 403 + `access_terminated_error` and a message like: -/// "currently only available for Coding Agents …". 
-pub(super) fn is_provider_access_policy_denied_http_403( - status: reqwest::StatusCode, - body: &str, -) -> bool { - if status != reqwest::StatusCode::FORBIDDEN { - return false; - } - let lower = body.to_ascii_lowercase(); - lower.contains("access_terminated_error") - || lower.contains("currently only available for coding agents") -} - -pub(super) fn log_budget_exhausted_http_400( - operation: &str, - provider: &str, - model: Option<&str>, - status: reqwest::StatusCode, -) { - tracing::info!( - domain = "llm_provider", - operation = operation, - provider = provider, - model = model.unwrap_or(""), - status = status.as_u16(), - failure = "non_2xx", - kind = "budget", - "[llm_provider] {operation} budget-exhausted 400 — not reporting to Sentry" - ); -} - -pub(super) fn log_custom_openai_upstream_bad_request_http_400( - operation: &str, - provider: &str, - model: Option<&str>, - status: reqwest::StatusCode, -) { - tracing::info!( - domain = "llm_provider", - operation = operation, - provider = provider, - model = model.unwrap_or(""), - status = status.as_u16(), - failure = "non_2xx", - kind = "provider_user_state", - reason = "custom_openai_upstream_bad_request", - "[llm_provider] {operation} custom_openai upstream 400 — not reporting to Sentry" - ); -} - -pub(super) fn log_provider_access_policy_denied_http_403( - operation: &str, - provider: &str, - model: Option<&str>, - status: reqwest::StatusCode, -) { - tracing::info!( - domain = "llm_provider", - operation = operation, - provider = provider, - model = model.unwrap_or(""), - status = status.as_u16(), - failure = "non_2xx", - kind = "provider_access_policy", - "[llm_provider] {operation} provider access-policy 403 — not reporting to Sentry" - ); -} - -/// Whether a provider non-2xx response is a deterministic -/// **configuration-rejection** user-state error (unknown model id, -/// abstract tier leaked to a custom provider, model-specific temperature -/// constraint) that should be demoted from Sentry to an info 
log. -/// -/// Provider-aware (inverted polarity vs. the 401/403 backend rule): for -/// most config-rejection phrases the same body from the OpenHuman -/// **backend** stays Sentry-actionable — that would mean we sent our own -/// backend a bad request (a regression, e.g. #2079). Restricted to the -/// observed shapes (400 invalid-param / unknown-model, 404 -/// model-does-not-exist, 422 unprocessable); 408/429 are transient and -/// handled separately. -/// -/// **Exception: OpenAI-compatible "unknown model"** (`Model 'X' is not -/// available. Use GET /openai/v1/models …`). The OpenHuman backend now -/// emits this exact body for user-configured unknown model ids, so it is -/// user-state regardless of provider — the polarity guard is dropped for -/// this specific shape (TAURI-RUST-2Z1). See -/// [`super::is_openai_compatible_unknown_model_message`]. -pub(super) fn is_provider_config_rejection_http( - status: reqwest::StatusCode, - provider: &str, - body: &str, -) -> bool { - if !matches!(status.as_u16(), 400 | 404 | 422) { - return false; - } - if !super::is_provider_config_rejection_message(body) { - return false; - } - // OpenAI-compatible "unknown model" body is user-state regardless of - // provider — both third-party `custom_openai` upstreams and our own - // OpenHuman backend now emit it for user-configured model ids that - // aren't in the registry (TAURI-RUST-2Z1). - if super::is_openai_compatible_unknown_model_message(body) { - return true; - } - // Remaining config-rejection phrases (DeepSeek `supported api model - // names are`, Moonshot `invalid temperature`, litellm envelopes, …) - // are intrinsically scoped to third-party providers — keep the - // polarity guard so a regression where our own backend emits one of - // those still reaches Sentry. 
- provider != openhuman_backend::PROVIDER_LABEL -} - -pub(super) fn log_provider_config_rejection( - operation: &str, - provider: &str, - model: Option<&str>, - status: reqwest::StatusCode, -) { - tracing::info!( - domain = "llm_provider", - operation = operation, - provider = provider, - model = model.unwrap_or(""), - status = status.as_u16(), - failure = "non_2xx", - kind = "provider_config_rejection", - "[llm_provider] {operation} provider config-rejection ({status}) — \ - user model/param configuration, not reporting to Sentry" - ); -} - -/// Whether a provider error body indicates the request exceeded the model's -/// context window (the conversation/prompt is too long for the configured -/// model). This is a deterministic user-state / usage condition — the -/// remediation is "start a new chat, trim the conversation, or pick a -/// larger-context model" — not a product bug. Sentry has no signal to act -/// on. -/// -/// Single source of truth for the context-overflow phrasing, shared by: -/// - [`super::reliable`]'s non-retryable classifier (retrying the same -/// oversized request can't help), -/// - the [`api_error`] Sentry-suppression cascade (below), and -/// - the `core::observability` `ContextWindowExceeded` classifier (which -/// catches the higher-layer re-report under `domain=agent` / -/// `web_channel`). -/// -/// Status-agnostic on purpose: providers disagree on the HTTP code for this -/// condition — OpenAI / most emit `400 context_length_exceeded`, but some -/// custom / self-hosted gateways mis-report it as `500` (Sentry -/// TAURI-RUST-501: `"custom API error (500 …): Context size has been -/// exceeded."`). Matching on the body keeps all of them in one bucket. 
-/// -/// Anchoring is deliberately two-tier because this matcher now also feeds -/// `core::observability::expected_error_kind` (Sentry suppression) and the -/// `reliable` non-retryable decision, so an over-broad match would both -/// hide a real error from Sentry *and* wrongly mark a retryable error as -/// permanent: -/// -/// - **Length/context phrases** ([`CONTEXT_HINTS`]) are unambiguous — -/// "context window", "context length", "prompt is too long" only describe -/// request-size overflow — so they match alone. -/// - **Token-count phrases** ([`TOKEN_HINTS`]) collide with per-minute token -/// *rate* limits ("rate limit reached … too many tokens per min"), which -/// are transient 429s that MUST stay retryable and keep reaching Sentry. -/// They only count as context-overflow when no rate-limit marker is -/// present. -pub fn is_context_window_exceeded_message(body: &str) -> bool { - let lower = body.to_ascii_lowercase(); - - // Unambiguous request-size / context phrases — match on their own. - const CONTEXT_HINTS: &[&str] = &[ - "exceeds the context window", - "context window of this model", - "maximum context length", - "context length exceeded", - "context size has been exceeded", - "prompt is too long", - "input is too long", - ]; - if CONTEXT_HINTS.iter().any(|hint| lower.contains(hint)) { - return true; - } - - // Token-count phrases are ambiguous with token-per-minute RATE limits. - // Treat them as context-overflow only when the body carries no - // rate-limit marker — otherwise a transient TPM 429 would be silenced - // from Sentry and (via `reliable`) wrongly classified as non-retryable. 
- const TOKEN_HINTS: &[&str] = &["too many tokens", "token limit exceeded"]; - if TOKEN_HINTS.iter().any(|hint| lower.contains(hint)) { - const RATE_LIMIT_MARKERS: &[&str] = &[ - "per minute", - "per min", - "rate limit", - "rate_limit", - "tpm", - "requests per", - "retry after", - "try again in", - ]; - return !RATE_LIMIT_MARKERS - .iter() - .any(|marker| lower.contains(marker)); - } - - false -} - -pub(super) fn log_context_window_exceeded( - operation: &str, - provider: &str, - model: Option<&str>, - status: reqwest::StatusCode, -) { - tracing::warn!( - domain = "llm_provider", - operation = operation, - provider = provider, - model = model.unwrap_or(""), - status = status.as_u16(), - failure = "non_2xx", - kind = "context_window_exceeded", - "[llm_provider] {operation} context-window exceeded ({status}) — \ - request too long for the model, not reporting to Sentry" - ); -} - -/// Whether a provider non-2xx response is the OpenHuman **backend** rejecting -/// the app session JWT (`401`/`403`). This is expected user-session state -/// (token expired / revoked / rotated server-side), not a product bug — the -/// auth domain owns recovery. `401`/`403` from **other** providers (OpenAI, -/// Anthropic, …) mean a misconfigured BYO API key and stay Sentry-actionable, -/// so the predicate is provider-scoped to [`openhuman_backend::PROVIDER_LABEL`]. -pub(super) fn is_backend_auth_failure(provider: &str, status: reqwest::StatusCode) -> bool { - matches!(status.as_u16(), 401 | 403) && provider == openhuman_backend::PROVIDER_LABEL -} - -/// Handle a backend session-expiry auth failure: publish a -/// [`crate::core::event_bus::DomainEvent::SessionExpired`] so the credentials -/// subscriber clears the session and flips the scheduler-gate signed-out -/// override (halting downstream LLM work — see OPENHUMAN-TAURI-1T), and skip -/// the Sentry report. 
Mirrors the `is_auth_failure && is_backend` arm in -/// [`api_error`], factored out for the hand-rolled provider HTTP-error chains -/// in [`super::compatible::OpenAiCompatibleProvider`] which consume the -/// response body inline and so can't delegate to `api_error`. The -/// `chat_completions` chain lacked this branch and reported the backend -/// `401 Invalid token` to Sentry — that drift was TAURI-RUST-N. -/// -/// `message` is the already-formatted `"{provider} API error ({status}): …"` -/// string; it embeds the sanitized body, but the prefix and caller-controlled -/// provider name aren't scrubbed, so re-run [`sanitize_api_error`] on the final -/// string before it reaches the SessionExpired subscriber's logs. -pub(super) fn publish_backend_session_expired( - operation: &str, - provider: &str, - status: reqwest::StatusCode, - message: &str, -) { - tracing::warn!( - domain = "llm_provider", - operation = operation, - provider = provider, - status = status.as_u16(), - "[llm_provider] backend auth failure ({status}) — publishing SessionExpired" - ); - crate::core::event_bus::publish_global(crate::core::event_bus::DomainEvent::SessionExpired { - source: "llm_provider.openhuman_backend".to_string(), - reason: sanitize_api_error(message), - }); -} - -/// Build a sanitized provider error from a failed HTTP response. -/// -/// Reports the failure to Sentry with `provider` and `status` tags so -/// upstream LLM errors are visible in observability without every call-site -/// having to remember to log — except for: -/// -/// - **Transient statuses** (429 — see [`should_report_provider_http_failure`]). -/// These get retried by the reliable-provider layer and don't deserve a -/// per-attempt Sentry event. -/// - **401/403 from the OpenHuman backend provider** — the user's app session -/// expired. 
That is expected user-state, not a server bug, and reporting it -/// spams Sentry (OPENHUMAN-TAURI-1T: 5,414 events from a single user whose -/// cron loops kept firing post-expiry). Instead we publish a -/// [`crate::core::event_bus::DomainEvent::SessionExpired`] so the credentials -/// subscriber clears the session and flips the scheduler-gate signed-out -/// override, halting downstream LLM work. 401/403 from **other** providers -/// (OpenAI, Anthropic, …) still go to Sentry — those mean a misconfigured -/// API key, which is actionable. -/// - **Provider config-rejection** (4xx unknown-model / abstract-tier / -/// model-specific temperature) from a **non-backend** provider — the -/// user pointed a custom provider at a model/param it doesn't accept. -/// Deterministic user-config state, surfaced in the UI; demoted to an -/// info log (#2079 / #2076 / #2202). See -/// [`is_provider_config_rejection_http`]. -pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::Error { - let status = response.status(); - let status_str = status.as_u16().to_string(); - let body = response - .text() - .await - .unwrap_or_else(|_| "".to_string()); - let sanitized = sanitize_api_error(&body); - let message = format!("{provider} API error ({status}): {sanitized}"); - - let is_auth_failure = matches!(status.as_u16(), 401 | 403); - let is_backend = provider == openhuman_backend::PROVIDER_LABEL; - let is_budget_exhausted_user_state = is_budget_exhausted_http_400(status, &body); - let is_custom_openai_upstream_bad_request = - is_custom_openai_upstream_bad_request_http_400(provider, status, &body); - let is_provider_access_policy_denied = is_provider_access_policy_denied_http_403(status, &body); - let is_provider_config_rejection = is_provider_config_rejection_http(status, provider, &body); - // Context-overflow is status-agnostic: match the body directly (some - // custom gateways mis-report it as 500 — TAURI-RUST-501 — so a status - // gate would let those 
through to `should_report_provider_http_failure`). - let is_context_window_exceeded = is_context_window_exceeded_message(&body); - - if is_auth_failure && is_backend { - // Single source of truth for backend session-expiry handling (warn + - // SessionExpired publish + final-string sanitize) — shared with the - // hand-rolled `chat_completions` chain in `compatible.rs`. - publish_backend_session_expired("api_error", provider, status, &message); - } else if is_budget_exhausted_user_state { - log_budget_exhausted_http_400("api_error", provider, None, status); - } else if is_custom_openai_upstream_bad_request { - log_custom_openai_upstream_bad_request_http_400("api_error", provider, None, status); - } else if is_provider_access_policy_denied { - log_provider_access_policy_denied_http_403("api_error", provider, None, status); - } else if is_provider_config_rejection { - log_provider_config_rejection("api_error", provider, None, status); - } else if is_context_window_exceeded { - log_context_window_exceeded("api_error", provider, None, status); - } else if should_report_provider_http_failure(status) { - crate::core::observability::report_error( - message.as_str(), - "llm_provider", - "api_error", - &[ - ("provider", provider), - ("status", status_str.as_str()), - ("failure", "non_2xx"), - ], - ); - } - anyhow::anyhow!(message) -} - -/// Create the inference provider. -/// -/// - `inference_url`: optional custom OpenAI-compatible LLM endpoint -/// (`config.inference_url`). When set together with `api_key`, inference -/// talks directly to this URL — keeping product-backend traffic -/// (auth/billing/voice) on `backend_url` where it belongs. -/// - `backend_url`: the OpenHuman product backend URL (`config.api_url`). -/// Used by the fallback [`openhuman_backend::OpenHumanBackendProvider`] -/// which routes inference to `{backend}/openai/v1/...` with the app -/// session JWT. -/// - `api_key`: the API key for the custom inference endpoint. 
Ignored on -/// the OpenHuman fallback path (the backend uses a session JWT, not a -/// user-supplied key). -pub fn create_backend_inference_provider( - inference_url: Option<&str>, - backend_url: Option<&str>, - api_key: Option<&str>, - options: &ProviderRuntimeOptions, -) -> anyhow::Result> { - if let (Some(url), Some(key)) = (inference_url, api_key) { - log::info!( - "[providers] inference target = custom_openai @ {} (api_key bytes={})", - url, - key.len() - ); - Ok(Box::new( - crate::openhuman::inference::provider::compatible::OpenAiCompatibleProvider::new_no_responses_fallback( - "custom_openai", - url, - Some(key), - crate::openhuman::inference::provider::compatible::AuthStyle::Bearer, - ), - )) - } else { - if api_key.is_some() && inference_url.is_none() { - log::warn!( - "[providers] api_key provided without inference_url — key will be ignored, using OpenHuman backend" - ); - } - log::info!( - "[providers] inference target = openhuman_backend (backend_url={}, inference_url_set={}, api_key_set={})", - backend_url.unwrap_or(""), - inference_url.is_some(), - api_key.is_some() - ); - Ok(Box::new(openhuman_backend::OpenHumanBackendProvider::new( - backend_url, - options, - ))) - } -} - -/// Create provider chain with retry and fallback behavior. -pub fn create_resilient_provider( - inference_url: Option<&str>, - backend_url: Option<&str>, - api_key: Option<&str>, - reliability: &crate::openhuman::config::ReliabilityConfig, -) -> anyhow::Result> { - create_resilient_provider_with_options( - inference_url, - backend_url, - api_key, - reliability, - &ProviderRuntimeOptions::default(), - ) -} - -/// Create provider chain with retry/fallback behavior and auth runtime options. 
-pub fn create_resilient_provider_with_options( - inference_url: Option<&str>, - backend_url: Option<&str>, - api_key: Option<&str>, - reliability: &crate::openhuman::config::ReliabilityConfig, - options: &ProviderRuntimeOptions, -) -> anyhow::Result> { - if !reliability.fallback_providers.is_empty() { - tracing::warn!( - "reliability.fallback_providers is ignored; inference uses only the OpenHuman backend" - ); - } - - let primary_provider = - create_backend_inference_provider(inference_url, backend_url, api_key, options)?; - let providers: Vec<(String, Box)> = - vec![(INFERENCE_BACKEND_ID.to_string(), primary_provider)]; - - let reliable = reliable::ReliableProvider::new( - providers, - reliability.provider_retries, - reliability.provider_backoff_ms, - ) - .with_model_fallbacks(reliability.model_fallbacks.clone()); - - Ok(Box::new(reliable)) -} - -/// Create a RouterProvider if model routes are configured, otherwise return a resilient provider. -pub fn create_routed_provider( - inference_url: Option<&str>, - backend_url: Option<&str>, - api_key: Option<&str>, - reliability: &crate::openhuman::config::ReliabilityConfig, - model_routes: &[crate::openhuman::config::ModelRouteConfig], - default_model: &str, -) -> anyhow::Result> { - create_routed_provider_with_options( - inference_url, - backend_url, - api_key, - reliability, - model_routes, - default_model, - &ProviderRuntimeOptions::default(), - ) -} - -pub fn create_routed_provider_with_options( - inference_url: Option<&str>, - backend_url: Option<&str>, - api_key: Option<&str>, - reliability: &crate::openhuman::config::ReliabilityConfig, - model_routes: &[crate::openhuman::config::ModelRouteConfig], - default_model: &str, - options: &ProviderRuntimeOptions, -) -> anyhow::Result> { - if model_routes.is_empty() { - return create_resilient_provider_with_options( - inference_url, - backend_url, - api_key, - reliability, - options, - ); - } - - let backend = create_backend_inference_provider(inference_url, 
backend_url, api_key, options)?; - let providers: Vec<(String, Box)> = - vec![(INFERENCE_BACKEND_ID.to_string(), backend)]; - - let routes: Vec<(String, router::Route)> = model_routes - .iter() - .map(|r| { - ( - r.hint.clone(), - router::Route { - provider_name: INFERENCE_BACKEND_ID.to_string(), - model: r.model.clone(), - context_window: - crate::openhuman::inference::model_context::context_window_for_model( - &r.model, - ), - }, - ) - }) - .collect(); - - Ok(Box::new(router::RouterProvider::new( - providers, - routes, - default_model.to_string(), - ))) -} - -/// Create a provider with intelligent local/remote routing. -/// -/// When `config.local_ai.runtime_enabled` is `true` and Ollama is reachable, -/// lightweight and medium tasks (e.g. `hint:reaction`, `hint:summarize`) are -/// served by the local model. Heavy tasks (`hint:reasoning`, `hint:agentic`, -/// `hint:coding`) always go to the remote backend. A health-gated fallback -/// transparently promotes failed local calls to the remote backend. -/// -/// Telemetry for every routing decision is emitted at `INFO` level under the -/// `"routing"` tracing target. -pub fn create_intelligent_routing_provider( - inference_url: Option<&str>, - backend_url: Option<&str>, - api_key: Option<&str>, - config: &crate::openhuman::config::Config, - options: &ProviderRuntimeOptions, -) -> anyhow::Result> { - let raw_backend = - create_backend_inference_provider(inference_url, backend_url, api_key, options)?; - // Wrap the raw backend in ReliableProvider so transient 502/503/504 errors - // are retried before propagating to the agent turn. Without this, a single - // 502 from the backend bypasses the retry layer entirely and surfaces as a - // fatal `run_single` failure. 
- log::debug!( - "[providers] initialising reliable wrapper: retries={} backoff_ms={} fallbacks={}", - config.reliability.provider_retries, - config.reliability.provider_backoff_ms, - config.reliability.model_fallbacks.len() - ); - let reliable_backend: Box = Box::new( - reliable::ReliableProvider::new( - vec![(INFERENCE_BACKEND_ID.to_string(), raw_backend)], - config.reliability.provider_retries, - config.reliability.provider_backoff_ms, - ) - .with_model_fallbacks(config.reliability.model_fallbacks.clone()), - ); - let default_model = config - .default_model - .as_deref() - .unwrap_or(crate::openhuman::config::DEFAULT_MODEL); - - // When the user has configured `model_routes` (custom provider via - // BackendProviderPanel), wrap the reliable remote in a RouterProvider so - // abstract tier names like `reasoning-v1` get translated to the configured - // provider-specific model id (e.g. `gpt-5.5`) BEFORE the request leaves - // the host. Without this step the abstract tier name would reach - // `custom_openai` and 404. The OpenHuman backend can dispatch tier names - // natively, so we skip the wrap when routes are empty. 
- log::info!( - "[providers] intelligent routing: model_routes_count={} default_model={} inference_url_set={}", - config.model_routes.len(), - default_model, - inference_url.is_some() - ); - let remote: Box = if config.model_routes.is_empty() { - reliable_backend - } else { - let providers: Vec<(String, Box)> = - vec![(INFERENCE_BACKEND_ID.to_string(), reliable_backend)]; - let routes: Vec<(String, router::Route)> = config - .model_routes - .iter() - .map(|r| { - ( - r.hint.clone(), - router::Route { - provider_name: INFERENCE_BACKEND_ID.to_string(), - model: r.model.clone(), - context_window: - crate::openhuman::inference::model_context::context_window_for_model( - &r.model, - ), - }, - ) - }) - .collect(); - Box::new(router::RouterProvider::new( - providers, - routes, - default_model.to_string(), - )) - }; - - let provider = crate::openhuman::routing::new_provider( - remote, - &config.local_ai, - default_model, - &config.temperature_unsupported_models, - ); - Ok(Box::new(provider)) -} - -/// Information about a supported provider for display purposes. -pub struct ProviderInfo { - pub name: &'static str, - pub display_name: &'static str, - pub aliases: &'static [&'static str], - pub local: bool, -} - -/// Return known providers for display (single backend path). -pub fn list_providers() -> Vec { - vec![ProviderInfo { - name: INFERENCE_BACKEND_ID, - display_name: "OpenHuman (backend)", - aliases: &["backend", "openhuman-backend"], - local: false, - }] -} - -// Legacy provider alias stubs (integrations / config); remote providers were removed. 
-pub fn is_glm_alias(_name: &str) -> bool { - false -} -pub fn is_zai_alias(_name: &str) -> bool { - false -} -pub fn is_minimax_alias(_name: &str) -> bool { - false -} -pub fn is_moonshot_alias(_name: &str) -> bool { - false -} -pub fn is_qianfan_alias(_name: &str) -> bool { - false -} -pub fn is_qwen_alias(_name: &str) -> bool { - false -} -pub fn is_qwen_oauth_alias(_name: &str) -> bool { - false -} -pub fn canonical_china_provider_name(_name: &str) -> Option<&'static str> { - let _ = _name; - None -} - -#[cfg(test)] -#[path = "ops_tests.rs"] -mod tests; diff --git a/src/openhuman/inference/provider/ops/http_error.rs b/src/openhuman/inference/provider/ops/http_error.rs new file mode 100644 index 0000000000..e09d15d916 --- /dev/null +++ b/src/openhuman/inference/provider/ops/http_error.rs @@ -0,0 +1,393 @@ +use super::sanitize::sanitize_api_error; +use crate::openhuman::inference::provider::openhuman_backend; + +/// Whether a non-2xx provider response is worth reporting to Sentry. +/// +/// Transient upstream statuses — 429 Too Many Requests, 408 Request Timeout, +/// and 502/503/504 gateway-layer failures — are caller-side throttling or +/// upstream-capacity signals. The reliable-provider layer already retries +/// with backoff and falls back across providers/models, and the aggregate +/// "all providers exhausted" event still fires if every attempt fails. +/// Reporting each individual transient failure floods Sentry (see +/// OPENHUMAN-TAURI-6Y / 2E / 84 / T: thousands of events/day per user from +/// a single upstream rate-limit / outage window). Callers should still +/// propagate the error so retry and fallback logic runs unchanged; this +/// only gates the per-attempt Sentry report. 
+pub fn should_report_provider_http_failure(status: reqwest::StatusCode) -> bool { + !crate::core::observability::TRANSIENT_PROVIDER_HTTP_STATUSES.contains(&status.as_u16()) +} + +/// Whether a provider non-2xx response is a deterministic budget-exhausted +/// user-state error that should be demoted from Sentry to an info log. +pub fn is_budget_exhausted_http_400(status: reqwest::StatusCode, body: &str) -> bool { + status == reqwest::StatusCode::BAD_REQUEST + && crate::openhuman::inference::provider::is_budget_exhausted_message(body) +} + +/// Whether a custom OpenAI-compatible proxy returned the known generic +/// upstream 400 envelope: +/// `{"error":{"message":"Bad request to upstream provider","type":"upstream_error","status":400}}`. +/// +/// This shape is deterministic provider/user-state (endpoint-model mismatch, +/// unsupported schema, provider-side validation) and does not provide +/// actionable signal for OpenHuman Sentry triage. +pub fn is_custom_openai_upstream_bad_request_http_400( + provider: &str, + status: reqwest::StatusCode, + body: &str, +) -> bool { + if provider != "custom_openai" || status != reqwest::StatusCode::BAD_REQUEST { + return false; + } + let lower = body.to_ascii_lowercase(); + lower.contains("bad request to upstream provider") && lower.contains("upstream_error") +} + +/// Whether a provider non-2xx response is a deterministic provider-policy +/// denial (not a product bug) that should be demoted from Sentry. +/// +/// Canonical example: Kimi's coding endpoint rejects non-agent clients with +/// HTTP 403 + `access_terminated_error` and a message like: +/// "currently only available for Coding Agents …". 
+pub fn is_provider_access_policy_denied_http_403(status: reqwest::StatusCode, body: &str) -> bool { + if status != reqwest::StatusCode::FORBIDDEN { + return false; + } + let lower = body.to_ascii_lowercase(); + lower.contains("access_terminated_error") + || lower.contains("currently only available for coding agents") +} + +pub fn log_budget_exhausted_http_400( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::info!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "budget", + "[llm_provider] {operation} budget-exhausted 400 — not reporting to Sentry" + ); +} + +pub fn log_custom_openai_upstream_bad_request_http_400( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::info!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "provider_user_state", + reason = "custom_openai_upstream_bad_request", + "[llm_provider] {operation} custom_openai upstream 400 — not reporting to Sentry" + ); +} + +pub fn log_provider_access_policy_denied_http_403( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::info!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "provider_access_policy", + "[llm_provider] {operation} provider access-policy 403 — not reporting to Sentry" + ); +} + +/// Whether a provider non-2xx response is a deterministic +/// **configuration-rejection** user-state error (unknown model id, +/// abstract tier leaked to a custom provider, model-specific temperature +/// constraint) that should be demoted from Sentry to an info log. 
+/// +/// Provider-aware (inverted polarity vs. the 401/403 backend rule): for +/// most config-rejection phrases the same body from the OpenHuman +/// **backend** stays Sentry-actionable — that would mean we sent our own +/// backend a bad request (a regression, e.g. #2079). Restricted to the +/// observed shapes (400 invalid-param / unknown-model, 404 +/// model-does-not-exist, 422 unprocessable); 408/429 are transient and +/// handled separately. +/// +/// **Exception: OpenAI-compatible "unknown model"** (`Model 'X' is not +/// available. Use GET /openai/v1/models …`). The OpenHuman backend now +/// emits this exact body for user-configured unknown model ids, so it is +/// user-state regardless of provider — the polarity guard is dropped for +/// this specific shape (TAURI-RUST-2Z1). See +/// [`super::is_openai_compatible_unknown_model_message`]. +pub fn is_provider_config_rejection_http( + status: reqwest::StatusCode, + provider: &str, + body: &str, +) -> bool { + if !matches!(status.as_u16(), 400 | 404 | 422) { + return false; + } + if !crate::openhuman::inference::provider::is_provider_config_rejection_message(body) { + return false; + } + // OpenAI-compatible "unknown model" body is user-state regardless of + // provider — both third-party `custom_openai` upstreams and our own + // OpenHuman backend now emit it for user-configured model ids that + // aren't in the registry (TAURI-RUST-2Z1). + if crate::openhuman::inference::provider::is_openai_compatible_unknown_model_message(body) { + return true; + } + // Remaining config-rejection phrases (DeepSeek `supported api model + // names are`, Moonshot `invalid temperature`, litellm envelopes, …) + // are intrinsically scoped to third-party providers — keep the + // polarity guard so a regression where our own backend emits one of + // those still reaches Sentry. 
+ provider != openhuman_backend::PROVIDER_LABEL +} + +pub fn log_provider_config_rejection( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::info!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "provider_config_rejection", + "[llm_provider] {operation} provider config-rejection ({status}) — \ + user model/param configuration, not reporting to Sentry" + ); +} + +/// Whether a provider error body indicates the request exceeded the model's +/// context window (the conversation/prompt is too long for the configured +/// model). This is a deterministic user-state / usage condition — the +/// remediation is "start a new chat, trim the conversation, or pick a +/// larger-context model" — not a product bug. Sentry has no signal to act +/// on. +/// +/// Single source of truth for the context-overflow phrasing, shared by: +/// - [`super::reliable`]'s non-retryable classifier (retrying the same +/// oversized request can't help), +/// - the [`api_error`] Sentry-suppression cascade (below), and +/// - the `core::observability` `ContextWindowExceeded` classifier (which +/// catches the higher-layer re-report under `domain=agent` / +/// `web_channel`). +/// +/// Status-agnostic on purpose: providers disagree on the HTTP code for this +/// condition — OpenAI / most emit `400 context_length_exceeded`, but some +/// custom / self-hosted gateways mis-report it as `500` (Sentry +/// TAURI-RUST-501: `"custom API error (500 …): Context size has been +/// exceeded."`). Matching on the body keeps all of them in one bucket. 
+/// +/// Anchoring is deliberately two-tier because this matcher now also feeds +/// `core::observability::expected_error_kind` (Sentry suppression) and the +/// `reliable` non-retryable decision, so an over-broad match would both +/// hide a real error from Sentry *and* wrongly mark a retryable error as +/// permanent: +/// +/// - **Length/context phrases** ([`CONTEXT_HINTS`]) are unambiguous — +/// "context window", "context length", "prompt is too long" only describe +/// request-size overflow — so they match alone. +/// - **Token-count phrases** ([`TOKEN_HINTS`]) collide with per-minute token +/// *rate* limits ("rate limit reached … too many tokens per min"), which +/// are transient 429s that MUST stay retryable and keep reaching Sentry. +/// They only count as context-overflow when no rate-limit marker is +/// present. +pub fn is_context_window_exceeded_message(body: &str) -> bool { + let lower = body.to_ascii_lowercase(); + + // Unambiguous request-size / context phrases — match on their own. + const CONTEXT_HINTS: &[&str] = &[ + "exceeds the context window", + "context window of this model", + "maximum context length", + "context length exceeded", + "context size has been exceeded", + "prompt is too long", + "input is too long", + ]; + if CONTEXT_HINTS.iter().any(|hint| lower.contains(hint)) { + return true; + } + + // Token-count phrases are ambiguous with token-per-minute RATE limits. + // Treat them as context-overflow only when the body carries no + // rate-limit marker — otherwise a transient TPM 429 would be silenced + // from Sentry and (via `reliable`) wrongly classified as non-retryable. 
+ const TOKEN_HINTS: &[&str] = &["too many tokens", "token limit exceeded"]; + if TOKEN_HINTS.iter().any(|hint| lower.contains(hint)) { + const RATE_LIMIT_MARKERS: &[&str] = &[ + "per minute", + "per min", + "rate limit", + "rate_limit", + "tpm", + "requests per", + "retry after", + "try again in", + ]; + return !RATE_LIMIT_MARKERS + .iter() + .any(|marker| lower.contains(marker)); + } + + false +} + +pub fn log_context_window_exceeded( + operation: &str, + provider: &str, + model: Option<&str>, + status: reqwest::StatusCode, +) { + tracing::warn!( + domain = "llm_provider", + operation = operation, + provider = provider, + model = model.unwrap_or(""), + status = status.as_u16(), + failure = "non_2xx", + kind = "context_window_exceeded", + "[llm_provider] {operation} context-window exceeded ({status}) — \ + request too long for the model, not reporting to Sentry" + ); +} + +/// Whether a provider non-2xx response is the OpenHuman **backend** rejecting +/// the app session JWT (`401`/`403`). This is expected user-session state +/// (token expired / revoked / rotated server-side), not a product bug — the +/// auth domain owns recovery. `401`/`403` from **other** providers (OpenAI, +/// Anthropic, …) mean a misconfigured BYO API key and stay Sentry-actionable, +/// so the predicate is provider-scoped to [`openhuman_backend::PROVIDER_LABEL`]. +pub fn is_backend_auth_failure(provider: &str, status: reqwest::StatusCode) -> bool { + matches!(status.as_u16(), 401 | 403) && provider == openhuman_backend::PROVIDER_LABEL +} + +/// Handle a backend session-expiry auth failure: publish a +/// [`crate::core::event_bus::DomainEvent::SessionExpired`] so the credentials +/// subscriber clears the session and flips the scheduler-gate signed-out +/// override (halting downstream LLM work — see OPENHUMAN-TAURI-1T), and skip +/// the Sentry report. 
Mirrors the `is_auth_failure && is_backend` arm in +/// [`api_error`], factored out for the hand-rolled provider HTTP-error chains +/// in [`super::compatible::OpenAiCompatibleProvider`] which consume the +/// response body inline and so can't delegate to `api_error`. The +/// `chat_completions` chain lacked this branch and reported the backend +/// `401 Invalid token` to Sentry — that drift was TAURI-RUST-N. +/// +/// `message` is the already-formatted `"{provider} API error ({status}): …"` +/// string; it embeds the sanitized body, but the prefix and caller-controlled +/// provider name aren't scrubbed, so re-run [`sanitize_api_error`] on the final +/// string before it reaches the SessionExpired subscriber's logs. +pub fn publish_backend_session_expired( + operation: &str, + provider: &str, + status: reqwest::StatusCode, + message: &str, +) { + tracing::warn!( + domain = "llm_provider", + operation = operation, + provider = provider, + status = status.as_u16(), + "[llm_provider] backend auth failure ({status}) — publishing SessionExpired" + ); + crate::core::event_bus::publish_global(crate::core::event_bus::DomainEvent::SessionExpired { + source: "llm_provider.openhuman_backend".to_string(), + reason: sanitize_api_error(message), + }); +} + +/// Build a sanitized provider error from a failed HTTP response. +/// +/// Reports the failure to Sentry with `provider` and `status` tags so +/// upstream LLM errors are visible in observability without every call-site +/// having to remember to log — except for: +/// +/// - **Transient statuses** (429 — see [`should_report_provider_http_failure`]). +/// These get retried by the reliable-provider layer and don't deserve a +/// per-attempt Sentry event. +/// - **401/403 from the OpenHuman backend provider** — the user's app session +/// expired. 
That is expected user-state, not a server bug, and reporting it +/// spams Sentry (OPENHUMAN-TAURI-1T: 5,414 events from a single user whose +/// cron loops kept firing post-expiry). Instead we publish a +/// [`crate::core::event_bus::DomainEvent::SessionExpired`] so the credentials +/// subscriber clears the session and flips the scheduler-gate signed-out +/// override, halting downstream LLM work. 401/403 from **other** providers +/// (OpenAI, Anthropic, …) still go to Sentry — those mean a misconfigured +/// API key, which is actionable. +/// - **Provider config-rejection** (4xx unknown-model / abstract-tier / +/// model-specific temperature) from a **non-backend** provider — the +/// user pointed a custom provider at a model/param it doesn't accept. +/// Deterministic user-config state, surfaced in the UI; demoted to an +/// info log (#2079 / #2076 / #2202). See +/// [`is_provider_config_rejection_http`]. +pub async fn api_error(provider: &str, response: reqwest::Response) -> anyhow::Error { + let status = response.status(); + let status_str = status.as_u16().to_string(); + let body = response + .text() + .await + .unwrap_or_else(|_| "".to_string()); + let sanitized = sanitize_api_error(&body); + let message = format!("{provider} API error ({status}): {sanitized}"); + + let is_auth_failure = matches!(status.as_u16(), 401 | 403); + let is_backend = provider == openhuman_backend::PROVIDER_LABEL; + let is_budget_exhausted_user_state = is_budget_exhausted_http_400(status, &body); + let is_custom_openai_upstream_bad_request = + is_custom_openai_upstream_bad_request_http_400(provider, status, &body); + let is_provider_access_policy_denied = is_provider_access_policy_denied_http_403(status, &body); + let is_provider_config_rejection = is_provider_config_rejection_http(status, provider, &body); + // Context-overflow is status-agnostic: match the body directly (some + // custom gateways mis-report it as 500 — TAURI-RUST-501 — so a status + // gate would let those 
through to `should_report_provider_http_failure`). + let is_context_window_exceeded = is_context_window_exceeded_message(&body); + + if is_auth_failure && is_backend { + // Single source of truth for backend session-expiry handling (warn + + // SessionExpired publish + final-string sanitize) — shared with the + // hand-rolled `chat_completions` chain in `compatible.rs`. + publish_backend_session_expired("api_error", provider, status, &message); + } else if is_budget_exhausted_user_state { + log_budget_exhausted_http_400("api_error", provider, None, status); + } else if is_custom_openai_upstream_bad_request { + log_custom_openai_upstream_bad_request_http_400("api_error", provider, None, status); + } else if is_provider_access_policy_denied { + log_provider_access_policy_denied_http_403("api_error", provider, None, status); + } else if is_provider_config_rejection { + log_provider_config_rejection("api_error", provider, None, status); + } else if is_context_window_exceeded { + log_context_window_exceeded("api_error", provider, None, status); + } else if should_report_provider_http_failure(status) { + crate::core::observability::report_error( + message.as_str(), + "llm_provider", + "api_error", + &[ + ("provider", provider), + ("status", status_str.as_str()), + ("failure", "non_2xx"), + ], + ); + } + anyhow::anyhow!(message) +} diff --git a/src/openhuman/inference/provider/ops/mod.rs b/src/openhuman/inference/provider/ops/mod.rs new file mode 100644 index 0000000000..9e371a9209 --- /dev/null +++ b/src/openhuman/inference/provider/ops/mod.rs @@ -0,0 +1,57 @@ +//! Provider operations — split from a single `ops.rs` into sub-modules. +//! +//! Sub-modules: +//! - `sanitize` — secret scrubbing, error formatting +//! - `http_error` — HTTP error classification, Sentry routing, `api_error` +//! - `models` — model listing (`list_configured_models`, parsing) +//! 
- `provider_factory` — provider construction (`create_*`, `ProviderRuntimeOptions`) + +mod http_error; +mod models; +mod provider_factory; +mod sanitize; + +// ── public surface (preserves the original `pub use ops::*` contract) ── + +pub use sanitize::{ + format_anyhow_chain, format_error_chain, sanitize_api_error, scrub_secret_patterns, + MAX_API_ERROR_CHARS, +}; + +pub use http_error::{ + api_error, is_backend_auth_failure, is_budget_exhausted_http_400, + is_context_window_exceeded_message, is_custom_openai_upstream_bad_request_http_400, + is_provider_access_policy_denied_http_403, is_provider_config_rejection_http, + log_budget_exhausted_http_400, log_context_window_exceeded, + log_custom_openai_upstream_bad_request_http_400, log_provider_access_policy_denied_http_403, + log_provider_config_rejection, publish_backend_session_expired, + should_report_provider_http_failure, +}; + +pub use models::{ + append_query_param, is_openrouter_provider, list_configured_models, + list_configured_models_from_config, merge_openai_codex_model_hints, model_items_from_body, + parse_models_response, synthesize_local_runtime_entry, ModelInfo, +}; + +pub use provider_factory::{ + canonical_china_provider_name, create_backend_inference_provider, + create_intelligent_routing_provider, create_resilient_provider, + create_resilient_provider_with_options, create_routed_provider, + create_routed_provider_with_options, is_glm_alias, is_minimax_alias, is_moonshot_alias, + is_qianfan_alias, is_qwen_alias, is_qwen_oauth_alias, is_zai_alias, list_providers, + ProviderInfo, ProviderRuntimeOptions, INFERENCE_BACKEND_ID, +}; + +// ── test re-exports for ops_tests.rs ── + +#[cfg(test)] +pub(crate) use super::openai_codex::openai_codex_client_version; +#[cfg(test)] +pub(crate) use super::openhuman_backend; + +// ── test companion ── + +#[cfg(test)] +#[path = "../ops_tests.rs"] +mod tests; diff --git a/src/openhuman/inference/provider/ops/models.rs b/src/openhuman/inference/provider/ops/models.rs 
new file mode 100644 index 0000000000..752428d0a4 --- /dev/null +++ b/src/openhuman/inference/provider/ops/models.rs @@ -0,0 +1,480 @@ +use serde::Serialize; + +use super::super::openai_codex::{ + openai_codex_client_version, openai_codex_user_agent, resolve_openai_codex_routing, + OpenAiCodexRouting, OPENAI_CODEX_ACCOUNT_HEADER, OPENAI_CODEX_MODEL_HINTS, + OPENAI_CODEX_ORIGINATOR, OPENAI_CODEX_ORIGINATOR_HEADER, +}; +use super::sanitize::sanitize_api_error; + +#[derive(Debug, Serialize)] +pub struct ModelInfo { + pub id: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub owned_by: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub context_window: Option, +} + +pub async fn list_configured_models( + provider_id: &str, +) -> Result, String> { + let config = crate::openhuman::config::Config::load_or_init() + .await + .map_err(|e| e.to_string())?; + + list_configured_models_from_config(provider_id, &config).await +} + +pub async fn list_configured_models_from_config( + provider_id: &str, + config: &crate::openhuman::config::Config, +) -> Result, String> { + let provider_id = provider_id.trim().to_string(); + if provider_id.is_empty() { + return Err("provider_id must not be empty".to_string()); + } + + log::debug!("[providers][list_models] provider_id={}", provider_id); + + // Explicit `cloud_providers` entry wins (e.g. a user-pointed remote + // ollama box at https://ollama.example.com/v1). Falling back to the + // local-runtime synthesis below only happens when no entry matches. 
+ let entry = config + .cloud_providers + .iter() + .find(|e| e.id == provider_id || e.slug == provider_id) + .cloned() + .or_else(|| synthesize_local_runtime_entry(&provider_id, config)) + .ok_or_else(|| format!("no cloud provider with id or slug '{}' found", provider_id))?; + + let api_key = + crate::openhuman::inference::provider::factory::lookup_key_for_slug(&entry.slug, config) + .unwrap_or_default(); + let api_key = api_key.trim().to_string(); + + let routing = resolve_openai_codex_routing(config, &entry.slug, &entry.endpoint, &api_key) + .unwrap_or_else(|err| { + log::warn!( + "[providers][list_models] openai codex routing unavailable; continuing with configured endpoint: {err}" + ); + OpenAiCodexRouting::standard(&entry.endpoint) + }); + + let mut models_url = format!("{}/models", routing.endpoint); + if routing.using_oauth { + models_url = + append_query_param(&models_url, "client_version", openai_codex_client_version()); + } + + log::debug!( + "[providers][list_models] fetching url={} slug={} codex_oauth={} account_id_header={}", + models_url, + entry.slug, + routing.using_oauth, + routing.account_id.is_some() + ); + + let client = crate::openhuman::config::build_runtime_proxy_client_with_timeouts( + "providers.list_models", + 30, + 10, + ); + + use crate::openhuman::config::schema::cloud_providers::AuthStyle; + if is_openrouter_provider(&entry) { + validate_openrouter_api_key(&client, &routing.endpoint, &api_key).await?; + } + + let mut request = client.get(&models_url); + if routing.using_oauth { + request = request + .header(reqwest::header::USER_AGENT, openai_codex_user_agent()) + .header(OPENAI_CODEX_ORIGINATOR_HEADER, OPENAI_CODEX_ORIGINATOR); + } + + request = match entry.auth_style { + AuthStyle::Bearer => { + if !api_key.is_empty() { + let mut r = request.header("Authorization", format!("Bearer {}", api_key)); + if let Some(account_id) = routing.account_id.as_deref() { + r = r.header(OPENAI_CODEX_ACCOUNT_HEADER, account_id); + } + r + } else { + 
request + } + } + AuthStyle::Anthropic => { + let mut r = request.header("anthropic-version", "2023-06-01"); + if !api_key.is_empty() { + r = r.header("x-api-key", &api_key); + } + r + } + AuthStyle::OpenhumanJwt => { + if !api_key.is_empty() { + request.header("Authorization", format!("Bearer {}", api_key)) + } else { + request + } + } + AuthStyle::None => request, + }; + + let response = request + .send() + .await + .map_err(|e| format!("[providers][list_models] HTTP request failed: {}", e))?; + + let status = response.status(); + if !status.is_success() { + let body = response.text().await.unwrap_or_default(); + let sanitized = sanitize_api_error(&body); + let truncated = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); + return Err(format!( + "provider returned {}: {}", + status.as_u16(), + truncated + )); + } + + // TAURI-RUST-12: `response.json()` discards the body when decoding fails, + // so Sentry just sees `error decoding response body` with no clue what the + // server actually sent. In practice the offending body is HTML from a + // captive portal / corporate proxy login page, an upstream load-balancer + // 502 served as HTML with a `200 OK`, or a JSON parser tripping on a + // wrong-path endpoint. Read the body as text first, then parse, and + // surface a sanitized + truncated snippet so the failure is diagnosable + // from the error string alone. 
+ let raw_body = response.text().await.map_err(|e| { + format!( + "[providers][list_models] failed to read response body: {}", + e + ) + })?; + let body: serde_json::Value = serde_json::from_str(&raw_body).map_err(|e| { + let sanitized = sanitize_api_error(&raw_body); + let snippet = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); + format!( + "[providers][list_models] failed to parse JSON: {} (body: {})", + e, snippet + ) + })?; + + // OpenAI-compatible servers occasionally return HTTP 200 with an error + // payload instead of a 4xx (LM Studio does this for unknown paths like + // `/v11/models` — body `{"error":"Unexpected endpoint or method..."}`). + // Treat any top-level `error` field as a failure so the AI-panel probe + // doesn't silently accept a typo'd endpoint. + if let Some(err_field) = body.get("error") { + let msg = err_field + .as_str() + .map(|s| s.to_string()) + .or_else(|| { + err_field + .get("message") + .and_then(|m| m.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_else(|| err_field.to_string()); + let sanitized = sanitize_api_error(&msg); + return Err(format!("provider returned error payload: {}", sanitized)); + } + + // Parse the OpenAI-compatible `/models` envelope into typed model + // entries. See `parse_models_response` for the distinct error shapes + // returned for "missing field" vs "field present but wrong type" + // (TAURI-RUST-4Y). The ChatGPT Codex backend uses a sibling `models` + // array keyed by `slug`, so that shape is accepted here too. 
+ let mut models = parse_models_response(&body)?; + if routing.using_oauth { + merge_openai_codex_model_hints(&mut models); + } + + log::info!( + "[providers][list_models] slug={} fetched {} models", + entry.slug, + models.len() + ); + + Ok(crate::rpc::RpcOutcome::new( + serde_json::json!({ "models": models }), + vec![format!("fetched {} models", models.len())], + )) +} + +/// Parse the OpenAI-compatible `/models` response envelope, or the ChatGPT +/// Codex backend's sibling `models` envelope, into typed [`ModelInfo`] entries. +/// +/// Returns distinct errors for the three failure modes the wild has +/// produced in `inference_list_models` Sentry events: +/// +/// 1. **Missing `data`/`models` field** — endpoint isn't `/models`-compatible +/// (user typo'd the base URL, pointed at a vector-DB host, etc.). +/// 2. **`data`/`models` field present but wrong type** — provider returned +/// `{"object":"error","data":{…}}`, `{"data":null}`, or similar +/// non-array. The error names the actual JSON type so triage knows what +/// the provider sent. +/// 3. **Non-object top-level body** — provider returned a bare array, +/// string, etc. Caught explicitly so the parser doesn't silently +/// drop into the missing-data arm with a `` keys list. +/// +/// Per-entry parsing ignores entries that don't have a usable string id/slug +/// (lax on purpose — many OpenAI-compatible servers include malformed rows for +/// capabilities they don't fully implement). 
+pub fn parse_models_response(body: &serde_json::Value) -> Result, String> { + let obj = body.as_object().ok_or_else(|| { + format!( + "provider response is not a JSON object — endpoint is not OpenAI-compatible (got {} at top level)", + json_value_kind(body) + ) + })?; + + let (field_name, data_value) = obj + .get("data") + .map(|value| ("data", value)) + .or_else(|| obj.get("models").map(|value| ("models", value))) + .ok_or_else(|| { + let keys = obj.keys().cloned().collect::>().join(", "); + format!( + "provider response missing `data` or `models` field — endpoint is not OpenAI-compatible (got keys: {})", + keys + ) + })?; + + let data = data_value.as_array().ok_or_else(|| { + // Include the sibling `object` field if present — OpenAI-shaped + // servers set it to `"list"` on success and `"error"` (or omit) + // on failure, so its value is the fastest triage signal for + // future Sentry events on the wrong-type arm. + let object_field = obj + .get("object") + .map(|v| v.to_string()) + .unwrap_or_else(|| "".to_string()); + format!( + "provider response has `{}` field but it is {}, expected array — endpoint may be returning an error envelope (\"object\" = {})", + field_name, + json_value_kind(data_value), + object_field, + ) + })?; + + Ok(data + .iter() + .filter_map(model_info_from_catalog_item) + .collect()) +} + +/// Name the JSON value kind for use in `parse_models_response` error +/// messages. Mirrors `serde_json::Value::*` variants exactly so test +/// assertions on the rendered token (`object`/`string`/`null`/…) stay +/// in lock-step with the matcher. 
+fn json_value_kind(v: &serde_json::Value) -> &'static str { + match v { + serde_json::Value::Null => "null", + serde_json::Value::Bool(_) => "bool", + serde_json::Value::Number(_) => "number", + serde_json::Value::String(_) => "string", + serde_json::Value::Array(_) => "array", + serde_json::Value::Object(_) => "object", + } +} + +/// Synthesize a transient [`CloudProviderCreds`] entry for the well-known +/// local-runtime slugs (`ollama`, `lmstudio`) so [`list_configured_models`] +/// can probe their OpenAI-compatible `/v1/models` endpoint even when the +/// user has not registered a matching `cloud_providers` row. +/// +/// Background: the AI settings panel registers an `ollama` `cloud_providers` +/// entry when the user configures Ollama (see comment on +/// [`crate::openhuman::config::schema::cloud_providers::is_slug_reserved`]), +/// but in practice some users hit +/// `inference_list_models("ollama")` without that entry — config drift, +/// flush-vs-probe race, or upgrade from a build that only persisted +/// `config.local_ai.base_url`. Sentry TAURI-RUST-28Z captures this: +/// 24 events / 7d, all `domain=rpc, method=openhuman.inference_list_models, +/// operation=invoke_method`. Without this fallback, the dropdown surfaces +/// the bare `"no cloud provider with id or slug 'ollama' found"` error +/// (also visible in the Sentry breadcrumb) instead of returning models. +/// +/// Returns `None` for any slug that is not a recognized local-runtime +/// alias — callers continue down the normal "no cloud provider" error +/// path for `openai` / `anthropic` / opaque ids / typos. 
+pub fn synthesize_local_runtime_entry( + slug: &str, + config: &crate::openhuman::config::Config, +) -> Option { + use crate::openhuman::config::schema::cloud_providers::{AuthStyle, CloudProviderCreds}; + + let endpoint = match slug { + // Ollama's OpenAI-compatible surface at `/v1/models` returns + // the same `{"data": [...]}` shape the existing parser handles, so + // we route through that rather than the native `/api/tags`. + "ollama" => { + let base = crate::openhuman::inference::local::ollama_base_url_from_config(config); + format!("{}/v1", base.trim_end_matches('/')) + } + // `lm_studio_base_url` already ends in `/v1`. + "lmstudio" => crate::openhuman::inference::local::lm_studio::lm_studio_base_url(config), + _ => return None, + }; + + Some(CloudProviderCreds { + id: format!("synthetic_local_{slug}"), + slug: slug.to_string(), + label: slug.to_string(), + endpoint, + // Local runtimes accept unauthenticated requests on loopback. + // The probe at `/models` runs without an Authorization + // header — `lookup_key_for_slug` may still return a key, but + // `AuthStyle::None` ignores it (see auth-style match below). 
+ auth_style: AuthStyle::None, + legacy_type: None, + default_model: None, + }) +} + +pub fn merge_openai_codex_model_hints(models: &mut Vec) { + let mut seen = models + .iter() + .map(|model| model.id.to_ascii_lowercase()) + .collect::>(); + + for id in OPENAI_CODEX_MODEL_HINTS { + if seen.insert(id.to_ascii_lowercase()) { + models.push(ModelInfo { + id: (*id).to_string(), + owned_by: Some("openai-codex".to_string()), + context_window: None, + }); + } + } +} + +pub fn is_openrouter_provider( + entry: &crate::openhuman::config::schema::cloud_providers::CloudProviderCreds, +) -> bool { + if entry.slug.eq_ignore_ascii_case("openrouter") { + return true; + } + + reqwest::Url::parse(&entry.endpoint) + .ok() + .and_then(|url| url.host_str().map(|host| host.to_ascii_lowercase())) + .is_some_and(|host| host == "openrouter.ai" || host.ends_with(".openrouter.ai")) +} + +pub fn append_query_param(url: &str, key: &str, value: &str) -> String { + if let Ok(mut parsed) = reqwest::Url::parse(url) { + parsed.query_pairs_mut().append_pair(key, value); + return parsed.to_string(); + } + + let separator = if url.contains('?') { '&' } else { '?' }; + format!("{url}{separator}{key}={value}") +} + +#[allow(dead_code)] +pub fn model_items_from_body(body: &serde_json::Value) -> Option> { + body.get("data") + .and_then(|d| d.as_array()) + .or_else(|| body.get("models").and_then(|d| d.as_array())) + .cloned() +} + +fn model_info_from_catalog_item(item: &serde_json::Value) -> Option { + if let Some(id) = item.as_str().map(str::trim).filter(|id| !id.is_empty()) { + return Some(ModelInfo { + id: id.to_string(), + owned_by: None, + context_window: None, + }); + } + + let id = item + .get("id") + .or_else(|| item.get("slug")) + .or_else(|| item.get("name")) + .and_then(|v| v.as_str()) + .map(str::trim) + .filter(|id| !id.is_empty())? 
+ .to_string(); + let owned_by = item + .get("owned_by") + .or_else(|| item.get("owned_by_organization")) + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + let context_window = item + .get("context_length") + .or_else(|| item.get("context_window")) + .or_else(|| item.get("max_context_window")) + .and_then(|v| v.as_u64()); + Some(ModelInfo { + id, + owned_by, + context_window, + }) +} + +async fn validate_openrouter_api_key( + client: &reqwest::Client, + base: &str, + api_key: &str, +) -> Result<(), String> { + if api_key.is_empty() { + return Err("OpenRouter API key is required before enabling the provider".to_string()); + } + + let key_url = format!("{}/key", base); + log::debug!("[providers][list_models] validating OpenRouter API key"); + let response = client + .get(&key_url) + .header("Authorization", format!("Bearer {api_key}")) + .send() + .await + .map_err(|e| format!("[providers][list_models] OpenRouter key validation failed: {e}"))?; + + let status = response.status(); + let text = response.text().await.unwrap_or_default(); + if !status.is_success() { + let sanitized = sanitize_api_error(&text); + let truncated = crate::openhuman::util::truncate_with_ellipsis(&sanitized, 300); + log::debug!( + "[providers][list_models] OpenRouter key validation failed status={} body={}", + status.as_u16(), + truncated + ); + return Err(format!( + "OpenRouter key validation returned {}: {}", + status.as_u16(), + truncated + )); + } + + if let Ok(body) = serde_json::from_str::(&text) { + if let Some(err_field) = body.get("error") { + let msg = err_field + .as_str() + .map(|s| s.to_string()) + .or_else(|| { + err_field + .get("message") + .and_then(|m| m.as_str()) + .map(|s| s.to_string()) + }) + .unwrap_or_else(|| err_field.to_string()); + let sanitized = sanitize_api_error(&msg); + log::debug!( + "[providers][list_models] OpenRouter key validation returned error payload={}", + sanitized + ); + return Err(format!( + "OpenRouter key validation returned error payload: 
{}", + sanitized + )); + } + } + + Ok(()) +} diff --git a/src/openhuman/inference/provider/ops/provider_factory.rs b/src/openhuman/inference/provider/ops/provider_factory.rs new file mode 100644 index 0000000000..09dc0436b1 --- /dev/null +++ b/src/openhuman/inference/provider/ops/provider_factory.rs @@ -0,0 +1,329 @@ +use std::path::PathBuf; + +use super::super::{reliable, router, traits::Provider}; + +/// Fixed id for the single inference backend (OpenHuman API). +pub const INFERENCE_BACKEND_ID: &str = "openhuman"; + +#[derive(Debug, Clone)] +pub struct ProviderRuntimeOptions { + pub auth_profile_override: Option, + pub openhuman_dir: Option, + pub secrets_encrypt: bool, + pub reasoning_enabled: Option, +} + +impl Default for ProviderRuntimeOptions { + fn default() -> Self { + Self { + auth_profile_override: None, + openhuman_dir: None, + secrets_encrypt: true, + reasoning_enabled: None, + } + } +} + +/// Create the inference provider. +/// +/// - `inference_url`: optional custom OpenAI-compatible LLM endpoint +/// (`config.inference_url`). When set together with `api_key`, inference +/// talks directly to this URL — keeping product-backend traffic +/// (auth/billing/voice) on `backend_url` where it belongs. +/// - `backend_url`: the OpenHuman product backend URL (`config.api_url`). +/// Used by the fallback [`openhuman_backend::OpenHumanBackendProvider`] +/// which routes inference to `{backend}/openai/v1/...` with the app +/// session JWT. +/// - `api_key`: the API key for the custom inference endpoint. Ignored on +/// the OpenHuman fallback path (the backend uses a session JWT, not a +/// user-supplied key). 
+pub fn create_backend_inference_provider( + inference_url: Option<&str>, + backend_url: Option<&str>, + api_key: Option<&str>, + options: &ProviderRuntimeOptions, +) -> anyhow::Result> { + if let (Some(url), Some(key)) = (inference_url, api_key) { + log::info!( + "[providers] inference target = custom_openai @ {} (api_key bytes={})", + url, + key.len() + ); + Ok(Box::new( + crate::openhuman::inference::provider::compatible::OpenAiCompatibleProvider::new_no_responses_fallback( + "custom_openai", + url, + Some(key), + crate::openhuman::inference::provider::compatible::AuthStyle::Bearer, + ), + )) + } else { + if api_key.is_some() && inference_url.is_none() { + log::warn!( + "[providers] api_key provided without inference_url — key will be ignored, using OpenHuman backend" + ); + } + log::info!( + "[providers] inference target = openhuman_backend (backend_url={}, inference_url_set={}, api_key_set={})", + backend_url.unwrap_or(""), + inference_url.is_some(), + api_key.is_some() + ); + Ok(Box::new( + crate::openhuman::inference::provider::openhuman_backend::OpenHumanBackendProvider::new( + backend_url, + options, + ), + )) + } +} + +/// Create provider chain with retry and fallback behavior. +pub fn create_resilient_provider( + inference_url: Option<&str>, + backend_url: Option<&str>, + api_key: Option<&str>, + reliability: &crate::openhuman::config::ReliabilityConfig, +) -> anyhow::Result> { + create_resilient_provider_with_options( + inference_url, + backend_url, + api_key, + reliability, + &ProviderRuntimeOptions::default(), + ) +} + +/// Create provider chain with retry/fallback behavior and auth runtime options. 
+pub fn create_resilient_provider_with_options( + inference_url: Option<&str>, + backend_url: Option<&str>, + api_key: Option<&str>, + reliability: &crate::openhuman::config::ReliabilityConfig, + options: &ProviderRuntimeOptions, +) -> anyhow::Result> { + if !reliability.fallback_providers.is_empty() { + tracing::warn!( + "reliability.fallback_providers is ignored; inference uses only the OpenHuman backend" + ); + } + + let primary_provider = + create_backend_inference_provider(inference_url, backend_url, api_key, options)?; + let providers: Vec<(String, Box)> = + vec![(INFERENCE_BACKEND_ID.to_string(), primary_provider)]; + + let reliable = reliable::ReliableProvider::new( + providers, + reliability.provider_retries, + reliability.provider_backoff_ms, + ) + .with_model_fallbacks(reliability.model_fallbacks.clone()); + + Ok(Box::new(reliable)) +} + +/// Create a RouterProvider if model routes are configured, otherwise return a resilient provider. +pub fn create_routed_provider( + inference_url: Option<&str>, + backend_url: Option<&str>, + api_key: Option<&str>, + reliability: &crate::openhuman::config::ReliabilityConfig, + model_routes: &[crate::openhuman::config::ModelRouteConfig], + default_model: &str, +) -> anyhow::Result> { + create_routed_provider_with_options( + inference_url, + backend_url, + api_key, + reliability, + model_routes, + default_model, + &ProviderRuntimeOptions::default(), + ) +} + +pub fn create_routed_provider_with_options( + inference_url: Option<&str>, + backend_url: Option<&str>, + api_key: Option<&str>, + reliability: &crate::openhuman::config::ReliabilityConfig, + model_routes: &[crate::openhuman::config::ModelRouteConfig], + default_model: &str, + options: &ProviderRuntimeOptions, +) -> anyhow::Result> { + if model_routes.is_empty() { + return create_resilient_provider_with_options( + inference_url, + backend_url, + api_key, + reliability, + options, + ); + } + + let backend = create_backend_inference_provider(inference_url, 
backend_url, api_key, options)?; + let providers: Vec<(String, Box)> = + vec![(INFERENCE_BACKEND_ID.to_string(), backend)]; + + let routes: Vec<(String, router::Route)> = model_routes + .iter() + .map(|r| { + ( + r.hint.clone(), + router::Route { + provider_name: INFERENCE_BACKEND_ID.to_string(), + model: r.model.clone(), + context_window: + crate::openhuman::inference::model_context::context_window_for_model( + &r.model, + ), + }, + ) + }) + .collect(); + + Ok(Box::new(router::RouterProvider::new( + providers, + routes, + default_model.to_string(), + ))) +} + +/// Create a provider with intelligent local/remote routing. +/// +/// When `config.local_ai.runtime_enabled` is `true` and Ollama is reachable, +/// lightweight and medium tasks (e.g. `hint:reaction`, `hint:summarize`) are +/// served by the local model. Heavy tasks (`hint:reasoning`, `hint:agentic`, +/// `hint:coding`) always go to the remote backend. A health-gated fallback +/// transparently promotes failed local calls to the remote backend. +/// +/// Telemetry for every routing decision is emitted at `INFO` level under the +/// `"routing"` tracing target. +pub fn create_intelligent_routing_provider( + inference_url: Option<&str>, + backend_url: Option<&str>, + api_key: Option<&str>, + config: &crate::openhuman::config::Config, + options: &ProviderRuntimeOptions, +) -> anyhow::Result> { + let raw_backend = + create_backend_inference_provider(inference_url, backend_url, api_key, options)?; + // Wrap the raw backend in ReliableProvider so transient 502/503/504 errors + // are retried before propagating to the agent turn. Without this, a single + // 502 from the backend bypasses the retry layer entirely and surfaces as a + // fatal `run_single` failure. 
+ log::debug!( + "[providers] initialising reliable wrapper: retries={} backoff_ms={} fallbacks={}", + config.reliability.provider_retries, + config.reliability.provider_backoff_ms, + config.reliability.model_fallbacks.len() + ); + let reliable_backend: Box = Box::new( + reliable::ReliableProvider::new( + vec![(INFERENCE_BACKEND_ID.to_string(), raw_backend)], + config.reliability.provider_retries, + config.reliability.provider_backoff_ms, + ) + .with_model_fallbacks(config.reliability.model_fallbacks.clone()), + ); + let default_model = config + .default_model + .as_deref() + .unwrap_or(crate::openhuman::config::DEFAULT_MODEL); + + // When the user has configured `model_routes` (custom provider via + // BackendProviderPanel), wrap the reliable remote in a RouterProvider so + // abstract tier names like `reasoning-v1` get translated to the configured + // provider-specific model id (e.g. `gpt-5.5`) BEFORE the request leaves + // the host. Without this step the abstract tier name would reach + // `custom_openai` and 404. The OpenHuman backend can dispatch tier names + // natively, so we skip the wrap when routes are empty. 
+ log::info!( + "[providers] intelligent routing: model_routes_count={} default_model={} inference_url_set={}", + config.model_routes.len(), + default_model, + inference_url.is_some() + ); + let remote: Box = if config.model_routes.is_empty() { + reliable_backend + } else { + let providers: Vec<(String, Box)> = + vec![(INFERENCE_BACKEND_ID.to_string(), reliable_backend)]; + let routes: Vec<(String, router::Route)> = config + .model_routes + .iter() + .map(|r| { + ( + r.hint.clone(), + router::Route { + provider_name: INFERENCE_BACKEND_ID.to_string(), + model: r.model.clone(), + context_window: + crate::openhuman::inference::model_context::context_window_for_model( + &r.model, + ), + }, + ) + }) + .collect(); + Box::new(router::RouterProvider::new( + providers, + routes, + default_model.to_string(), + )) + }; + + let provider = crate::openhuman::routing::new_provider( + remote, + &config.local_ai, + default_model, + &config.temperature_unsupported_models, + ); + Ok(Box::new(provider)) +} + +/// Information about a supported provider for display purposes. +pub struct ProviderInfo { + pub name: &'static str, + pub display_name: &'static str, + pub aliases: &'static [&'static str], + pub local: bool, +} + +/// Return known providers for display (single backend path). +pub fn list_providers() -> Vec { + vec![ProviderInfo { + name: INFERENCE_BACKEND_ID, + display_name: "OpenHuman (backend)", + aliases: &["backend", "openhuman-backend"], + local: false, + }] +} + +// Legacy provider alias stubs (integrations / config); remote providers were removed. 
+pub fn is_glm_alias(_name: &str) -> bool { + false +} +pub fn is_zai_alias(_name: &str) -> bool { + false +} +pub fn is_minimax_alias(_name: &str) -> bool { + false +} +pub fn is_moonshot_alias(_name: &str) -> bool { + false +} +pub fn is_qianfan_alias(_name: &str) -> bool { + false +} +pub fn is_qwen_alias(_name: &str) -> bool { + false +} +pub fn is_qwen_oauth_alias(_name: &str) -> bool { + false +} +pub fn canonical_china_provider_name(_name: &str) -> Option<&'static str> { + let _ = _name; + None +} diff --git a/src/openhuman/inference/provider/ops/sanitize.rs b/src/openhuman/inference/provider/ops/sanitize.rs new file mode 100644 index 0000000000..d5adcfa104 --- /dev/null +++ b/src/openhuman/inference/provider/ops/sanitize.rs @@ -0,0 +1,88 @@ +use crate::openhuman::util; + +pub const MAX_API_ERROR_CHARS: usize = 200; +const TRANSPORT_ERROR_MAX_CHARS: usize = 1200; + +fn is_secret_char(c: char) -> bool { + c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | ':') +} + +fn token_end(input: &str, from: usize) -> usize { + let mut end = from; + for (i, c) in input[from..].char_indices() { + if is_secret_char(c) { + end = from + i + c.len_utf8(); + } else { + break; + } + } + end +} + +/// Scrub known secret-like token prefixes from provider error strings. 
+pub fn scrub_secret_patterns(input: &str) -> String { + const PREFIXES: [&str; 7] = [ + "sk-", + "xoxb-", + "xoxp-", + "ghp_", + "gho_", + "ghu_", + "github_pat_", + ]; + + let mut scrubbed = input.to_string(); + + for prefix in PREFIXES { + let mut search_from = 0; + loop { + let Some(rel) = scrubbed[search_from..].find(prefix) else { + break; + }; + + let start = search_from + rel; + let content_start = start + prefix.len(); + let end = token_end(&scrubbed, content_start); + + if end == content_start { + search_from = content_start; + continue; + } + + scrubbed.replace_range(start..end, "[REDACTED]"); + search_from = start + "[REDACTED]".len(); + } + } + + scrubbed +} + +/// Sanitize API error text by scrubbing secrets and truncating length. +pub fn sanitize_api_error(input: &str) -> String { + let scrubbed = scrub_secret_patterns(input); + util::truncate_with_ellipsis(&scrubbed, MAX_API_ERROR_CHARS) +} + +/// Full `source()` chain for connection / TLS failures (scrubbed, longer than API body snippets). +pub fn format_error_chain(err: &dyn std::error::Error) -> String { + let mut parts: Vec = vec![err.to_string()]; + let mut src = std::error::Error::source(err); + while let Some(e) = src { + parts.push(e.to_string()); + src = std::error::Error::source(e); + } + let joined = parts.join(" | "); + let scrubbed = scrub_secret_patterns(&joined); + util::truncate_with_suffix(&scrubbed, TRANSPORT_ERROR_MAX_CHARS, "…") +} + +/// Cause chain from [`anyhow::Error`] (e.g. responses fallback), scrubbed and length-limited. 
+pub fn format_anyhow_chain(err: &anyhow::Error) -> String { + let joined = err + .chain() + .map(|e| e.to_string()) + .collect::>() + .join(" | "); + let scrubbed = scrub_secret_patterns(&joined); + util::truncate_with_suffix(&scrubbed, TRANSPORT_ERROR_MAX_CHARS, "…") +} diff --git a/src/openhuman/mcp_server/tools.rs b/src/openhuman/mcp_server/tools.rs deleted file mode 100644 index 4a5a97f889..0000000000 --- a/src/openhuman/mcp_server/tools.rs +++ /dev/null @@ -1,1438 +0,0 @@ -use serde_json::{json, Map, Value}; - -use crate::core::all; -use crate::openhuman::agent::harness::AgentDefinitionRegistry; -use crate::openhuman::agent::Agent; -use crate::openhuman::config::rpc as config_rpc; -use crate::openhuman::inference::provider::traits::build_tool_instructions_text; -use crate::openhuman::security::{SecurityPolicy, ToolOperation}; -use crate::openhuman::tools::SEARXNG_MAX_RESULTS; - -use super::write_dispatch; - -const DEFAULT_LIMIT: u64 = 10; -const MAX_LIMIT: u64 = 50; -const QUERY_ARGUMENTS: &[&str] = &["query", "k"]; -const SEARXNG_SEARCH_ARGUMENTS: &[&str] = &["query", "categories", "language", "max_results"]; -const TREE_READ_CHUNK_ARGUMENTS: &[&str] = &["chunk_id"]; -const SUBAGENT_RUN_ARGUMENTS: &[&str] = &["agent_id", "prompt"]; -const TREE_BROWSE_ARGUMENTS: &[&str] = &[ - "source_kinds", - "source_ids", - "entity_ids", - "since_ms", - "until_ms", - "query", - "k", - "offset", -]; -const TREE_TOP_ENTITIES_ARGUMENTS: &[&str] = &["kind", "k"]; -const TREE_LIST_SOURCES_ARGUMENTS: &[&str] = &["user_email_hint"]; -const MEMORY_STORE_ARGUMENTS: &[&str] = &["title", "content", "namespace", "tags"]; -const MEMORY_NOTE_ARGUMENTS: &[&str] = &["chunk_id", "note_text"]; -const TREE_TAG_ARGUMENTS: &[&str] = &["chunk_id", "tags"]; -/// Upper bound on the number of tags `tree.tag` accepts per call. 
-/// Matches the "explicit rejection over silent clamping" pattern used -/// elsewhere in the MCP layer; prevents a misbehaving client from -/// flooding a chunk's tag-record document with thousands of entries. -const TREE_TAG_MAX_TAGS: usize = 50; -/// Upper bound on a single tag's character length. Tags are categorical -/// labels — anything past ~128 chars is almost certainly free-form text -/// that should be `memory.note` instead, so reject up-front to surface -/// the misuse rather than silently writing a giant token into the -/// queryable `tags` index. -const TREE_TAG_MAX_TAG_LENGTH: usize = 128; - -#[derive(Debug, Clone)] -pub struct McpToolSpec { - pub name: &'static str, - pub title: &'static str, - pub description: &'static str, - pub rpc_method: Option<&'static str>, - pub input_schema: Value, - /// MCP `ToolAnnotations` per the 2025-03-26+ spec — `readOnlyHint`, - /// `destructiveHint`, `idempotentHint`, `openWorldHint`. Hints, not - /// guarantees; clients use them to surface accurate safety affordances - /// (e.g. Claude Desktop's "this tool can take destructive actions" - /// confirmation gate). Per spec, destructive/idempotent are meaningful - /// only when `readOnlyHint == false`, so read-only tools omit them. - pub annotations: Value, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum ToolCallError { - /// Client-side problem: malformed arguments, unknown tool, validation - /// failure. Maps to JSON-RPC `-32602 Invalid params`. - InvalidParams(String), - /// Server-side problem outside the caller's control: config load failure, - /// missing platform resources. Maps to JSON-RPC `-32603 Internal error`. - /// Kept distinct from `InvalidParams` so MCP clients don't display - /// internal failures as if the user supplied bad arguments. 
- Internal(String), -} - -impl ToolCallError { - pub fn message(&self) -> &str { - match self { - Self::InvalidParams(message) | Self::Internal(message) => message, - } - } - - /// JSON-RPC error code corresponding to this variant. - pub fn code(&self) -> i64 { - match self { - Self::InvalidParams(_) => -32602, - Self::Internal(_) => -32603, - } - } - - /// JSON-RPC error `message` field (short, spec-canonical phrase). The - /// human-readable detail belongs in the response's `data` field. - pub fn jsonrpc_message(&self) -> &'static str { - match self { - Self::InvalidParams(_) => "Invalid params", - Self::Internal(_) => "Internal error", - } - } -} - -pub fn tool_specs() -> Vec { - let mut specs = base_tool_specs(); - specs.push(searxng_tool_spec()); - specs -} - -fn base_tool_specs() -> Vec { - vec![ - McpToolSpec { - name: "core.list_tools", - title: "List Core Tools", - description: "List the live core agent tool catalog that OpenHuman exposes to its orchestrator session.", - rpc_method: None, - input_schema: no_args_schema(), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "core.tool_instructions", - title: "Get Tool Instructions", - description: "Emit the markdown tool-use instructions block that OpenHuman injects into prompt-guided agents.", - rpc_method: None, - input_schema: no_args_schema(), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "agent.list_subagents", - title: "List Subagents", - description: "List registered sub-agent definitions that the core can dispatch for specialized work.", - rpc_method: None, - input_schema: no_args_schema(), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "agent.run_subagent", - title: "Run Subagent", - description: "Run a registered OpenHuman sub-agent directly from the core and return its final response.", - rpc_method: None, - input_schema: json!({ - "type": "object", - "properties": { - "agent_id": { - "type": "string", - 
"description": "Registered sub-agent id (for example `researcher`, `planner`, `code_executor`)." - }, - "prompt": { - "type": "string", - "description": "Task prompt for the sub-agent. Include the context it needs because this is a fresh session." - } - }, - "required": ["agent_id", "prompt"], - "additionalProperties": false - }), - // Sub-agent execution is the one Act-policy surface on the MCP - // server today (see `enforce_act_policy` dispatch in `call_tool`). - // Sub-agents can call further tools, so destructive/openWorld are - // both true; running the same agent twice is not a no-op so - // idempotent is false. - annotations: json!({ - "readOnlyHint": false, - "destructiveHint": true, - "idempotentHint": false, - "openWorldHint": true - }), - }, - McpToolSpec { - name: "memory.search", - title: "Search Memory", - description: "Keyword-search OpenHuman's local memory tree and return matching chunks ordered by recency.", - rpc_method: Some("openhuman.memory_tree_search"), - input_schema: query_schema("Substring to match against stored memory chunks."), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "memory.recall", - title: "Recall Memory", - description: "Semantically recall local memory-tree chunks relevant to a natural-language query.", - rpc_method: Some("openhuman.memory_tree_recall"), - input_schema: query_schema("Natural-language query to embed and rerank against memory summaries."), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "tree.read_chunk", - title: "Read Memory Chunk", - description: "Read one memory-tree chunk by id. Use this to inspect the source text behind search or recall results.", - rpc_method: Some("openhuman.memory_tree_get_chunk"), - input_schema: json!({ - "type": "object", - "properties": { - "chunk_id": { - "type": "string", - "description": "Chunk id returned by memory.search or memory.recall." 
- } - }, - "required": ["chunk_id"], - "additionalProperties": false - }), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "tree.browse", - title: "Browse Memory", - description: "Paginated listing of memory-tree chunks in reverse-chronological order, \ - with optional filters by source kind, source id, entity id, time window, \ - and substring keyword. Use this when the user wants to enumerate (\"what's \ - recent in my Gmail\", \"show me everything from last week about Alice\") \ - rather than search by query. Returns chunks plus a total match count for \ - pagination.", - rpc_method: Some("openhuman.memory_tree_list_chunks"), - input_schema: tree_browse_schema(), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "tree.top_entities", - title: "Top Memory Entities", - description: "List the most-referenced canonical entities (people, organizations, \ - topics, emails) across the local memory tree. Call this for entity \ - discovery before drilling in with `tree.browse` (passing `entity_ids`) \ - or `memory.search`. Returns entities ordered by reference count.", - rpc_method: Some("openhuman.memory_tree_top_entities"), - input_schema: tree_top_entities_schema(), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "tree.list_sources", - title: "List Memory Sources", - description: "List every distinct ingest source (Gmail account, Slack channel, Notion \ - workspace, email thread, …) that has data in the memory tree, with \ - chunk counts and last-activity timestamps. Use this when the user asks \ - \"what data sources do I have\" or to discover source ids to pass into \ - `tree.browse`.", - rpc_method: Some("openhuman.memory_tree_list_sources"), - input_schema: tree_list_sources_schema(), - annotations: read_only_local_annotations(), - }, - McpToolSpec { - name: "memory.store", - title: "Store Memory", - description: "Create a new memory document from content. 
The document is stored in \ - the specified namespace (default `mcp`) and can be retrieved via \ - `memory.search` or `memory.recall`.", - rpc_method: Some("openhuman.memory_doc_put"), - input_schema: memory_store_schema(), - annotations: write_local_annotations(), - }, - McpToolSpec { - name: "memory.note", - title: "Annotate Memory Chunk", - description: "Append a note to an existing memory chunk by storing a linked annotation \ - document. The note references the original chunk_id for provenance and \ - can be retrieved alongside it.", - rpc_method: Some("openhuman.memory_doc_put"), - input_schema: memory_note_schema(), - annotations: write_local_annotations(), - }, - McpToolSpec { - name: "tree.tag", - title: "Tag Memory Chunk", - description: "Apply one or more category tags to an existing memory chunk. \ - Stored as an upsertable tag-record document linked to the target \ - chunk_id, so re-tagging the same chunk replaces the prior tag set \ - rather than accumulating duplicate annotations. Differs from \ - `memory.note` in that the payload is a categorical label list — \ - queryable via the document `tags` field — rather than free-form text.", - rpc_method: Some("openhuman.memory_doc_put"), - input_schema: tree_tag_schema(), - annotations: write_local_annotations(), - }, - ] -} - -/// Annotation preset for the read-only, closed-world tools that just read -/// OpenHuman's local memory tree or agent registry. The MCP spec defaults are -/// `readOnlyHint: false` / `openWorldHint: true`, so both fields must be set -/// explicitly to communicate the actual shape to clients. Destructive and -/// idempotent hints are deliberately omitted — per the spec they are -/// meaningful only when `readOnlyHint == false`. 
-fn read_only_local_annotations() -> Value { - json!({ - "readOnlyHint": true, - "openWorldHint": false - }) -} - -/// Annotation preset for the MCP write tools (`memory.store`, `memory.note`, -/// `tree.tag`) that upsert documents into OpenHuman's local memory tree. -/// Writes are keyed deterministically (slug-from-title, `mcp-note-`, -/// `mcp-tag-`) so repeating a call with identical arguments yields -/// the same stored state — `idempotentHint: true`. The upsert can replace a -/// previously stored document for the same key, which is a destructive update -/// in MCP-spec terms — `destructiveHint: true`. Local-only, no external I/O — -/// `openWorldHint: false`. -fn write_local_annotations() -> Value { - json!({ - "readOnlyHint": false, - "destructiveHint": true, - "idempotentHint": true, - "openWorldHint": false - }) -} - -fn searxng_tool_spec() -> McpToolSpec { - McpToolSpec { - name: "searxng_search", - title: "SearXNG Search", - description: "Search the configured self-hosted SearXNG instance and return normalized title, URL, snippet, and source results. Requires searxng.enabled=true in OpenHuman config.", - rpc_method: Some("openhuman.tools_searxng_search"), - input_schema: searxng_search_schema(), - // SearXNG queries an external (self-hosted but network-reachable) - // search engine: read-only (no state mutation), open-world (results - // come from outside OpenHuman). Per spec, destructive/idempotent - // hints are meaningful only when readOnlyHint=false, so omit them. - annotations: json!({ - "readOnlyHint": true, - "openWorldHint": true - }), - } -} - -fn tree_browse_schema() -> Value { - json!({ - "type": "object", - "properties": { - "source_kinds": { - "type": "array", - "items": { "type": "string" }, - "description": "Restrict to one or more source kinds (e.g. `email`, `chat`, `document`). Omit to include all kinds." 
- }, - "source_ids": { - "type": "array", - "items": { "type": "string" }, - "description": "Restrict to specific logical source ids (e.g. a Slack channel id). Use `tree.list_sources` to discover these." - }, - "entity_ids": { - "type": "array", - "items": { "type": "string" }, - "description": "Restrict to chunks referencing any of these canonical entity ids (e.g. `person:Alice`, `email:alice@example.com`). Use `tree.top_entities` to discover these." - }, - "since_ms": { - "type": "integer", - "minimum": 0, - "description": "Inclusive lower bound on chunk timestamp, in milliseconds since Unix epoch." - }, - "until_ms": { - "type": "integer", - "minimum": 0, - "description": "Inclusive upper bound on chunk timestamp, in milliseconds since Unix epoch." - }, - "query": { - "type": "string", - "minLength": 1, - "description": "Substring keyword filter over the chunk preview text." - }, - "k": { - "type": "integer", - "minimum": 1, - "maximum": MAX_LIMIT, - "description": format!("Maximum chunks per page. Defaults to {DEFAULT_LIMIT}; capped at {MAX_LIMIT}.") - }, - "offset": { - "type": "integer", - "minimum": 0, - "description": "Pagination offset (number of rows to skip). Defaults to 0." - } - }, - "required": [], - "additionalProperties": false - }) -} - -fn tree_top_entities_schema() -> Value { - json!({ - "type": "object", - "properties": { - "kind": { - "type": "string", - "minLength": 1, - "description": "Restrict to a single entity kind (`person`, `email`, `topic`, `org`, …). Omit to span all kinds." - }, - "k": { - "type": "integer", - "minimum": 1, - "maximum": MAX_LIMIT, - "description": format!("Maximum entities to return. 
Defaults to {DEFAULT_LIMIT}; capped at {MAX_LIMIT}.") - } - }, - "required": [], - "additionalProperties": false - }) -} - -fn tree_list_sources_schema() -> Value { - json!({ - "type": "object", - "properties": { - "user_email_hint": { - "type": "string", - "minLength": 1, - "description": "When provided, the user's own email is stripped from email-thread display names so the other party shows up instead. Optional." - } - }, - "required": [], - "additionalProperties": false - }) -} - -fn memory_store_schema() -> Value { - json!({ - "type": "object", - "properties": { - "title": { - "type": "string", - "minLength": 1, - "description": "Human-readable title for the memory document." - }, - "content": { - "type": "string", - "minLength": 1, - "description": "The text content to store as a memory document." - }, - "namespace": { - "type": "string", - "minLength": 1, - "description": "Namespace to store the document in. Defaults to `mcp` when omitted." - }, - "tags": { - "type": "array", - "items": { "type": "string" }, - "description": "Optional tags for categorisation and filtering." - } - }, - "required": ["title", "content"], - "additionalProperties": false - }) -} - -fn memory_note_schema() -> Value { - json!({ - "type": "object", - "properties": { - "chunk_id": { - "type": "string", - "minLength": 1, - "description": "ID of the memory chunk to annotate. Use an ID from memory.search or memory.recall results." - }, - "note_text": { - "type": "string", - "minLength": 1, - "description": "The note text to attach to the chunk." - } - }, - "required": ["chunk_id", "note_text"], - "additionalProperties": false - }) -} - -fn tree_tag_schema() -> Value { - json!({ - "type": "object", - "properties": { - "chunk_id": { - "type": "string", - "minLength": 1, - "description": "ID of the memory chunk to tag. Use an ID from `memory.search`, `memory.recall`, or `tree.browse` results." 
- }, - "tags": { - "type": "array", - "items": { - "type": "string", - "minLength": 1 - }, - "minItems": 1, - "description": "One or more category labels to attach (e.g. `[\"todo\", \"q3-planning\"]`). Re-tagging the same chunk replaces the prior tag set; supply the complete desired set on each call." - } - }, - "required": ["chunk_id", "tags"], - "additionalProperties": false - }) -} - -fn searxng_search_schema() -> Value { - json!({ - "type": "object", - "properties": { - "query": { - "type": "string", - "minLength": 1, - "description": "Search query string." - }, - "categories": { - "type": "array", - "items": { - "type": "string", - "enum": ["web", "general", "news", "images"] - }, - "description": "Optional SearXNG categories. `web` maps to SearXNG `general`." - }, - "language": { - "type": "string", - "minLength": 1, - "description": "Optional language code, e.g. `en`, `zh-CN`, or `fr`." - }, - "max_results": { - "type": "integer", - "minimum": 1, - "maximum": SEARXNG_MAX_RESULTS, - "description": format!("Maximum results to return. 
Defaults to searxng.max_results; capped at {SEARXNG_MAX_RESULTS}.") - } - }, - "required": ["query"], - "additionalProperties": false - }) -} - -pub async fn list_tools_result() -> Value { - match config_rpc::load_config_with_timeout().await { - Ok(config) => list_tools_result_for_config(&config), - Err(err) => { - log::warn!( - "[mcp_server] tools/list config load failed; omitting config-gated tools: {err}" - ); - list_tools_result_from_specs(base_tool_specs()) - } - } -} - -fn list_tools_result_for_config(config: &crate::openhuman::config::Config) -> Value { - let mut specs = base_tool_specs(); - if config.searxng.enabled { - specs.push(searxng_tool_spec()); - } - list_tools_result_from_specs(specs) -} - -fn list_tools_result_from_specs(specs: Vec) -> Value { - let tools = specs - .into_iter() - .map(|tool| { - json!({ - "name": tool.name, - "title": tool.title, - "description": tool.description, - "inputSchema": tool.input_schema, - "annotations": tool.annotations, - }) - }) - .collect::>(); - json!({ "tools": tools }) -} - -pub async fn call_tool( - name: &str, - arguments: Value, - client_info: &str, -) -> Result { - let spec = tool_specs() - .into_iter() - .find(|tool| tool.name == name) - .ok_or_else(|| ToolCallError::InvalidParams(format!("unknown MCP tool `{name}`")))?; - - let audit_arguments = arguments.clone(); - let mut params = match build_rpc_params(spec.name, arguments) { - Ok(params) => params, - Err(err) => { - if write_dispatch::is_write_tool(spec.name) { - write_dispatch::audit_write_rejection_without_config( - spec.name, - &audit_arguments, - client_info, - err.message(), - ); - } - return Err(err); - } - }; - match spec.name { - "core.list_tools" => { - reject_unexpected_arguments(¶ms, &[])?; - enforce_read_policy(spec.name).await?; - return list_core_tools().await; - } - "core.tool_instructions" => { - reject_unexpected_arguments(¶ms, &[])?; - enforce_read_policy(spec.name).await?; - return core_tool_instructions().await; - } - 
"agent.list_subagents" => { - reject_unexpected_arguments(¶ms, &[])?; - enforce_read_policy(spec.name).await?; - return list_subagents().await; - } - "agent.run_subagent" => { - enforce_act_policy(spec.name).await?; - return run_subagent_tool(¶ms).await; - } - "memory.store" | "memory.note" | "tree.tag" => { - let config = write_dispatch::load_write_config(spec.name).await?; - if let Err(err) = write_dispatch::enforce_write_policy_for_config(spec.name, &config) { - write_dispatch::audit_write_rejection( - &config, - spec.name, - &audit_arguments, - Some(¶ms), - client_info, - &err, - ); - return Err(err); - } - params.insert( - "source_type".to_string(), - Value::String(client_info.to_string()), - ); - if let Err(err) = validate_controller_params(&spec, ¶ms) { - write_dispatch::audit_write_rejection( - &config, - spec.name, - &audit_arguments, - Some(¶ms), - client_info, - &err, - ); - return Err(err); - } - return write_dispatch::dispatch_write_tool( - spec.name, - ¶ms, - &audit_arguments, - client_info, - &config, - ) - .await; - } - _ => {} - } - - validate_controller_params(&spec, ¶ms)?; - enforce_read_policy(spec.name).await?; - - let rpc_method = spec.rpc_method.ok_or_else(|| { - ToolCallError::Internal(format!( - "MCP tool `{}` is missing its RPC mapping", - spec.name - )) - })?; - - log::debug!( - "[mcp_server] tools/call dispatch tool={} rpc_method={} arg_keys={:?}", - spec.name, - rpc_method, - params.keys().collect::>() - ); - - match all::try_invoke_registered_rpc(rpc_method, params).await { - Some(Ok(value)) => { - log::debug!("[mcp_server] tools/call success tool={}", spec.name); - Ok(tool_success(value)) - } - Some(Err(message)) => { - log::warn!( - "[mcp_server] tools/call handler error tool={} error={}", - spec.name, - message - ); - Ok(tool_error(format!("{} failed: {message}", spec.name))) - } - None => { - log::error!( - "[mcp_server] tools/call mapping missing registered RPC method tool={} rpc_method={}", - spec.name, - rpc_method - ); - 
Ok(tool_error(format!( - "{} is unavailable: mapped RPC method `{}` is not registered", - spec.name, rpc_method - ))) - } - } -} - -fn no_args_schema() -> Value { - json!({ - "type": "object", - "properties": {}, - "additionalProperties": false - }) -} - -fn query_schema(query_description: &str) -> Value { - json!({ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": query_description, - "minLength": 1 - }, - "k": { - "type": "integer", - "description": format!("Maximum chunks to return. Defaults to {DEFAULT_LIMIT}; capped at {MAX_LIMIT}."), - "minimum": 1, - "maximum": MAX_LIMIT - } - }, - "required": ["query"], - "additionalProperties": false - }) -} - -fn build_rpc_params( - tool_name: &str, - arguments: Value, -) -> Result, ToolCallError> { - let args = object_arguments(arguments)?; - match tool_name { - "core.list_tools" | "core.tool_instructions" | "agent.list_subagents" => { - reject_unexpected_arguments(&args, &[])?; - Ok(Map::new()) - } - "agent.run_subagent" => { - reject_unexpected_arguments(&args, SUBAGENT_RUN_ARGUMENTS)?; - let agent_id = required_non_empty_string(&args, "agent_id")?; - let prompt = required_non_empty_string(&args, "prompt")?; - Ok(Map::from_iter([ - ("agent_id".to_string(), Value::String(agent_id)), - ("prompt".to_string(), Value::String(prompt)), - ])) - } - "memory.search" | "memory.recall" => { - reject_unexpected_arguments(&args, QUERY_ARGUMENTS)?; - let query = required_non_empty_string(&args, "query")?; - let limit = optional_limit(&args)?; - Ok(Map::from_iter([ - ("query".to_string(), Value::String(query)), - ("k".to_string(), Value::from(limit)), - ])) - } - "searxng_search" => { - reject_unexpected_arguments(&args, SEARXNG_SEARCH_ARGUMENTS)?; - let query = required_non_empty_string(&args, "query")?; - let mut params = Map::new(); - params.insert("query".to_string(), Value::String(query)); - if let Some(categories) = optional_string_array(&args, "categories")? 
{ - crate::openhuman::tools::normalize_categories(categories.clone()) - .map_err(|err| ToolCallError::InvalidParams(err.to_string()))?; - params.insert("categories".to_string(), Value::from(categories)); - } - if let Some(language) = optional_non_empty_string(&args, "language")? { - params.insert("language".to_string(), Value::String(language)); - } - if let Some(max_results) = optional_max_results(&args, "max_results")? { - params.insert("max_results".to_string(), Value::from(max_results)); - } - Ok(params) - } - "tree.read_chunk" => { - reject_unexpected_arguments(&args, TREE_READ_CHUNK_ARGUMENTS)?; - let chunk_id = required_non_empty_string(&args, "chunk_id")?; - Ok(Map::from_iter([( - "id".to_string(), - Value::String(chunk_id), - )])) - } - "tree.browse" => { - reject_unexpected_arguments(&args, TREE_BROWSE_ARGUMENTS)?; - let mut params = Map::new(); - // MCP-side `k` maps to the controller's `limit` and is capped at - // MAX_LIMIT for parity with the search / recall tools. The - // controller itself accepts up to 1000, but the MCP layer keeps - // the surface narrow so the LLM doesn't waste tokens pulling a - // huge page. - params.insert("limit".to_string(), Value::from(optional_limit(&args)?)); - if let Some(values) = optional_string_array(&args, "source_kinds")? { - params.insert("source_kinds".to_string(), Value::from(values)); - } - if let Some(values) = optional_string_array(&args, "source_ids")? { - params.insert("source_ids".to_string(), Value::from(values)); - } - if let Some(values) = optional_string_array(&args, "entity_ids")? { - params.insert("entity_ids".to_string(), Value::from(values)); - } - if let Some(value) = optional_i64(&args, "since_ms")? { - params.insert("since_ms".to_string(), Value::from(value)); - } - if let Some(value) = optional_i64(&args, "until_ms")? { - params.insert("until_ms".to_string(), Value::from(value)); - } - if let Some(value) = optional_non_empty_string(&args, "query")? 
{ - params.insert("query".to_string(), Value::String(value)); - } - if let Some(value) = optional_u64(&args, "offset")? { - params.insert("offset".to_string(), Value::from(value)); - } - Ok(params) - } - "tree.top_entities" => { - reject_unexpected_arguments(&args, TREE_TOP_ENTITIES_ARGUMENTS)?; - // The controller's `limit` is required; default + cap at the MCP - // layer so the LLM doesn't have to know the underlying contract. - let mut params = Map::new(); - params.insert("limit".to_string(), Value::from(optional_limit(&args)?)); - if let Some(value) = optional_non_empty_string(&args, "kind")? { - params.insert("kind".to_string(), Value::String(value)); - } - Ok(params) - } - "tree.list_sources" => { - reject_unexpected_arguments(&args, TREE_LIST_SOURCES_ARGUMENTS)?; - let mut params = Map::new(); - if let Some(value) = optional_non_empty_string(&args, "user_email_hint")? { - params.insert("user_email_hint".to_string(), Value::String(value)); - } - Ok(params) - } - "memory.store" => { - reject_unexpected_arguments(&args, MEMORY_STORE_ARGUMENTS)?; - let title = required_non_empty_string(&args, "title")?; - let content = required_non_empty_string(&args, "content")?; - let namespace = - optional_non_empty_string(&args, "namespace")?.unwrap_or_else(|| "mcp".to_string()); - // Generate a deterministic key from the title for upsert dedup. - let key = format!("mcp-store-{}", slug_from(&title)); - let mut params = Map::new(); - params.insert("namespace".to_string(), Value::String(namespace)); - params.insert("key".to_string(), Value::String(key)); - params.insert("title".to_string(), Value::String(title)); - params.insert("content".to_string(), Value::String(content)); - params.insert("source_type".to_string(), Value::String("mcp".to_string())); - if let Some(tags) = optional_string_array(&args, "tags")? 
{ - params.insert( - "tags".to_string(), - Value::Array(tags.into_iter().map(Value::String).collect()), - ); - } - Ok(params) - } - "memory.note" => { - reject_unexpected_arguments(&args, MEMORY_NOTE_ARGUMENTS)?; - let chunk_id = required_non_empty_string(&args, "chunk_id")?; - let note_text = required_non_empty_string(&args, "note_text")?; - let key = format!("mcp-note-{chunk_id}"); - let title = format!("Note on chunk {chunk_id}"); - let content = format!("[annotation for chunk_id={chunk_id}]\n\n{note_text}"); - let mut metadata = Map::new(); - metadata.insert("annotates_chunk_id".to_string(), Value::String(chunk_id)); - let mut params = Map::new(); - params.insert("namespace".to_string(), Value::String("mcp".to_string())); - params.insert("key".to_string(), Value::String(key)); - params.insert("title".to_string(), Value::String(title)); - params.insert("content".to_string(), Value::String(content)); - params.insert("source_type".to_string(), Value::String("mcp".to_string())); - params.insert("metadata".to_string(), Value::Object(metadata)); - Ok(params) - } - "tree.tag" => { - reject_unexpected_arguments(&args, TREE_TAG_ARGUMENTS)?; - let chunk_id = required_non_empty_string(&args, "chunk_id")?; - // `required_non_empty_string_array` checks both presence and - // that the resulting list isn't empty after trimming — keeps - // the LLM honest about supplying at least one label per call. - let tags = required_non_empty_string_array(&args, "tags")?; - // Cap the tag set to keep the tag-record document bounded: - // * `TREE_TAG_MAX_TAGS` rejects pathological cases where a - // misbehaving client floods one chunk with hundreds of - // labels (would also bloat the document tags index). - // * `TREE_TAG_MAX_TAG_LENGTH` rejects oversize labels that - // are almost certainly free-form text (which belongs in - // `memory.note`, not the categorical tag surface). 
- // Both reject up-front rather than silently truncating — same - // "explicit rejection" pattern as `required_non_empty_string_array`. - if tags.len() > TREE_TAG_MAX_TAGS { - return Err(ToolCallError::InvalidParams(format!( - "argument `tags` accepts at most {TREE_TAG_MAX_TAGS} entries (got {})", - tags.len() - ))); - } - if let Some(oversize) = tags.iter().find(|t| t.len() > TREE_TAG_MAX_TAG_LENGTH) { - return Err(ToolCallError::InvalidParams(format!( - "argument `tags` entry exceeds {TREE_TAG_MAX_TAG_LENGTH} bytes (got {} bytes)", - oversize.len() - ))); - } - // Deterministic key keyed on `chunk_id` (not on tag content) - // so re-tagging the same chunk upserts the prior tag-record - // document rather than accumulating duplicate annotations. - // This is the structural difference from `memory.note` - // (which keys on chunk_id too but is content-additive in - // intent; the LLM is expected to call note again to append). - let key = format!("mcp-tag-{chunk_id}"); - let title = format!("Tags for chunk {chunk_id}"); - let content = format!( - "[tag record for chunk_id={chunk_id}]\n\nApplied tags: {}", - tags.join(", ") - ); - // Build the tag list as a JSON array once, then share it - // between metadata.applied_tags and the top-level `tags` - // field. `tags_array.clone()` on the cached Value is the - // cheapest path — it clones each tag String once total, - // matching what an in-place double-collect would do. - let tags_array = Value::Array(tags.into_iter().map(Value::String).collect()); - let mut metadata = Map::new(); - metadata.insert("tags_for_chunk_id".to_string(), Value::String(chunk_id)); - // `applied_tags` mirrors `tags` for callers that consume the - // metadata view; the top-level `tags` field below feeds the - // document tags index (queryable through `doc_list` etc.). 
- metadata.insert("applied_tags".to_string(), tags_array.clone()); - let mut params = Map::new(); - params.insert("namespace".to_string(), Value::String("mcp".to_string())); - params.insert("key".to_string(), Value::String(key)); - params.insert("title".to_string(), Value::String(title)); - params.insert("content".to_string(), Value::String(content)); - params.insert("source_type".to_string(), Value::String("mcp".to_string())); - params.insert("tags".to_string(), tags_array); - params.insert("metadata".to_string(), Value::Object(metadata)); - Ok(params) - } - _ => Err(ToolCallError::InvalidParams(format!( - "unknown MCP tool `{tool_name}`" - ))), - } -} - -fn reject_unexpected_arguments( - args: &Map, - allowed: &[&str], -) -> Result<(), ToolCallError> { - let mut unexpected = args - .keys() - .filter(|key| !allowed.contains(&key.as_str())) - .cloned() - .collect::>(); - if unexpected.is_empty() { - return Ok(()); - } - unexpected.sort(); - Err(ToolCallError::InvalidParams(format!( - "unexpected argument `{}`", - unexpected.join("`, `") - ))) -} - -fn object_arguments(arguments: Value) -> Result, ToolCallError> { - match arguments { - Value::Null => Ok(Map::new()), - Value::Object(map) => Ok(map), - other => Err(ToolCallError::InvalidParams(format!( - "tools/call arguments must be an object, got {}", - json_type_name(&other) - ))), - } -} - -fn required_non_empty_string( - args: &Map, - key: &str, -) -> Result { - let raw = args.get(key).and_then(Value::as_str).ok_or_else(|| { - ToolCallError::InvalidParams(format!("missing required argument `{key}`")) - })?; - let trimmed = raw.trim(); - if trimmed.is_empty() { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must not be empty" - ))); - } - Ok(trimmed.to_string()) -} - -fn optional_non_empty_string( - args: &Map, - key: &str, -) -> Result, ToolCallError> { - let Some(value) = args.get(key) else { - return Ok(None); - }; - if value.is_null() { - return Ok(None); - } - let Some(raw) = 
value.as_str() else { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must be a string" - ))); - }; - let trimmed = raw.trim(); - if trimmed.is_empty() { - // Distinguish "absent" (Ok(None)) from "present but blank" — the - // latter is a client bug worth surfacing so the LLM can drop the - // field entirely on the next call instead of resending whitespace. - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must not be empty when provided" - ))); - } - Ok(Some(trimmed.to_string())) -} - -fn optional_string_array( - args: &Map, - key: &str, -) -> Result>, ToolCallError> { - let Some(value) = args.get(key) else { - return Ok(None); - }; - if value.is_null() { - return Ok(None); - } - let Some(items) = value.as_array() else { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must be an array of strings, got {}", - json_type_name(value) - ))); - }; - let mut out = Vec::with_capacity(items.len()); - let mut dropped_blank = 0usize; - for item in items { - let Some(s) = item.as_str() else { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must contain only strings, got {} entry", - json_type_name(item) - ))); - }; - let trimmed = s.trim(); - if trimmed.is_empty() { - dropped_blank += 1; - continue; - } - out.push(trimmed.to_string()); - } - if dropped_blank > 0 { - // Visibility for the silent-drop behaviour: callers don't see how many - // entries were skipped, and a downstream "the filter didn't match" - // bug is much faster to triage when this trace is in the log. - log::trace!( - "[mcp_server] optional_string_array key={key} dropped_blank_entries={dropped_blank}" - ); - } - Ok(Some(out)) -} - -/// Variant of [`optional_string_array`] that errors when the field is -/// absent, null, or resolves to an empty list after blank-trim. -/// -/// Used by tools where supplying an empty `tags: []` is a no-op the -/// caller almost certainly didn't mean (e.g. `tree.tag`). 
The MCP layer -/// rejects it up-front instead of letting it through to the document -/// RPC where the failure mode is silent. -fn required_non_empty_string_array( - args: &Map, - key: &str, -) -> Result, ToolCallError> { - let trimmed = optional_string_array(args, key)?.ok_or_else(|| { - ToolCallError::InvalidParams(format!("missing required argument `{key}`")) - })?; - if trimmed.is_empty() { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must contain at least one non-empty string" - ))); - } - Ok(trimmed) -} - -fn optional_i64(args: &Map, key: &str) -> Result, ToolCallError> { - let Some(value) = args.get(key) else { - return Ok(None); - }; - if value.is_null() { - return Ok(None); - } - value.as_i64().map(Some).ok_or_else(|| { - ToolCallError::InvalidParams(format!( - "argument `{key}` must be an integer in the i64 range" - )) - }) -} - -fn optional_u64(args: &Map, key: &str) -> Result, ToolCallError> { - let Some(value) = args.get(key) else { - return Ok(None); - }; - if value.is_null() { - return Ok(None); - } - value.as_u64().map(Some).ok_or_else(|| { - ToolCallError::InvalidParams(format!("argument `{key}` must be a non-negative integer")) - }) -} - -fn optional_limit(args: &Map) -> Result { - let Some(value) = args.get("k") else { - return Ok(DEFAULT_LIMIT); - }; - let Some(limit) = value.as_u64() else { - return Err(ToolCallError::InvalidParams( - "argument `k` must be a positive integer".to_string(), - )); - }; - if limit == 0 { - return Err(ToolCallError::InvalidParams( - "argument `k` must be greater than zero".to_string(), - )); - } - if limit > MAX_LIMIT { - // Reject explicitly instead of silently clamping. The schema advertises - // `maximum: MAX_LIMIT`, so a higher value is a client bug; surfacing it - // lets the LLM self-correct on the next call instead of believing it - // received the page size it asked for. 
- return Err(ToolCallError::InvalidParams(format!( - "argument `k` must not exceed {MAX_LIMIT} (got {limit})" - ))); - } - Ok(limit) -} - -fn optional_max_results( - args: &Map, - key: &str, -) -> Result, ToolCallError> { - let Some(value) = args.get(key) else { - return Ok(None); - }; - if value.is_null() { - return Ok(None); - } - let Some(limit) = value.as_u64() else { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must be a positive integer" - ))); - }; - if limit == 0 { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must be greater than zero" - ))); - } - if limit > SEARXNG_MAX_RESULTS as u64 { - return Err(ToolCallError::InvalidParams(format!( - "argument `{key}` must not exceed {SEARXNG_MAX_RESULTS} (got {limit})" - ))); - } - Ok(Some(limit)) -} - -fn validate_controller_params( - spec: &McpToolSpec, - params: &Map, -) -> Result<(), ToolCallError> { - let rpc_method = spec.rpc_method.ok_or_else(|| { - ToolCallError::Internal(format!( - "MCP tool `{}` does not dispatch through RPC validation", - spec.name - )) - })?; - let schema = all::schema_for_rpc_method(rpc_method).ok_or_else(|| { - ToolCallError::InvalidParams(format!( - "mapped RPC method `{}` is not registered", - rpc_method - )) - })?; - all::validate_params(&schema, params).map_err(ToolCallError::InvalidParams) -} - -async fn enforce_read_policy(tool_name: &str) -> Result<(), ToolCallError> { - // Config-load failure is an internal/server issue (disk error, corrupt - // config), not bad client input — report it as `-32603 Internal error` - // rather than `-32602 Invalid params`. 
- let config = match config_rpc::load_config_with_timeout().await { - Ok(config) => config, - Err(err) => { - log::warn!( - "[mcp_server] enforce_read_policy config load failed tool={tool_name} error={err}" - ); - return Err(ToolCallError::Internal(format!( - "failed to load config: {err}" - ))); - } - }; - let policy = - SecurityPolicy::from_config(&config.autonomy, &config.workspace_dir, &config.action_dir); - // A policy denial *is* something the caller can act on (toggle autonomy, - // approve the tool) — keep that as `InvalidParams` so clients surface the - // reason text instead of a generic internal-error banner. - policy - .enforce_tool_operation(ToolOperation::Read, tool_name) - .map_err(ToolCallError::InvalidParams) -} - -async fn enforce_act_policy(tool_name: &str) -> Result<(), ToolCallError> { - let config = match config_rpc::load_config_with_timeout().await { - Ok(config) => config, - Err(err) => { - log::warn!( - "[mcp_server] enforce_act_policy config load failed tool={tool_name} error={err}" - ); - return Err(ToolCallError::Internal(format!( - "failed to load config: {err}" - ))); - } - }; - let policy = - SecurityPolicy::from_config(&config.autonomy, &config.workspace_dir, &config.action_dir); - policy - .enforce_tool_operation(ToolOperation::Act, tool_name) - .map_err(ToolCallError::InvalidParams) -} - -async fn load_config_and_init_registry() -> Result -{ - let config = config_rpc::load_config_with_timeout() - .await - .map_err(|err| ToolCallError::Internal(format!("failed to load config: {err}")))?; - AgentDefinitionRegistry::init_global(&config.workspace_dir).map_err(|err| { - ToolCallError::Internal(format!( - "failed to initialise AgentDefinitionRegistry: {err}" - )) - })?; - Ok(config) -} - -async fn build_orchestrator_agent() -> Result { - let config = load_config_and_init_registry().await?; - let mut agent = Agent::from_config_for_agent(&config, "orchestrator").map_err(|err| { - ToolCallError::Internal(format!("failed to build 
orchestrator agent: {err}")) - })?; - agent.fetch_connected_integrations().await; - let _ = agent.refresh_delegation_tools(); - Ok(agent) -} - -async fn list_core_tools() -> Result { - let agent = build_orchestrator_agent().await?; - let tools = agent - .tool_specs() - .iter() - .map(|spec| { - json!({ - "name": spec.name, - "description": spec.description, - "parameters": spec.parameters, - }) - }) - .collect::>(); - Ok(tool_success(json!({ "tools": tools }))) -} - -async fn core_tool_instructions() -> Result { - let agent = build_orchestrator_agent().await?; - Ok(tool_text_success(build_tool_instructions_text( - agent.tool_specs(), - ))) -} - -async fn list_subagents() -> Result { - let config = load_config_and_init_registry().await?; - let registry = AgentDefinitionRegistry::global().ok_or_else(|| { - ToolCallError::Internal("AgentDefinitionRegistry missing after init".to_string()) - })?; - - let definitions = registry - .list() - .into_iter() - .map(|def| { - json!({ - "id": def.id, - "display_name": def.display_name(), - "when_to_use": def.when_to_use, - "temperature": def.temperature, - "max_iterations": def.max_iterations, - "sandbox_mode": def.sandbox_mode, - "tool_scope": def.tools, - "subagents": def.subagents, - "source": def.source, - }) - }) - .collect::>(); - - let summary = format!( - "# OpenHuman Subagents\n\nWorkspace: `{}`\n\n{}", - config.workspace_dir.display(), - definitions - .iter() - .map(|def| { - let id = def.get("id").and_then(Value::as_str).unwrap_or(""); - let when = def.get("when_to_use").and_then(Value::as_str).unwrap_or(""); - format!("- **{id}**: {when}") - }) - .collect::>() - .join("\n") - ); - - Ok(json!({ - "content": [{ - "type": "text", - "text": summary, - }], - "structuredContent": { - "definitions": definitions, - } - })) -} - -async fn run_subagent_tool(params: &Map) -> Result { - let agent_id = required_non_empty_string(params, "agent_id")?; - let prompt = required_non_empty_string(params, "prompt")?; - if agent_id == 
"integrations_agent" { - return Err(ToolCallError::InvalidParams( - "agent.run_subagent does not yet support `integrations_agent`; first-level MCP support is currently limited to standalone agents that do not require toolkit binding".to_string(), - )); - } - - let config = load_config_and_init_registry().await?; - let mut agent = Agent::from_config_for_agent(&config, &agent_id).map_err(|err| { - ToolCallError::InvalidParams(format!("failed to build agent `{agent_id}`: {err}")) - })?; - agent.set_event_context( - format!("mcp:{}:{}", agent_id, uuid::Uuid::new_v4()), - "mcp_server", - ); - agent.fetch_connected_integrations().await; - let _ = agent.refresh_delegation_tools(); - - // The MCP server surface exposes openhuman agents to remote MCP - // clients. Treat callers as ExternalChannel — their prompt text is - // remote-controlled and any external_effect tool the agent tries to - // run must route through the gate's audit + TTL-deny path. - let origin = crate::openhuman::agent::turn_origin::AgentTurnOrigin::ExternalChannel { - channel: "mcp_server".to_string(), - // MCP server callers don't carry a per-user identity at this - // layer — the calling MCP client is the addressing primitive. - // Leave sender unset; the gate's per-channel TTL-deny still - // gates any external_effect tool the agent tries to run. 
- sender: None, - reply_target: agent_id.clone(), - message_id: uuid::Uuid::new_v4().to_string(), - }; - let response = - crate::openhuman::agent::turn_origin::with_origin(origin, agent.run_single(&prompt)) - .await - .map_err(|err| { - ToolCallError::Internal(format!("subagent `{agent_id}` failed: {err}")) - })?; - - Ok(json!({ - "content": [{ - "type": "text", - "text": response, - }], - "structuredContent": { - "agent_id": agent_id, - "response": response, - } - })) -} - -pub(super) fn tool_success(value: Value) -> Value { - json!({ - "content": [{ - "type": "text", - "text": serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string()), - }] - }) -} - -fn tool_text_success(text: String) -> Value { - json!({ - "content": [{ - "type": "text", - "text": text, - }] - }) -} - -pub(super) fn tool_error(message: String) -> Value { - json!({ - "content": [{ - "type": "text", - "text": message, - }], - "isError": true - }) -} - -/// Produce a URL-safe slug from a title for use as a document key. -/// Lowercases, replaces non-alphanumeric runs with a single hyphen, and -/// truncates at 64 characters. -fn slug_from(title: &str) -> String { - let slug: String = title - .chars() - .map(|c| { - if c.is_ascii_alphanumeric() { - c.to_ascii_lowercase() - } else { - '-' - } - }) - .collect(); - // Collapse runs of hyphens, trim leading/trailing. - let mut result = String::with_capacity(slug.len()); - let mut prev_hyphen = true; // treat start as hyphen to trim leading - for ch in slug.chars() { - if ch == '-' { - if !prev_hyphen { - result.push('-'); - } - prev_hyphen = true; - } else { - result.push(ch); - prev_hyphen = false; - } - } - // Trim trailing hyphen - while result.ends_with('-') { - result.pop(); - } - if result.len() > 64 { - result.truncate(64); - while result.ends_with('-') { - result.pop(); - } - } - if result.is_empty() { - // Fallback for titles with no ASCII-alphanumeric characters (e.g. - // Unicode-only titles like "会议记录" or "Протокол"). 
Use a short - // stable hash of the original title to ensure distinct slugs. - use sha2::{Digest, Sha256}; - let hash = hex::encode(&Sha256::digest(title.as_bytes())[..8]); - return format!("untitled-{hash}"); - } - result -} - -fn json_type_name(value: &Value) -> &'static str { - match value { - Value::Null => "null", - Value::Bool(_) => "bool", - Value::Number(_) => "number", - Value::String(_) => "string", - Value::Array(_) => "array", - Value::Object(_) => "object", - } -} - -#[cfg(test)] -#[path = "tools_tests.rs"] -mod tests; diff --git a/src/openhuman/mcp_server/tools/dispatch.rs b/src/openhuman/mcp_server/tools/dispatch.rs new file mode 100644 index 0000000000..f9661f4948 --- /dev/null +++ b/src/openhuman/mcp_server/tools/dispatch.rs @@ -0,0 +1,376 @@ +use serde_json::{json, Map, Value}; + +use crate::core::all; +use crate::openhuman::agent::harness::AgentDefinitionRegistry; +use crate::openhuman::agent::Agent; +use crate::openhuman::config::rpc as config_rpc; +use crate::openhuman::inference::provider::traits::build_tool_instructions_text; +use crate::openhuman::security::{SecurityPolicy, ToolOperation}; + +use super::super::write_dispatch; +use super::params::{build_rpc_params, validate_controller_params}; +use super::specs::{ + base_tool_specs, list_tools_result_for_config, list_tools_result_from_specs, searxng_tool_spec, + tool_specs, +}; +use super::types::ToolCallError; + +pub async fn list_tools_result() -> Value { + match config_rpc::load_config_with_timeout().await { + Ok(config) => list_tools_result_for_config(&config), + Err(err) => { + log::warn!( + "[mcp_server] tools/list config load failed; omitting config-gated tools: {err}" + ); + list_tools_result_from_specs(base_tool_specs()) + } + } +} + +pub async fn call_tool( + name: &str, + arguments: Value, + client_info: &str, +) -> Result { + let spec = tool_specs() + .into_iter() + .find(|tool| tool.name == name) + .ok_or_else(|| ToolCallError::InvalidParams(format!("unknown MCP tool 
`{name}`")))?; + + let audit_arguments = arguments.clone(); + let mut params = match build_rpc_params(spec.name, arguments) { + Ok(params) => params, + Err(err) => { + if write_dispatch::is_write_tool(spec.name) { + write_dispatch::audit_write_rejection_without_config( + spec.name, + &audit_arguments, + client_info, + err.message(), + ); + } + return Err(err); + } + }; + match spec.name { + "core.list_tools" => { + enforce_read_policy(spec.name).await?; + return list_core_tools().await; + } + "core.tool_instructions" => { + enforce_read_policy(spec.name).await?; + return core_tool_instructions().await; + } + "agent.list_subagents" => { + enforce_read_policy(spec.name).await?; + return list_subagents().await; + } + "agent.run_subagent" => { + enforce_act_policy(spec.name).await?; + return run_subagent_tool(¶ms).await; + } + "memory.store" | "memory.note" | "tree.tag" => { + let config = write_dispatch::load_write_config(spec.name).await?; + if let Err(err) = write_dispatch::enforce_write_policy_for_config(spec.name, &config) { + write_dispatch::audit_write_rejection( + &config, + spec.name, + &audit_arguments, + Some(¶ms), + client_info, + &err, + ); + return Err(err); + } + params.insert( + "source_type".to_string(), + Value::String(client_info.to_string()), + ); + if let Err(err) = validate_controller_params(&spec, ¶ms) { + write_dispatch::audit_write_rejection( + &config, + spec.name, + &audit_arguments, + Some(¶ms), + client_info, + &err, + ); + return Err(err); + } + return write_dispatch::dispatch_write_tool( + spec.name, + ¶ms, + &audit_arguments, + client_info, + &config, + ) + .await; + } + _ => {} + } + + validate_controller_params(&spec, ¶ms)?; + enforce_read_policy(spec.name).await?; + + let rpc_method = spec.rpc_method.ok_or_else(|| { + ToolCallError::Internal(format!( + "MCP tool `{}` is missing its RPC mapping", + spec.name + )) + })?; + + log::debug!( + "[mcp_server] tools/call dispatch tool={} rpc_method={} arg_keys={:?}", + spec.name, + rpc_method, 
+ params.keys().collect::>() + ); + + match all::try_invoke_registered_rpc(rpc_method, params).await { + Some(Ok(value)) => { + log::debug!("[mcp_server] tools/call success tool={}", spec.name); + Ok(tool_success(value)) + } + Some(Err(message)) => { + log::warn!( + "[mcp_server] tools/call handler error tool={} error={}", + spec.name, + message + ); + Ok(tool_error(format!("{} failed: {message}", spec.name))) + } + None => { + log::error!( + "[mcp_server] tools/call mapping missing registered RPC method tool={} rpc_method={}", + spec.name, + rpc_method + ); + Ok(tool_error(format!( + "{} is unavailable: mapped RPC method `{}` is not registered", + spec.name, rpc_method + ))) + } + } +} + +async fn enforce_read_policy(tool_name: &str) -> Result<(), ToolCallError> { + // Config-load failure is an internal/server issue (disk error, corrupt + // config), not bad client input — report it as `-32603 Internal error` + // rather than `-32602 Invalid params`. + let config = match config_rpc::load_config_with_timeout().await { + Ok(config) => config, + Err(err) => { + log::warn!( + "[mcp_server] enforce_read_policy config load failed tool={tool_name} error={err}" + ); + return Err(ToolCallError::Internal(format!( + "failed to load config: {err}" + ))); + } + }; + let policy = + SecurityPolicy::from_config(&config.autonomy, &config.workspace_dir, &config.action_dir); + // A policy denial *is* something the caller can act on (toggle autonomy, + // approve the tool) — keep that as `InvalidParams` so clients surface the + // reason text instead of a generic internal-error banner. 
+ policy + .enforce_tool_operation(ToolOperation::Read, tool_name) + .map_err(ToolCallError::InvalidParams) +} + +async fn enforce_act_policy(tool_name: &str) -> Result<(), ToolCallError> { + let config = match config_rpc::load_config_with_timeout().await { + Ok(config) => config, + Err(err) => { + log::warn!( + "[mcp_server] enforce_act_policy config load failed tool={tool_name} error={err}" + ); + return Err(ToolCallError::Internal(format!( + "failed to load config: {err}" + ))); + } + }; + let policy = + SecurityPolicy::from_config(&config.autonomy, &config.workspace_dir, &config.action_dir); + policy + .enforce_tool_operation(ToolOperation::Act, tool_name) + .map_err(ToolCallError::InvalidParams) +} + +async fn load_config_and_init_registry() -> Result +{ + let config = config_rpc::load_config_with_timeout() + .await + .map_err(|err| ToolCallError::Internal(format!("failed to load config: {err}")))?; + AgentDefinitionRegistry::init_global(&config.workspace_dir).map_err(|err| { + ToolCallError::Internal(format!( + "failed to initialise AgentDefinitionRegistry: {err}" + )) + })?; + Ok(config) +} + +async fn build_orchestrator_agent() -> Result { + let config = load_config_and_init_registry().await?; + let mut agent = Agent::from_config_for_agent(&config, "orchestrator").map_err(|err| { + ToolCallError::Internal(format!("failed to build orchestrator agent: {err}")) + })?; + agent.fetch_connected_integrations().await; + let _ = agent.refresh_delegation_tools(); + Ok(agent) +} + +async fn list_core_tools() -> Result { + let agent = build_orchestrator_agent().await?; + let tools = agent + .tool_specs() + .iter() + .map(|spec| { + json!({ + "name": spec.name, + "description": spec.description, + "parameters": spec.parameters, + }) + }) + .collect::>(); + Ok(tool_success(json!({ "tools": tools }))) +} + +async fn core_tool_instructions() -> Result { + let agent = build_orchestrator_agent().await?; + Ok(tool_text_success(build_tool_instructions_text( + 
agent.tool_specs(), + ))) +} + +async fn list_subagents() -> Result { + let config = load_config_and_init_registry().await?; + let registry = AgentDefinitionRegistry::global().ok_or_else(|| { + ToolCallError::Internal("AgentDefinitionRegistry missing after init".to_string()) + })?; + + let definitions = registry + .list() + .into_iter() + .map(|def| { + json!({ + "id": def.id, + "display_name": def.display_name(), + "when_to_use": def.when_to_use, + "temperature": def.temperature, + "max_iterations": def.max_iterations, + "sandbox_mode": def.sandbox_mode, + "tool_scope": def.tools, + "subagents": def.subagents, + "source": def.source, + }) + }) + .collect::>(); + + let summary = format!( + "# OpenHuman Subagents\n\nWorkspace: `{}`\n\n{}", + config.workspace_dir.display(), + definitions + .iter() + .map(|def| { + let id = def.get("id").and_then(Value::as_str).unwrap_or(""); + let when = def.get("when_to_use").and_then(Value::as_str).unwrap_or(""); + format!("- **{id}**: {when}") + }) + .collect::>() + .join("\n") + ); + + Ok(json!({ + "content": [{ + "type": "text", + "text": summary, + }], + "structuredContent": { + "definitions": definitions, + } + })) +} + +async fn run_subagent_tool(params: &Map) -> Result { + use super::params::required_non_empty_string; + + let agent_id = required_non_empty_string(params, "agent_id")?; + let prompt = required_non_empty_string(params, "prompt")?; + if agent_id == "integrations_agent" { + return Err(ToolCallError::InvalidParams( + "agent.run_subagent does not yet support `integrations_agent`; first-level MCP support is currently limited to standalone agents that do not require toolkit binding".to_string(), + )); + } + + let config = load_config_and_init_registry().await?; + let mut agent = Agent::from_config_for_agent(&config, &agent_id).map_err(|err| { + ToolCallError::InvalidParams(format!("failed to build agent `{agent_id}`: {err}")) + })?; + agent.set_event_context( + format!("mcp:{}:{}", agent_id, uuid::Uuid::new_v4()), + 
"mcp_server", + ); + agent.fetch_connected_integrations().await; + let _ = agent.refresh_delegation_tools(); + + // The MCP server surface exposes openhuman agents to remote MCP + // clients. Treat callers as ExternalChannel — their prompt text is + // remote-controlled and any external_effect tool the agent tries to + // run must route through the gate's audit + TTL-deny path. + let origin = crate::openhuman::agent::turn_origin::AgentTurnOrigin::ExternalChannel { + channel: "mcp_server".to_string(), + // MCP server callers don't carry a per-user identity at this + // layer — the calling MCP client is the addressing primitive. + // Leave sender unset; the gate's per-channel TTL-deny still + // gates any external_effect tool the agent tries to run. + sender: None, + reply_target: agent_id.clone(), + message_id: uuid::Uuid::new_v4().to_string(), + }; + let response = + crate::openhuman::agent::turn_origin::with_origin(origin, agent.run_single(&prompt)) + .await + .map_err(|err| { + ToolCallError::Internal(format!("subagent `{agent_id}` failed: {err}")) + })?; + + Ok(json!({ + "content": [{ + "type": "text", + "text": response, + }], + "structuredContent": { + "agent_id": agent_id, + "response": response, + } + })) +} + +pub fn tool_success(value: Value) -> Value { + json!({ + "content": [{ + "type": "text", + "text": serde_json::to_string_pretty(&value).unwrap_or_else(|_| value.to_string()), + }] + }) +} + +fn tool_text_success(text: String) -> Value { + json!({ + "content": [{ + "type": "text", + "text": text, + }] + }) +} + +pub fn tool_error(message: String) -> Value { + json!({ + "content": [{ + "type": "text", + "text": message, + }], + "isError": true + }) +} diff --git a/src/openhuman/mcp_server/tools/mod.rs b/src/openhuman/mcp_server/tools/mod.rs new file mode 100644 index 0000000000..acf6f012c1 --- /dev/null +++ b/src/openhuman/mcp_server/tools/mod.rs @@ -0,0 +1,39 @@ +//! MCP tool catalog, parameter validation, and dispatch logic. +//! +//! 
Split into focused sub-modules: +//! - `types` — `McpToolSpec`, `ToolCallError`, shared constants +//! - `specs` — tool spec builders and schema helpers +//! - `params` — argument parsing and RPC param construction +//! - `dispatch` — `call_tool`, `list_tools_result`, agent/subagent handlers + +mod dispatch; +mod params; +mod specs; +mod types; + +// Public API consumed by the rest of `mcp_server` +pub use dispatch::{call_tool, list_tools_result, tool_error, tool_success}; +pub use specs::{ + base_tool_specs, list_tools_result_for_config, list_tools_result_from_specs, searxng_tool_spec, + tool_specs, +}; +pub use types::{McpToolSpec, ToolCallError}; + +// Re-exports needed by the companion test module via `use super::*`. +// Guarded by `#[cfg(test)]` so they do not pollute the production namespace. +#[cfg(test)] +pub use crate::core::all; +#[cfg(test)] +pub use crate::openhuman::config::rpc as config_rpc; +#[cfg(test)] +pub use crate::openhuman::tools::SEARXNG_MAX_RESULTS; +#[cfg(test)] +pub use params::{build_rpc_params, slug_from}; +#[cfg(test)] +pub use serde_json::{json, Value}; +#[cfg(test)] +pub use types::{DEFAULT_LIMIT, MAX_LIMIT, TREE_TAG_MAX_TAGS, TREE_TAG_MAX_TAG_LENGTH}; + +#[cfg(test)] +#[path = "../tools_tests.rs"] +mod tests; diff --git a/src/openhuman/mcp_server/tools/params.rs b/src/openhuman/mcp_server/tools/params.rs new file mode 100644 index 0000000000..70de38351a --- /dev/null +++ b/src/openhuman/mcp_server/tools/params.rs @@ -0,0 +1,521 @@ +use serde_json::{Map, Value}; + +use crate::core::all; +use crate::openhuman::tools::SEARXNG_MAX_RESULTS; + +use super::types::{ + McpToolSpec, ToolCallError, DEFAULT_LIMIT, MAX_LIMIT, MEMORY_NOTE_ARGUMENTS, + MEMORY_STORE_ARGUMENTS, QUERY_ARGUMENTS, SEARXNG_SEARCH_ARGUMENTS, SUBAGENT_RUN_ARGUMENTS, + TREE_BROWSE_ARGUMENTS, TREE_LIST_SOURCES_ARGUMENTS, TREE_READ_CHUNK_ARGUMENTS, + TREE_TAG_ARGUMENTS, TREE_TAG_MAX_TAGS, TREE_TAG_MAX_TAG_LENGTH, TREE_TOP_ENTITIES_ARGUMENTS, +}; + +pub fn build_rpc_params( + 
tool_name: &str, + arguments: Value, +) -> Result, ToolCallError> { + let args = object_arguments(arguments)?; + match tool_name { + "core.list_tools" | "core.tool_instructions" | "agent.list_subagents" => { + reject_unexpected_arguments(&args, &[])?; + Ok(Map::new()) + } + "agent.run_subagent" => { + reject_unexpected_arguments(&args, SUBAGENT_RUN_ARGUMENTS)?; + let agent_id = required_non_empty_string(&args, "agent_id")?; + let prompt = required_non_empty_string(&args, "prompt")?; + Ok(Map::from_iter([ + ("agent_id".to_string(), Value::String(agent_id)), + ("prompt".to_string(), Value::String(prompt)), + ])) + } + "memory.search" | "memory.recall" => { + reject_unexpected_arguments(&args, QUERY_ARGUMENTS)?; + let query = required_non_empty_string(&args, "query")?; + let limit = optional_limit(&args)?; + Ok(Map::from_iter([ + ("query".to_string(), Value::String(query)), + ("k".to_string(), Value::from(limit)), + ])) + } + "searxng_search" => { + reject_unexpected_arguments(&args, SEARXNG_SEARCH_ARGUMENTS)?; + let query = required_non_empty_string(&args, "query")?; + let mut params = Map::new(); + params.insert("query".to_string(), Value::String(query)); + if let Some(categories) = optional_string_array(&args, "categories")? { + crate::openhuman::tools::normalize_categories(categories.clone()) + .map_err(|err| ToolCallError::InvalidParams(err.to_string()))?; + params.insert("categories".to_string(), Value::from(categories)); + } + if let Some(language) = optional_non_empty_string(&args, "language")? { + params.insert("language".to_string(), Value::String(language)); + } + if let Some(max_results) = optional_max_results(&args, "max_results")? 
{ + params.insert("max_results".to_string(), Value::from(max_results)); + } + Ok(params) + } + "tree.read_chunk" => { + reject_unexpected_arguments(&args, TREE_READ_CHUNK_ARGUMENTS)?; + let chunk_id = required_non_empty_string(&args, "chunk_id")?; + Ok(Map::from_iter([( + "id".to_string(), + Value::String(chunk_id), + )])) + } + "tree.browse" => { + reject_unexpected_arguments(&args, TREE_BROWSE_ARGUMENTS)?; + let mut params = Map::new(); + // MCP-side `k` maps to the controller's `limit` and is capped at + // MAX_LIMIT for parity with the search / recall tools. The + // controller itself accepts up to 1000, but the MCP layer keeps + // the surface narrow so the LLM doesn't waste tokens pulling a + // huge page. + params.insert("limit".to_string(), Value::from(optional_limit(&args)?)); + if let Some(values) = optional_string_array(&args, "source_kinds")? { + params.insert("source_kinds".to_string(), Value::from(values)); + } + if let Some(values) = optional_string_array(&args, "source_ids")? { + params.insert("source_ids".to_string(), Value::from(values)); + } + if let Some(values) = optional_string_array(&args, "entity_ids")? { + params.insert("entity_ids".to_string(), Value::from(values)); + } + if let Some(value) = optional_i64(&args, "since_ms")? { + params.insert("since_ms".to_string(), Value::from(value)); + } + if let Some(value) = optional_i64(&args, "until_ms")? { + params.insert("until_ms".to_string(), Value::from(value)); + } + if let Some(value) = optional_non_empty_string(&args, "query")? { + params.insert("query".to_string(), Value::String(value)); + } + if let Some(value) = optional_u64(&args, "offset")? { + params.insert("offset".to_string(), Value::from(value)); + } + Ok(params) + } + "tree.top_entities" => { + reject_unexpected_arguments(&args, TREE_TOP_ENTITIES_ARGUMENTS)?; + // The controller's `limit` is required; default + cap at the MCP + // layer so the LLM doesn't have to know the underlying contract. 
+ let mut params = Map::new(); + params.insert("limit".to_string(), Value::from(optional_limit(&args)?)); + if let Some(value) = optional_non_empty_string(&args, "kind")? { + params.insert("kind".to_string(), Value::String(value)); + } + Ok(params) + } + "tree.list_sources" => { + reject_unexpected_arguments(&args, TREE_LIST_SOURCES_ARGUMENTS)?; + let mut params = Map::new(); + if let Some(value) = optional_non_empty_string(&args, "user_email_hint")? { + params.insert("user_email_hint".to_string(), Value::String(value)); + } + Ok(params) + } + "memory.store" => { + reject_unexpected_arguments(&args, MEMORY_STORE_ARGUMENTS)?; + let title = required_non_empty_string(&args, "title")?; + let content = required_non_empty_string(&args, "content")?; + let namespace = + optional_non_empty_string(&args, "namespace")?.unwrap_or_else(|| "mcp".to_string()); + // Generate a deterministic key from the title for upsert dedup. + let key = format!("mcp-store-{}", slug_from(&title)); + let mut params = Map::new(); + params.insert("namespace".to_string(), Value::String(namespace)); + params.insert("key".to_string(), Value::String(key)); + params.insert("title".to_string(), Value::String(title)); + params.insert("content".to_string(), Value::String(content)); + params.insert("source_type".to_string(), Value::String("mcp".to_string())); + if let Some(tags) = optional_string_array(&args, "tags")? 
{ + params.insert( + "tags".to_string(), + Value::Array(tags.into_iter().map(Value::String).collect()), + ); + } + Ok(params) + } + "memory.note" => { + reject_unexpected_arguments(&args, MEMORY_NOTE_ARGUMENTS)?; + let chunk_id = required_non_empty_string(&args, "chunk_id")?; + let note_text = required_non_empty_string(&args, "note_text")?; + let key = format!("mcp-note-{chunk_id}"); + let title = format!("Note on chunk {chunk_id}"); + let content = format!("[annotation for chunk_id={chunk_id}]\n\n{note_text}"); + let mut metadata = Map::new(); + metadata.insert("annotates_chunk_id".to_string(), Value::String(chunk_id)); + let mut params = Map::new(); + params.insert("namespace".to_string(), Value::String("mcp".to_string())); + params.insert("key".to_string(), Value::String(key)); + params.insert("title".to_string(), Value::String(title)); + params.insert("content".to_string(), Value::String(content)); + params.insert("source_type".to_string(), Value::String("mcp".to_string())); + params.insert("metadata".to_string(), Value::Object(metadata)); + Ok(params) + } + "tree.tag" => { + reject_unexpected_arguments(&args, TREE_TAG_ARGUMENTS)?; + let chunk_id = required_non_empty_string(&args, "chunk_id")?; + // `required_non_empty_string_array` checks both presence and + // that the resulting list isn't empty after trimming — keeps + // the LLM honest about supplying at least one label per call. + let tags = required_non_empty_string_array(&args, "tags")?; + // Cap the tag set to keep the tag-record document bounded: + // * `TREE_TAG_MAX_TAGS` rejects pathological cases where a + // misbehaving client floods one chunk with hundreds of + // labels (would also bloat the document tags index). + // * `TREE_TAG_MAX_TAG_LENGTH` rejects oversize labels that + // are almost certainly free-form text (which belongs in + // `memory.note`, not the categorical tag surface). 
+ // Both reject up-front rather than silently truncating — same + // "explicit rejection" pattern as `required_non_empty_string_array`. + if tags.len() > TREE_TAG_MAX_TAGS { + return Err(ToolCallError::InvalidParams(format!( + "argument `tags` accepts at most {TREE_TAG_MAX_TAGS} entries (got {})", + tags.len() + ))); + } + if let Some(oversize) = tags.iter().find(|t| t.len() > TREE_TAG_MAX_TAG_LENGTH) { + return Err(ToolCallError::InvalidParams(format!( + "argument `tags` entry exceeds {TREE_TAG_MAX_TAG_LENGTH} bytes (got {} bytes)", + oversize.len() + ))); + } + // Deterministic key keyed on `chunk_id` (not on tag content) + // so re-tagging the same chunk upserts the prior tag-record + // document rather than accumulating duplicate annotations. + // This is the structural difference from `memory.note` + // (which keys on chunk_id too but is content-additive in + // intent; the LLM is expected to call note again to append). + let key = format!("mcp-tag-{chunk_id}"); + let title = format!("Tags for chunk {chunk_id}"); + let content = format!( + "[tag record for chunk_id={chunk_id}]\n\nApplied tags: {}", + tags.join(", ") + ); + // Build the tag list as a JSON array once, then share it + // between metadata.applied_tags and the top-level `tags` + // field. `tags_array.clone()` on the cached Value is the + // cheapest path — it clones each tag String once total, + // matching what an in-place double-collect would do. + let tags_array = Value::Array(tags.into_iter().map(Value::String).collect()); + let mut metadata = Map::new(); + metadata.insert("tags_for_chunk_id".to_string(), Value::String(chunk_id)); + // `applied_tags` mirrors `tags` for callers that consume the + // metadata view; the top-level `tags` field below feeds the + // document tags index (queryable through `doc_list` etc.). 
+ metadata.insert("applied_tags".to_string(), tags_array.clone()); + let mut params = Map::new(); + params.insert("namespace".to_string(), Value::String("mcp".to_string())); + params.insert("key".to_string(), Value::String(key)); + params.insert("title".to_string(), Value::String(title)); + params.insert("content".to_string(), Value::String(content)); + params.insert("source_type".to_string(), Value::String("mcp".to_string())); + params.insert("tags".to_string(), tags_array); + params.insert("metadata".to_string(), Value::Object(metadata)); + Ok(params) + } + _ => Err(ToolCallError::InvalidParams(format!( + "unknown MCP tool `{tool_name}`" + ))), + } +} + +pub fn reject_unexpected_arguments( + args: &Map, + allowed: &[&str], +) -> Result<(), ToolCallError> { + let mut unexpected = args + .keys() + .filter(|key| !allowed.contains(&key.as_str())) + .cloned() + .collect::>(); + if unexpected.is_empty() { + return Ok(()); + } + unexpected.sort(); + Err(ToolCallError::InvalidParams(format!( + "unexpected argument `{}`", + unexpected.join("`, `") + ))) +} + +pub fn object_arguments(arguments: Value) -> Result, ToolCallError> { + match arguments { + Value::Null => Ok(Map::new()), + Value::Object(map) => Ok(map), + other => Err(ToolCallError::InvalidParams(format!( + "tools/call arguments must be an object, got {}", + json_type_name(&other) + ))), + } +} + +pub fn required_non_empty_string( + args: &Map, + key: &str, +) -> Result { + let raw = args.get(key).and_then(Value::as_str).ok_or_else(|| { + ToolCallError::InvalidParams(format!("missing required argument `{key}`")) + })?; + let trimmed = raw.trim(); + if trimmed.is_empty() { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must not be empty" + ))); + } + Ok(trimmed.to_string()) +} + +pub fn optional_non_empty_string( + args: &Map, + key: &str, +) -> Result, ToolCallError> { + let Some(value) = args.get(key) else { + return Ok(None); + }; + if value.is_null() { + return Ok(None); + } + let 
Some(raw) = value.as_str() else { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must be a string" + ))); + }; + let trimmed = raw.trim(); + if trimmed.is_empty() { + // Distinguish "absent" (Ok(None)) from "present but blank" — the + // latter is a client bug worth surfacing so the LLM can drop the + // field entirely on the next call instead of resending whitespace. + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must not be empty when provided" + ))); + } + Ok(Some(trimmed.to_string())) +} + +pub fn optional_string_array( + args: &Map, + key: &str, +) -> Result>, ToolCallError> { + let Some(value) = args.get(key) else { + return Ok(None); + }; + if value.is_null() { + return Ok(None); + } + let Some(items) = value.as_array() else { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must be an array of strings, got {}", + json_type_name(value) + ))); + }; + let mut out = Vec::with_capacity(items.len()); + let mut dropped_blank = 0usize; + for item in items { + let Some(s) = item.as_str() else { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must contain only strings, got {} entry", + json_type_name(item) + ))); + }; + let trimmed = s.trim(); + if trimmed.is_empty() { + dropped_blank += 1; + continue; + } + out.push(trimmed.to_string()); + } + if dropped_blank > 0 { + // Visibility for the silent-drop behaviour: callers don't see how many + // entries were skipped, and a downstream "the filter didn't match" + // bug is much faster to triage when this trace is in the log. + log::trace!( + "[mcp_server] optional_string_array key={key} dropped_blank_entries={dropped_blank}" + ); + } + Ok(Some(out)) +} + +/// Variant of [`optional_string_array`] that errors when the field is +/// absent, null, or resolves to an empty list after blank-trim. +/// +/// Used by tools where supplying an empty `tags: []` is a no-op the +/// caller almost certainly didn't mean (e.g. `tree.tag`). 
The MCP layer +/// rejects it up-front instead of letting it through to the document +/// RPC where the failure mode is silent. +pub fn required_non_empty_string_array( + args: &Map, + key: &str, +) -> Result, ToolCallError> { + let trimmed = optional_string_array(args, key)?.ok_or_else(|| { + ToolCallError::InvalidParams(format!("missing required argument `{key}`")) + })?; + if trimmed.is_empty() { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must contain at least one non-empty string" + ))); + } + Ok(trimmed) +} + +pub fn optional_i64(args: &Map, key: &str) -> Result, ToolCallError> { + let Some(value) = args.get(key) else { + return Ok(None); + }; + if value.is_null() { + return Ok(None); + } + value.as_i64().map(Some).ok_or_else(|| { + ToolCallError::InvalidParams(format!( + "argument `{key}` must be an integer in the i64 range" + )) + }) +} + +pub fn optional_u64(args: &Map, key: &str) -> Result, ToolCallError> { + let Some(value) = args.get(key) else { + return Ok(None); + }; + if value.is_null() { + return Ok(None); + } + value.as_u64().map(Some).ok_or_else(|| { + ToolCallError::InvalidParams(format!("argument `{key}` must be a non-negative integer")) + }) +} + +pub fn optional_limit(args: &Map) -> Result { + let Some(value) = args.get("k") else { + return Ok(DEFAULT_LIMIT); + }; + let Some(limit) = value.as_u64() else { + return Err(ToolCallError::InvalidParams( + "argument `k` must be a positive integer".to_string(), + )); + }; + if limit == 0 { + return Err(ToolCallError::InvalidParams( + "argument `k` must be greater than zero".to_string(), + )); + } + if limit > MAX_LIMIT { + // Reject explicitly instead of silently clamping. The schema advertises + // `maximum: MAX_LIMIT`, so a higher value is a client bug; surfacing it + // lets the LLM self-correct on the next call instead of believing it + // received the page size it asked for. 
+ return Err(ToolCallError::InvalidParams(format!( + "argument `k` must not exceed {MAX_LIMIT} (got {limit})" + ))); + } + Ok(limit) +} + +pub fn optional_max_results( + args: &Map, + key: &str, +) -> Result, ToolCallError> { + let Some(value) = args.get(key) else { + return Ok(None); + }; + if value.is_null() { + return Ok(None); + } + let Some(limit) = value.as_u64() else { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must be a positive integer" + ))); + }; + if limit == 0 { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must be greater than zero" + ))); + } + if limit > SEARXNG_MAX_RESULTS as u64 { + return Err(ToolCallError::InvalidParams(format!( + "argument `{key}` must not exceed {SEARXNG_MAX_RESULTS} (got {limit})" + ))); + } + Ok(Some(limit)) +} + +pub fn validate_controller_params( + spec: &McpToolSpec, + params: &Map, +) -> Result<(), ToolCallError> { + let rpc_method = spec.rpc_method.ok_or_else(|| { + ToolCallError::Internal(format!( + "MCP tool `{}` does not dispatch through RPC validation", + spec.name + )) + })?; + let schema = all::schema_for_rpc_method(rpc_method).ok_or_else(|| { + ToolCallError::InvalidParams(format!( + "mapped RPC method `{}` is not registered", + rpc_method + )) + })?; + all::validate_params(&schema, params).map_err(ToolCallError::InvalidParams) +} + +/// Produce a URL-safe slug from a title for use as a document key. +/// Lowercases, replaces non-alphanumeric runs with a single hyphen, and +/// truncates at 64 characters. +pub fn slug_from(title: &str) -> String { + let slug: String = title + .chars() + .map(|c| { + if c.is_ascii_alphanumeric() { + c.to_ascii_lowercase() + } else { + '-' + } + }) + .collect(); + // Collapse runs of hyphens, trim leading/trailing. 
+ let mut result = String::with_capacity(slug.len()); + let mut prev_hyphen = true; // treat start as hyphen to trim leading + for ch in slug.chars() { + if ch == '-' { + if !prev_hyphen { + result.push('-'); + } + prev_hyphen = true; + } else { + result.push(ch); + prev_hyphen = false; + } + } + // Trim trailing hyphen + while result.ends_with('-') { + result.pop(); + } + if result.len() > 64 { + result.truncate(64); + while result.ends_with('-') { + result.pop(); + } + } + if result.is_empty() { + // Fallback for titles with no ASCII-alphanumeric characters (e.g. + // Unicode-only titles like "会议记录" or "Протокол"). Use a short + // stable hash of the original title to ensure distinct slugs. + use sha2::{Digest, Sha256}; + let hash = hex::encode(&Sha256::digest(title.as_bytes())[..8]); + return format!("untitled-{hash}"); + } + result +} + +pub fn json_type_name(value: &Value) -> &'static str { + match value { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(_) => "number", + Value::String(_) => "string", + Value::Array(_) => "array", + Value::Object(_) => "object", + } +} diff --git a/src/openhuman/mcp_server/tools/specs.rs b/src/openhuman/mcp_server/tools/specs.rs new file mode 100644 index 0000000000..2958356f09 --- /dev/null +++ b/src/openhuman/mcp_server/tools/specs.rs @@ -0,0 +1,473 @@ +use serde_json::{json, Value}; + +use crate::openhuman::tools::SEARXNG_MAX_RESULTS; + +use super::types::{McpToolSpec, DEFAULT_LIMIT, MAX_LIMIT}; + +pub fn tool_specs() -> Vec { + let mut specs = base_tool_specs(); + specs.push(searxng_tool_spec()); + specs +} + +pub fn base_tool_specs() -> Vec { + vec![ + McpToolSpec { + name: "core.list_tools", + title: "List Core Tools", + description: "List the live core agent tool catalog that OpenHuman exposes to its orchestrator session.", + rpc_method: None, + input_schema: no_args_schema(), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "core.tool_instructions", + title: "Get Tool 
Instructions", + description: "Emit the markdown tool-use instructions block that OpenHuman injects into prompt-guided agents.", + rpc_method: None, + input_schema: no_args_schema(), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "agent.list_subagents", + title: "List Subagents", + description: "List registered sub-agent definitions that the core can dispatch for specialized work.", + rpc_method: None, + input_schema: no_args_schema(), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "agent.run_subagent", + title: "Run Subagent", + description: "Run a registered OpenHuman sub-agent directly from the core and return its final response.", + rpc_method: None, + input_schema: json!({ + "type": "object", + "properties": { + "agent_id": { + "type": "string", + "description": "Registered sub-agent id (for example `researcher`, `planner`, `code_executor`)." + }, + "prompt": { + "type": "string", + "description": "Task prompt for the sub-agent. Include the context it needs because this is a fresh session." + } + }, + "required": ["agent_id", "prompt"], + "additionalProperties": false + }), + // Sub-agent execution is the one Act-policy surface on the MCP + // server today (see `enforce_act_policy` dispatch in `call_tool`). + // Sub-agents can call further tools, so destructive/openWorld are + // both true; running the same agent twice is not a no-op so + // idempotent is false. 
+ annotations: json!({ + "readOnlyHint": false, + "destructiveHint": true, + "idempotentHint": false, + "openWorldHint": true + }), + }, + McpToolSpec { + name: "memory.search", + title: "Search Memory", + description: "Keyword-search OpenHuman's local memory tree and return matching chunks ordered by recency.", + rpc_method: Some("openhuman.memory_tree_search"), + input_schema: query_schema("Substring to match against stored memory chunks."), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "memory.recall", + title: "Recall Memory", + description: "Semantically recall local memory-tree chunks relevant to a natural-language query.", + rpc_method: Some("openhuman.memory_tree_recall"), + input_schema: query_schema("Natural-language query to embed and rerank against memory summaries."), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "tree.read_chunk", + title: "Read Memory Chunk", + description: "Read one memory-tree chunk by id. Use this to inspect the source text behind search or recall results.", + rpc_method: Some("openhuman.memory_tree_get_chunk"), + input_schema: json!({ + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "description": "Chunk id returned by memory.search or memory.recall." + } + }, + "required": ["chunk_id"], + "additionalProperties": false + }), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "tree.browse", + title: "Browse Memory", + description: "Paginated listing of memory-tree chunks in reverse-chronological order, \ + with optional filters by source kind, source id, entity id, time window, \ + and substring keyword. Use this when the user wants to enumerate (\"what's \ + recent in my Gmail\", \"show me everything from last week about Alice\") \ + rather than search by query. 
Returns chunks plus a total match count for \ + pagination.", + rpc_method: Some("openhuman.memory_tree_list_chunks"), + input_schema: tree_browse_schema(), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "tree.top_entities", + title: "Top Memory Entities", + description: "List the most-referenced canonical entities (people, organizations, \ + topics, emails) across the local memory tree. Call this for entity \ + discovery before drilling in with `tree.browse` (passing `entity_ids`) \ + or `memory.search`. Returns entities ordered by reference count.", + rpc_method: Some("openhuman.memory_tree_top_entities"), + input_schema: tree_top_entities_schema(), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "tree.list_sources", + title: "List Memory Sources", + description: "List every distinct ingest source (Gmail account, Slack channel, Notion \ + workspace, email thread, …) that has data in the memory tree, with \ + chunk counts and last-activity timestamps. Use this when the user asks \ + \"what data sources do I have\" or to discover source ids to pass into \ + `tree.browse`.", + rpc_method: Some("openhuman.memory_tree_list_sources"), + input_schema: tree_list_sources_schema(), + annotations: read_only_local_annotations(), + }, + McpToolSpec { + name: "memory.store", + title: "Store Memory", + description: "Create a new memory document from content. The document is stored in \ + the specified namespace (default `mcp`) and can be retrieved via \ + `memory.search` or `memory.recall`.", + rpc_method: Some("openhuman.memory_doc_put"), + input_schema: memory_store_schema(), + annotations: write_local_annotations(), + }, + McpToolSpec { + name: "memory.note", + title: "Annotate Memory Chunk", + description: "Append a note to an existing memory chunk by storing a linked annotation \ + document. 
The note references the original chunk_id for provenance and \ + can be retrieved alongside it.", + rpc_method: Some("openhuman.memory_doc_put"), + input_schema: memory_note_schema(), + annotations: write_local_annotations(), + }, + McpToolSpec { + name: "tree.tag", + title: "Tag Memory Chunk", + description: "Apply one or more category tags to an existing memory chunk. \ + Stored as an upsertable tag-record document linked to the target \ + chunk_id, so re-tagging the same chunk replaces the prior tag set \ + rather than accumulating duplicate annotations. Differs from \ + `memory.note` in that the payload is a categorical label list — \ + queryable via the document `tags` field — rather than free-form text.", + rpc_method: Some("openhuman.memory_doc_put"), + input_schema: tree_tag_schema(), + annotations: write_local_annotations(), + }, + ] +} + +/// Annotation preset for the read-only, closed-world tools that just read +/// OpenHuman's local memory tree or agent registry. The MCP spec defaults are +/// `readOnlyHint: false` / `openWorldHint: true`, so both fields must be set +/// explicitly to communicate the actual shape to clients. Destructive and +/// idempotent hints are deliberately omitted — per the spec they are +/// meaningful only when `readOnlyHint == false`. +pub fn read_only_local_annotations() -> Value { + json!({ + "readOnlyHint": true, + "openWorldHint": false + }) +} + +/// Annotation preset for the MCP write tools (`memory.store`, `memory.note`, +/// `tree.tag`) that upsert documents into OpenHuman's local memory tree. +/// Writes are keyed deterministically (slug-from-title, `mcp-note-`, +/// `mcp-tag-`) so repeating a call with identical arguments yields +/// the same stored state — `idempotentHint: true`. The upsert can replace a +/// previously stored document for the same key, which is a destructive update +/// in MCP-spec terms — `destructiveHint: true`. Local-only, no external I/O — +/// `openWorldHint: false`. 
+pub fn write_local_annotations() -> Value { + json!({ + "readOnlyHint": false, + "destructiveHint": true, + "idempotentHint": true, + "openWorldHint": false + }) +} + +pub fn searxng_tool_spec() -> McpToolSpec { + McpToolSpec { + name: "searxng_search", + title: "SearXNG Search", + description: "Search the configured self-hosted SearXNG instance and return normalized title, URL, snippet, and source results. Requires searxng.enabled=true in OpenHuman config.", + rpc_method: Some("openhuman.tools_searxng_search"), + input_schema: searxng_search_schema(), + // SearXNG queries an external (self-hosted but network-reachable) + // search engine: read-only (no state mutation), open-world (results + // come from outside OpenHuman). Per spec, destructive/idempotent + // hints are meaningful only when readOnlyHint=false, so omit them. + annotations: json!({ + "readOnlyHint": true, + "openWorldHint": true + }), + } +} + +pub fn list_tools_result_for_config(config: &crate::openhuman::config::Config) -> Value { + let mut specs = base_tool_specs(); + if config.searxng.enabled { + specs.push(searxng_tool_spec()); + } + list_tools_result_from_specs(specs) +} + +pub fn list_tools_result_from_specs(specs: Vec) -> Value { + let tools = specs + .into_iter() + .map(|tool| { + json!({ + "name": tool.name, + "title": tool.title, + "description": tool.description, + "inputSchema": tool.input_schema, + "annotations": tool.annotations, + }) + }) + .collect::>(); + json!({ "tools": tools }) +} + +// ── Schema builder helpers ──────────────────────────────────────────────────── + +pub fn no_args_schema() -> Value { + json!({ + "type": "object", + "properties": {}, + "additionalProperties": false + }) +} + +pub fn query_schema(query_description: &str) -> Value { + json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": query_description, + "minLength": 1 + }, + "k": { + "type": "integer", + "description": format!("Maximum chunks to return. 
Defaults to {DEFAULT_LIMIT}; capped at {MAX_LIMIT}."), + "minimum": 1, + "maximum": MAX_LIMIT + } + }, + "required": ["query"], + "additionalProperties": false + }) +} + +fn tree_browse_schema() -> Value { + json!({ + "type": "object", + "properties": { + "source_kinds": { + "type": "array", + "items": { "type": "string" }, + "description": "Restrict to one or more source kinds (e.g. `email`, `chat`, `document`). Omit to include all kinds." + }, + "source_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Restrict to specific logical source ids (e.g. a Slack channel id). Use `tree.list_sources` to discover these." + }, + "entity_ids": { + "type": "array", + "items": { "type": "string" }, + "description": "Restrict to chunks referencing any of these canonical entity ids (e.g. `person:Alice`, `email:alice@example.com`). Use `tree.top_entities` to discover these." + }, + "since_ms": { + "type": "integer", + "minimum": 0, + "description": "Inclusive lower bound on chunk timestamp, in milliseconds since Unix epoch." + }, + "until_ms": { + "type": "integer", + "minimum": 0, + "description": "Inclusive upper bound on chunk timestamp, in milliseconds since Unix epoch." + }, + "query": { + "type": "string", + "minLength": 1, + "description": "Substring keyword filter over the chunk preview text." + }, + "k": { + "type": "integer", + "minimum": 1, + "maximum": MAX_LIMIT, + "description": format!("Maximum chunks per page. Defaults to {DEFAULT_LIMIT}; capped at {MAX_LIMIT}.") + }, + "offset": { + "type": "integer", + "minimum": 0, + "description": "Pagination offset (number of rows to skip). Defaults to 0." + } + }, + "required": [], + "additionalProperties": false + }) +} + +fn tree_top_entities_schema() -> Value { + json!({ + "type": "object", + "properties": { + "kind": { + "type": "string", + "minLength": 1, + "description": "Restrict to a single entity kind (`person`, `email`, `topic`, `org`, …). Omit to span all kinds." 
+ }, + "k": { + "type": "integer", + "minimum": 1, + "maximum": MAX_LIMIT, + "description": format!("Maximum entities to return. Defaults to {DEFAULT_LIMIT}; capped at {MAX_LIMIT}.") + } + }, + "required": [], + "additionalProperties": false + }) +} + +fn tree_list_sources_schema() -> Value { + json!({ + "type": "object", + "properties": { + "user_email_hint": { + "type": "string", + "minLength": 1, + "description": "When provided, the user's own email is stripped from email-thread display names so the other party shows up instead. Optional." + } + }, + "required": [], + "additionalProperties": false + }) +} + +fn memory_store_schema() -> Value { + json!({ + "type": "object", + "properties": { + "title": { + "type": "string", + "minLength": 1, + "description": "Human-readable title for the memory document." + }, + "content": { + "type": "string", + "minLength": 1, + "description": "The text content to store as a memory document." + }, + "namespace": { + "type": "string", + "minLength": 1, + "description": "Namespace to store the document in. Defaults to `mcp` when omitted." + }, + "tags": { + "type": "array", + "items": { "type": "string" }, + "description": "Optional tags for categorisation and filtering." + } + }, + "required": ["title", "content"], + "additionalProperties": false + }) +} + +fn memory_note_schema() -> Value { + json!({ + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "minLength": 1, + "description": "ID of the memory chunk to annotate. Use an ID from memory.search or memory.recall results." + }, + "note_text": { + "type": "string", + "minLength": 1, + "description": "The note text to attach to the chunk." + } + }, + "required": ["chunk_id", "note_text"], + "additionalProperties": false + }) +} + +fn tree_tag_schema() -> Value { + json!({ + "type": "object", + "properties": { + "chunk_id": { + "type": "string", + "minLength": 1, + "description": "ID of the memory chunk to tag. 
Use an ID from `memory.search`, `memory.recall`, or `tree.browse` results." + }, + "tags": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + }, + "minItems": 1, + "description": "One or more category labels to attach (e.g. `[\"todo\", \"q3-planning\"]`). Re-tagging the same chunk replaces the prior tag set; supply the complete desired set on each call." + } + }, + "required": ["chunk_id", "tags"], + "additionalProperties": false + }) +} + +fn searxng_search_schema() -> Value { + json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "minLength": 1, + "description": "Search query string." + }, + "categories": { + "type": "array", + "items": { + "type": "string", + "enum": ["web", "general", "news", "images"] + }, + "description": "Optional SearXNG categories. `web` maps to SearXNG `general`." + }, + "language": { + "type": "string", + "minLength": 1, + "description": "Optional language code, e.g. `en`, `zh-CN`, or `fr`." + }, + "max_results": { + "type": "integer", + "minimum": 1, + "maximum": SEARXNG_MAX_RESULTS, + "description": format!("Maximum results to return. 
Defaults to searxng.max_results; capped at {SEARXNG_MAX_RESULTS}.") + } + }, + "required": ["query"], + "additionalProperties": false + }) +} diff --git a/src/openhuman/mcp_server/tools/types.rs b/src/openhuman/mcp_server/tools/types.rs new file mode 100644 index 0000000000..07e8dc7087 --- /dev/null +++ b/src/openhuman/mcp_server/tools/types.rs @@ -0,0 +1,87 @@ +use serde_json::Value; + +pub const DEFAULT_LIMIT: u64 = 10; +pub const MAX_LIMIT: u64 = 50; +pub const QUERY_ARGUMENTS: &[&str] = &["query", "k"]; +pub const SEARXNG_SEARCH_ARGUMENTS: &[&str] = &["query", "categories", "language", "max_results"]; +pub const TREE_READ_CHUNK_ARGUMENTS: &[&str] = &["chunk_id"]; +pub const SUBAGENT_RUN_ARGUMENTS: &[&str] = &["agent_id", "prompt"]; +pub const TREE_BROWSE_ARGUMENTS: &[&str] = &[ + "source_kinds", + "source_ids", + "entity_ids", + "since_ms", + "until_ms", + "query", + "k", + "offset", +]; +pub const TREE_TOP_ENTITIES_ARGUMENTS: &[&str] = &["kind", "k"]; +pub const TREE_LIST_SOURCES_ARGUMENTS: &[&str] = &["user_email_hint"]; +pub const MEMORY_STORE_ARGUMENTS: &[&str] = &["title", "content", "namespace", "tags"]; +pub const MEMORY_NOTE_ARGUMENTS: &[&str] = &["chunk_id", "note_text"]; +pub const TREE_TAG_ARGUMENTS: &[&str] = &["chunk_id", "tags"]; +/// Upper bound on the number of tags `tree.tag` accepts per call. +/// Matches the "explicit rejection over silent clamping" pattern used +/// elsewhere in the MCP layer; prevents a misbehaving client from +/// flooding a chunk's tag-record document with thousands of entries. +pub const TREE_TAG_MAX_TAGS: usize = 50; +/// Upper bound on a single tag's character length. Tags are categorical +/// labels — anything past ~128 chars is almost certainly free-form text +/// that should be `memory.note` instead, so reject up-front to surface +/// the misuse rather than silently writing a giant token into the +/// queryable `tags` index. 
+pub const TREE_TAG_MAX_TAG_LENGTH: usize = 128; + +#[derive(Debug, Clone)] +pub struct McpToolSpec { + pub name: &'static str, + pub title: &'static str, + pub description: &'static str, + pub rpc_method: Option<&'static str>, + pub input_schema: Value, + /// MCP `ToolAnnotations` per the 2025-03-26+ spec — `readOnlyHint`, + /// `destructiveHint`, `idempotentHint`, `openWorldHint`. Hints, not + /// guarantees; clients use them to surface accurate safety affordances + /// (e.g. Claude Desktop's "this tool can take destructive actions" + /// confirmation gate). Per spec, destructive/idempotent are meaningful + /// only when `readOnlyHint == false`, so read-only tools omit them. + pub annotations: Value, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ToolCallError { + /// Client-side problem: malformed arguments, unknown tool, validation + /// failure. Maps to JSON-RPC `-32602 Invalid params`. + InvalidParams(String), + /// Server-side problem outside the caller's control: config load failure, + /// missing platform resources. Maps to JSON-RPC `-32603 Internal error`. + /// Kept distinct from `InvalidParams` so MCP clients don't display + /// internal failures as if the user supplied bad arguments. + Internal(String), +} + +impl ToolCallError { + pub fn message(&self) -> &str { + match self { + Self::InvalidParams(message) | Self::Internal(message) => message, + } + } + + /// JSON-RPC error code corresponding to this variant. + pub fn code(&self) -> i64 { + match self { + Self::InvalidParams(_) => -32602, + Self::Internal(_) => -32603, + } + } + + /// JSON-RPC error `message` field (short, spec-canonical phrase). The + /// human-readable detail belongs in the response's `data` field. 
+ pub fn jsonrpc_message(&self) -> &'static str { + match self { + Self::InvalidParams(_) => "Invalid params", + Self::Internal(_) => "Internal error", + } + } +} diff --git a/src/openhuman/meet_agent/brain.rs b/src/openhuman/meet_agent/brain.rs deleted file mode 100644 index 4215be6fba..0000000000 --- a/src/openhuman/meet_agent/brain.rs +++ /dev/null @@ -1,1343 +0,0 @@ -//! Turn orchestration: STT → LLM → TTS. -//! -//! ## Pipeline -//! -//! When [`session::Vad`] reports `EndOfUtterance`, [`run_turn`] drains -//! the inbound buffer and runs three serial stages: -//! -//! 1. **STT** — wrap the PCM16LE samples in a WAV container and post -//! to [`crate::openhuman::voice::cloud_transcribe`]. Returns the -//! transcribed text (or `Err` on transport / auth failure). -//! -//! 2. **LLM** — send a tiny chat-completions request through -//! [`crate::api::BackendOAuthClient`] with a "live meeting agent" -//! system prompt and the transcript as the user message. Returns a -//! short reply (or empty string when the agent decides to stay -//! silent). -//! -//! 3. **TTS** — feed the reply text into -//! [`crate::openhuman::voice::reply_speech`] requesting -//! `output_format = "pcm_16000"`. Decode the base64 PCM bytes back -//! into `Vec` and enqueue on the session's outbound queue. -//! -//! ## Fallback -//! -//! When the backend session token is missing (the most common reason -//! a stage fails outside production: tests, no-network smoke runs), -//! we fall back to deterministic stubs so the loop still produces an -//! audible blip and the unit tests stay network-free. Real -//! transport / 5xx errors are *not* swallowed — they surface as -//! `Note` events so a real-call failure is visible in the transcript -//! log, not silently degraded to a stub. 
- -use base64::{engine::general_purpose::STANDARD as B64, Engine as _}; -use serde_json::{json, Value}; -use std::collections::HashMap; -use std::sync::{Arc, OnceLock}; -use std::time::Duration; -use tokio::sync::Mutex as TokioMutex; - -use super::session::registry; -use super::types::{SessionEvent, SessionEventKind}; -use super::wav; - -use crate::openhuman::agent::harness::session::Agent; - -/// Process-wide cache of orchestrator Agents keyed by `request_id`. -/// Each meet session reuses the same Agent across all its turns so -/// the harness's in-memory `Agent.history` accumulates and the -/// orchestrator can recall prior dialogue ("did I tell you to -/// remember Friday?", "what did Alice say earlier?"). Without the -/// cache each turn builds a fresh Agent, loses the prior turn's -/// memory, and pays the 5-10s build cost every time. -/// -/// Locked with `tokio::sync::Mutex` because we hold the inner -/// `Arc>` lock across `run_single().await` — -/// std::sync::Mutex cannot be held across await without breaking -/// Send + leaking the lock on cancel. -static AGENT_CACHE: OnceLock>>>> = OnceLock::new(); - -fn agent_cache() -> &'static TokioMutex>>> { - AGENT_CACHE.get_or_init(|| TokioMutex::new(HashMap::new())) -} - -/// Drop the cached orchestrator for a meet session. Called from -/// `handle_stop_session` so a finished call doesn't leak the Agent -/// (each one carries memory tree + tool registry handles). -pub async fn forget_session_agent(request_id: &str) { - let mut guard = agent_cache().lock().await; - if guard.remove(request_id).is_some() { - log::info!("[meet-agent] dropped cached orchestrator for request_id={request_id}"); - } -} - -/// Wall-clock ceiling on one agentic turn. Slack / Gmail fetches via -/// Composio + per-message filtering + iteration-2 synthesis can hit -/// 60-80s in the slow path. 90s gives the long integrations a chance -/// to land. 
The turn_in_progress gate blocks new wakes during the -/// wait, so the user cannot spawn parallel queries by re-asking. -const AGENTIC_TURN_TIMEOUT_SECS: u64 = 90; - -/// Spoken filler played immediately after wake-word fires, before the -/// (possibly slow) orchestrator+tool path runs. Bridges the 30-60s -/// silence on slow integration paths. Kept short (~1s synth) so it -/// doesn't intrude on fast greetings / time questions. -const PREROLL_ACK_PHRASE: &str = "On it."; - -/// How many of the most recent `Heard` / `Spoke` events we feed back -/// into the LLM as rolling conversation context. 12 ≈ a few minutes of -/// captioned dialogue — enough for the model to follow a thread without -/// blowing the prompt budget. -const CONTEXT_EVENT_WINDOW: usize = 12; -/// Spoken-reply ceiling. Each token is roughly ¾ of a word, so 80 -/// tokens ≈ ~60 spoken words ≈ ~12 seconds. The system prompt asks for -/// one short sentence, but reasoning-style backends ignore soft length -/// hints and emit 800+ char monologues. Hard token cap keeps the bot -/// interruptible regardless of model behaviour. -const REPLY_MAX_TOKENS: u32 = 80; -/// ElevenLabs model. `eleven_turbo_v2_5` strikes the best -/// quality/latency balance; the older default the backend would pick -/// (`eleven_monolingual_v1`) sounds noticeably flatter. -const TTS_MODEL_ID: &str = "eleven_turbo_v2_5"; - -/// Hard ceiling on reply characters fed to TTS. The LLM is asked to be -/// concise but reasoning models still emit 800+ char paragraphs. Cap -/// drops everything past the first sentence boundary at-or-before -/// this index, falling back to a raw char cut when no boundary fits. -/// ~25s of speech at average prosody — keeps the bot interruptible -/// and prevents the "60s monologue / can't talk over it" loop. -const MAX_TTS_CHARS: usize = 400; - -/// Minimum samples below which we skip the brain turn entirely. 
-/// 250 ms @ 16 kHz — under this, VAD almost certainly fired on a -/// transient (cough, click) rather than real speech. -const MIN_TURN_SAMPLES: usize = 4_000; -/// Re-exported from `ops` so any drift (if we ever loosen the -/// boundary check) immediately breaks the WAV / duration math here -/// at compile time. Today the same constant is used in both places — -/// the ops boundary check rejects anything else outright. -const SAMPLE_RATE_HZ: u32 = super::ops::REQUIRED_SAMPLE_RATE; - -/// Classify a non-owner caption that tripped the wake word. The -/// gate has already decided the speaker isn't authorised; this -/// picks between a friendly hi-back (greeting / pleasantry) and -/// a polite refusal (real task ask). Matching is conservative: -/// when the post-wake tail is empty OR only contains greeting -/// words, treat it as a greeting. Anything else is assumed to be -/// a task ask. -fn classify_unauthorized_intent(caption_text: &str) -> UnauthorizedIntent { - // Lift the bit of text that comes after the matched wake - // phrase so we don't get fooled by the wake itself ("hey - // openhuman" obviously contains "hey"). - let lower = caption_text.to_ascii_lowercase(); - let wake_phrases = [ - "hey open human", - "hi open human", - "hello open human", - "hey openhuman", - "hi openhuman", - "hello openhuman", - "open human", - "openhuman", - ]; - let tail = wake_phrases - .iter() - .filter_map(|p| lower.find(p).map(|i| &lower[i + p.len()..])) - .next() - .unwrap_or(&lower); - // Strip punctuation / common filler so "hi there!" reduces to - // ["hi", "there"]. Keeping the word list cheap and English-only - // for v1; the locale-aware story lands with multilingual TTS. 
- let words: Vec<&str> = tail - .split(|c: char| !c.is_ascii_alphanumeric()) - .filter(|w| !w.is_empty()) - .collect(); - if words.is_empty() { - return UnauthorizedIntent::Greeting; - } - const GREETING_WORDS: &[&str] = &[ - "hi", - "hello", - "hey", - "yo", - "sup", - "howdy", - "greetings", - "hola", - "good", - "morning", - "afternoon", - "evening", - "night", - "there", - "everyone", - "all", - "folks", - "team", - "guys", - "yall", - ]; - if words.iter().all(|w| GREETING_WORDS.contains(w)) { - UnauthorizedIntent::Greeting - } else { - UnauthorizedIntent::TaskAsk - } -} - -/// Output of `classify_unauthorized_intent`. Drives whether the -/// non-owner turn speaks a canned hi-back or routes the prompt -/// through a toolless LLM (general-knowledge + safe deflection). -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum UnauthorizedIntent { - /// Just a greeting — bot says hi back without offering tools. - Greeting, - /// Substantive question. Route to a toolless LLM with a strict - /// system prompt — answer general knowledge / casual chat, - /// refuse anything that would require the owner's personal - /// tools or data, and point the owner at the magic word - /// ("allow") if access is needed. - TaskAsk, -} - -/// System prompt for the non-owner branch. The LLM has no tool -/// surface attached and is told to refuse any request that would -/// need the owner's personal data. Kept short and explicit so the -/// model doesn't ad-lib a different boundary. -fn non_owner_system_prompt(owner: &str) -> String { - let owner_label = if owner.trim().is_empty() { - "the meeting host" - } else { - owner.trim() - }; - format!( - "\ -You are openhuman, an AI participant in a live Google Meet call. 
The speaker is NOT the call \ -owner — the owner is {owner_label}.\n\ -\n\ -WHAT YOU MAY DO:\n\ -- Answer general knowledge questions (history, science, math, definitions, weather concepts).\n\ -- Casual conversation, jokes, small talk, greetings.\n\ -- Explain what you are and what you can do at a high level.\n\ -\n\ -WHAT YOU MUST REFUSE (no exceptions):\n\ -- Anything that would require {owner_label}'s personal data: their Slack, Gmail, Calendar, \ -contacts, memory notes, files, schedule, integrations, or chat history.\n\ -- Sending messages, scheduling, reminding, creating, modifying or deleting any data on their \ -behalf.\n\ -- Revealing what {owner_label} has previously told you or stored with you.\n\ -\n\ -WHEN REFUSING: respond with exactly one short sentence pointing at the magic word, e.g. \ -\"That needs {owner_label}'s permission — {owner_label}, say 'allow' if you'd like me to help.\"\n\ -\n\ -OUTPUT FORMAT (strict):\n\ -- ONE short spoken sentence, max 25 words.\n\ -- Plain English. No markdown, bullets, code fences, or URLs.\n\ -- No meta-narration (\"I should…\", \"Let me…\", \"As an AI…\"). Just answer.\n\ -- Respond in ENGLISH ONLY regardless of the speaker's language — TTS is English-only.\n\ -" - ) -} - -/// Route a non-owner caption through the toolless chat-v1 LLM. -/// Returns the spoken text — the caller TTS's it and enqueues. -async fn llm_general_no_tools(prompt: &str, owner: &str) -> Result { - let system_prompt = non_owner_system_prompt(owner); - // No rolling history for the non-owner path — each ask is a - // fresh conversation. Sharing history between owner turns and - // non-owner turns risks leaking the owner's tool-call results - // into a stranger-facing reply. - llm_meeting_basic(prompt, &[], &system_prompt).await -} - -/// Friendly hi-back canned line when a non-owner just greets the -/// bot. Kept short and warm; doesn't mention the owner / privacy -/// gate at all — that's noise on a "hello". 
-fn friendly_greeting_message(asker: &str) -> String { - let asker = asker.trim(); - if asker.is_empty() { - "Hi there! Nice to meet you.".to_string() - } else { - format!("Hi {asker}! Nice to meet you.") - } -} - -/// Spoken refusal when a non-owner trips the wake word. Built per -/// call from the configured owner display name so the audible -/// response names the actual person who has the keys, and tells -/// the owner the magic word ("allow") to grant access. Kept short -/// so it doesn't drown the conversation. -fn soft_deny_message(asker: &str, owner: &str) -> String { - let asker = asker.trim(); - let owner = owner.trim(); - match (asker.is_empty(), owner.is_empty()) { - (true, true) => "Sorry, I only respond to my owner.".to_string(), - (true, false) => format!( - "Sorry, only {owner} can ask me things in this call. {owner}, say 'allow' if you'd like me to answer." - ), - (false, true) => format!("Sorry {asker}, I only respond to my owner."), - (false, false) => format!( - "Sorry {asker}, only {owner} can ask me things here. {owner}, say 'allow' to let them in." - ), - } -} - -/// Recognise an "open the gate" intent from the owner's first words -/// after the wake phrase. Conservative: only fires when the prompt -/// begins with one of the canonical permit verbs so an unrelated -/// owner query that happens to contain "allow" or "yes" deeper in -/// the sentence isn't hijacked. -/// -/// Returns `true` when the owner is explicitly granting access to -/// the most-recently-refused asker. The caller still gates on -/// session-level state (`take_pending_unauthorized`) — without a -/// pending request the intent is meaningless and the prompt should -/// just run as a normal LLM turn. 
-fn looks_like_grant_intent(prompt: &str) -> bool { - let p = prompt.trim().to_ascii_lowercase(); - if p.is_empty() { - return false; - } - // Whole-prompt matches first so short approvals ("allow", "yes") - // don't collide with longer prompts that happen to start with - // the same word. - matches!( - p.as_str(), - "allow" | "yes" | "ok" | "okay" | "go ahead" | "let them in" | "let them ask" | "permit" - ) || p.starts_with("allow ") - || p.starts_with("let them") - || p.starts_with("let him") - || p.starts_with("let her") - || p.starts_with("go ahead") - || p.starts_with("yes go ahead") - || p.starts_with("yes let") - || p.starts_with("permit ") - || p.starts_with("you can answer") - || p.starts_with("you can tell") -} - -/// Owner-grant path: the owner said "allow them" / "go ahead" / -/// "let them in" after a non-owner's wake refusal. Add the -/// previously-refused speaker to the per-call allowlist (so their -/// next wake fires through to the orchestrator), and speak a -/// short confirmation so they know they're in. -pub async fn run_grant_turn(request_id: &str, grantee: &str) -> Result { - let grantee = grantee.trim(); - let message = if grantee.is_empty() { - "Okay, you can ask me now.".to_string() - } else { - format!("Okay, {grantee} can ask me now.") - }; - log::info!("[meet-agent] grant request_id={request_id} grantee=\"{grantee}\""); - // Apply the grant on the session BEFORE speaking — if TTS races - // and the grantee re-asks during synthesis, we want their next - // wake to fire through. Also cancel any prior outbound so the - // confirmation doesn't queue behind a half-drained refusal. 
- let _ = registry().with_session(request_id, |s| { - s.allow_speaker(grantee); - s.cancel_outbound(); - }); - let samples = match tts(&message).await { - Ok(samples) => samples, - Err(err) => { - log::warn!("[meet-agent] grant TTS failed request_id={request_id} err={err}"); - stub_tts(&message).await - } - }; - registry().with_session(request_id, |s| { - s.record_event( - SessionEventKind::Note, - format!("owner granted wake access to {grantee}"), - ); - s.record_event(SessionEventKind::Spoke, message.clone()); - if !samples.is_empty() { - s.enqueue_outbound_pcm(&samples, true); - } - // Clear the wake_active + turn_in_progress flags so the - // next caption (likely the grantee's actual question) can - // fire a new turn. Without this, the wake state from the - // owner's "allow them" prompt would coalesce the grantee's - // first real caption into a continuation of this grant turn. - s.wake_active = false; - s.turn_in_progress = false; - s.mark_turn_done(); - })?; - Ok(true) -} - -/// Soft-deny path: kick a canned-line TTS reply when the wake word -/// fires from a non-owner. Branches on intent: a bare greeting gets -/// a friendly hi-back; a substantive task ask gets the refusal that -/// tells the owner how to grant access. Does NOT touch the -/// orchestrator agent (no tool calls, no memory writes) — it's a -/// single canned line, so the failure modes are limited to TTS errors. -/// -/// `caption_text` is the full caption from `note_caption` so we can -/// classify intent here; the session has already recorded the -/// pending grant request and dispatch timestamp. -pub async fn run_soft_deny_turn( - request_id: &str, - asker: &str, - caption_text: &str, -) -> Result { - let owner = registry() - .with_session(request_id, |s| s.owner_display_name().to_string()) - .unwrap_or_default(); - let intent = classify_unauthorized_intent(caption_text); - // Greeting → canned hi (no network round-trip needed). - // TaskAsk → toolless LLM. 
The LLM has no tools attached, has - // an explicit "refuse personal-data asks" system - // prompt, and is asked to point the owner at the - // magic word when refusing. So a Q like "what's - // the capital of France" lands as a normal answer - // ("Paris"), while "read Nikhil's Slack" lands as - // the refusal. The LLM picks; we don't classify. - let message = match intent { - UnauthorizedIntent::Greeting => friendly_greeting_message(asker), - UnauthorizedIntent::TaskAsk => match llm_general_no_tools(caption_text, &owner).await { - Ok(reply) if !reply.trim().is_empty() => reply, - Ok(_) => { - // Empty reply = LLM declined silently. Fall back to - // the explicit canned refusal so the speaker hears - // *something* and knows the bot didn't crash. - log::info!( - "[meet-agent] non-owner LLM returned empty — using canned refusal request_id={request_id}" - ); - soft_deny_message(asker, &owner) - } - Err(err) => { - log::warn!("[meet-agent] non-owner LLM failed request_id={request_id} err={err}"); - soft_deny_message(asker, &owner) - } - }, - }; - log::info!( - "[meet-agent] soft-deny request_id={request_id} asker=\"{asker}\" owner=\"{owner}\" intent={intent:?}" - ); - // Cancel any prior outbound so the refusal doesn't queue behind a - // half-drained reply from a previous turn. 
- let _ = registry().with_session(request_id, |s| s.cancel_outbound()); - let samples = match tts(&message).await { - Ok(samples) => samples, - Err(err) => { - log::warn!("[meet-agent] soft-deny TTS failed request_id={request_id} err={err}"); - stub_tts(&message).await - } - }; - registry().with_session(request_id, |s| { - let kind = match intent { - UnauthorizedIntent::Greeting => "greeting", - UnauthorizedIntent::TaskAsk => "refusal", - }; - s.record_event( - SessionEventKind::Note, - format!("soft-deny ({kind}): {asker} unauthorised wake"), - ); - s.record_event(SessionEventKind::Spoke, message.clone()); - if !samples.is_empty() { - s.enqueue_outbound_pcm(&samples, true); - } - // NB: do NOT call `mark_turn_done` here — that's the - // owner-min-turn-gap stamp, and we want the owner to be - // able to wake (e.g. say "allow them") within seconds of a - // refusal. The session's own `UNAUTHORIZED_COOLDOWN_MS` is - // what guards against a soft-deny loop from the same - // non-owner speaker. - })?; - Ok(true) -} - -/// Caption-driven turn. Drains the session's pending wake-word prompt -/// (assembled by `session::note_caption`) and runs LLM → TTS → enqueue -/// outbound. Skips STT entirely — the captions are already text. -/// -/// We give the user a short window (`CAPTION_TURN_DELAY_MS`) after the -/// wake word fires so multi-caption utterances ("hey openhuman … -/// what's the weather like in paris") have a chance to assemble -/// before we hit the LLM. The shell calls this on every caption -/// push that flagged the wake word; subsequent calls before the -/// delay expires are coalesced via the session's `wake_active` flag. -pub async fn run_caption_turn(request_id: &str) -> Result { - // Wait briefly so a multi-fragment wake utterance ("hey openhuman - // what's the weather like in paris" arriving as 2-3 captions) has - // a chance to assemble before we drain the prompt. 
- tokio::time::sleep(std::time::Duration::from_millis(CAPTION_TURN_DELAY_MS)).await; - - // When wake fires from a bare "hey openhuman" with no tail, the - // session returns None from take_pending_prompt — there's nothing - // to feed the LLM. Previously we silently bailed (`return Ok(false)`) - // which made the bot look broken to the user. Treat empty-tail wake - // as a "say hi back" greeting cue: synthesize a short ack so the - // user gets audible proof that the caption→wake→speak loop is - // wired up end-to-end. - // - // Also: drop any queued outbound PCM from the previous turn. - // Reasoning-model replies can run 60+ seconds; if the user re-fires - // the wake mid-reply we need to stop the old speech rather than - // play the entire backlog before the new reply starts. This makes - // the bot interruptible from the user's side. - let (prompt, history, was_bare_wake) = match registry().with_session(request_id, |s| { - // Mark turn as in-flight so note_caption refuses to fire new - // wakes until run_caption_turn returns. Without this, the - // user's continuing speech (or growing-caption re-fires) - // spawns 20 parallel agentic turns for one question and none - // of them complete inside the timeout. - s.turn_in_progress = true; - s.cancel_outbound(); - let prompt = s.take_pending_prompt(); - let history = recent_dialog_history(s.events(), CONTEXT_EVENT_WINDOW); - (prompt, history) - })? { - (Some(p), h) => (p, h, false), - (None, h) => { - log::info!( - "[meet-agent] caption turn bare-wake (no tail) request_id={request_id} — replying with greeting ack" - ); - ("hello".to_string(), h, true) - } - }; - log::info!( - "[meet-agent] caption turn start request_id={request_id} prompt_chars={} history_msgs={} bare_wake={}", - prompt.chars().count(), - history.len(), - was_bare_wake, - ); - - // Grant-intent fast path. 
When the owner says "hey openhuman, - // allow them" / "let them in" / "go ahead" after a non-owner - // wake refusal, treat the turn as a single-shot session-level - // grant rather than handing the prompt to the orchestrator. - // The pending grantee was captured by `note_caption` at refusal - // time and lives on the session for `PENDING_GRANT_WINDOW_MS`. - if !was_bare_wake && looks_like_grant_intent(&prompt) { - let pending = registry() - .with_session(request_id, |s| s.take_pending_unauthorized()) - .ok() - .flatten(); - if let Some(grantee) = pending { - return run_grant_turn(request_id, &grantee).await; - } - // No pending request to grant — fall through to the normal - // LLM path. The model can interpret "allow" however it - // wants from there; without a pending grantee we have no - // session-level meaning to attach to it. - log::info!( - "[meet-agent] grant-intent prompt detected but no pending request — falling through request_id={request_id}" - ); - } - - // Pre-roll filler. The orchestrator + integration tools take - // 30–60s on slow paths (Slack / Gmail / Calendar). Without an - // immediate acoustic cue, the user assumes the bot is broken and - // re-asks (which the turn_in_progress gate now blocks but still - // burns the call atmosphere). Speak a 2-word ack right away and - // enqueue with done=false so the real reply appends cleanly when - // it lands. - // - // Skip pre-roll on short prompts: greetings ("hi"), checks ("can - // you hear me", "are you there"), time questions ("what's the - // time"), and other trivial asks the agent answers in 2-5s - // without tools — those don't need the ack, and "On it. Yes, I - // can hear you" sounds redundant. The 50-char threshold is a - // rough proxy; real second-brain questions ("am I free Friday - // afternoon for a 30 min slot") are almost always longer. 
- const PREROLL_SKIP_PROMPT_CHARS: usize = 50; - if !was_bare_wake && prompt.chars().count() > PREROLL_SKIP_PROMPT_CHARS { - if let Ok(ack_pcm) = tts(PREROLL_ACK_PHRASE).await { - let _ = registry().with_session(request_id, |s| { - s.enqueue_outbound_pcm(&ack_pcm, false); - }); - log::info!( - "[meet-agent] pre-roll ack queued request_id={request_id} samples={}", - ack_pcm.len() - ); - } else { - log::debug!( - "[meet-agent] pre-roll ack synth failed request_id={request_id} — skipping pre-roll" - ); - } - } - - // Route the turn through the FULL orchestrator agent first — it - // owns the user's connected integrations, memory tree, MCP - // clients and skills, so it can actually answer "is my Friday - // free", "what did Alice say about the deploy", etc. Falls back - // to the bare chat-completions path on orchestrator build / - // timeout / RPC error so a config-degraded environment still - // produces audible output instead of dead air. - let reply_text = match llm_meeting_agentic(&prompt, request_id).await { - Ok(text) => text, - Err(agentic_err) => { - // Do NOT fall back to basic LLM. The basic path has no - // tool access, so on a calendar/slack/gmail question it - // confidently hallucinates "I don't have access" — which - // is the WRONG answer and worse than silence. Speak a - // short canned "let me get back to you" ack so the user - // knows the question was heard but the bot couldn't - // resolve it in time, then drop the prompt. The user - // can re-ask (turn_in_progress gate clears as we exit). 
- log::warn!( - "[meet-agent] agentic turn failed — speaking polite ack instead of toolless fallback request_id={request_id} err={agentic_err}" - ); - let _ = registry().with_session(request_id, |s| { - s.record_event( - SessionEventKind::Note, - format!("agentic path failed; speaking ack: {agentic_err}"), - ); - }); - "Let me get back to you on that.".to_string() - } - }; - - let synthesized = if reply_text.trim().is_empty() { - Vec::new() - } else { - match tts(&reply_text).await { - Ok(samples) => samples, - Err(err) => { - log::warn!( - "[meet-agent] caption-turn TTS failed request_id={request_id} err={err}" - ); - let _ = registry().with_session(request_id, |s| { - s.record_event( - SessionEventKind::Note, - format!("TTS failure (using stub): {err}"), - ); - }); - stub_tts(&reply_text).await - } - } - }; - - registry().with_session(request_id, |s| { - s.record_event(SessionEventKind::Heard, prompt.clone()); - if !reply_text.is_empty() { - s.record_event(SessionEventKind::Spoke, reply_text.clone()); - if !synthesized.is_empty() { - s.enqueue_outbound_pcm(&synthesized, true); - } - } else { - s.record_event( - SessionEventKind::Note, - "agent declined to respond".to_string(), - ); - } - s.turn_count += 1; - // Clear the in-flight gate so the next wake can fire. Done - // inside the same with_session so it lands in one critical - // section with the reply enqueue, even if the caller drops - // the future after this point. - s.turn_in_progress = false; - // Stamp turn-done time so note_caption's min-turn-gap - // backstop can suppress wakes that fire within 15s of this - // turn's completion (caption residue / repeat questions). - s.mark_turn_done(); - })?; - - log::info!( - "[meet-agent] caption turn done request_id={request_id} reply_chars={} synth_samples={} reply_preview={:?}", - reply_text.chars().count(), - synthesized.len(), - reply_text.chars().take(120).collect::(), - ); - Ok(true) -} - -/// Delay between wake-word match and prompt drain. 
Long enough that -/// 2-3 caption fragments can join up; short enough that the user -/// doesn't experience awkward silence after they stop talking. -const CAPTION_TURN_DELAY_MS: u64 = 1_500; - -/// Canned acknowledgements the agent speaks out loud after capturing -/// a note. Short, varied so consecutive notes don't sound robotic. -/// Selected by hashing the prompt so the same dictation reliably -/// produces the same ack (helpful for tests + debugging) while still -/// rotating across the set in a normal conversation. -const ACK_PHRASES: &[&str] = &["Got it.", "Noted.", "Adding that.", "On it.", "Captured."]; - -fn pick_ack_phrase(prompt: &str) -> &'static str { - if prompt.trim().is_empty() { - return ""; - } - let h: u32 = prompt.bytes().fold(0u32, |a, b| a.wrapping_add(b as u32)); - ACK_PHRASES[(h as usize) % ACK_PHRASES.len()] -} - -/// Fire one brain turn for the named session. Returns `Ok(true)` when a -/// turn actually ran, `Ok(false)` when the inbound buffer was below the -/// floor. -pub async fn run_turn(request_id: &str) -> Result { - let drained = registry().with_session(request_id, |s| s.drain_inbound())?; - if drained.len() < MIN_TURN_SAMPLES { - log::debug!( - "[meet-agent] skipping turn request_id={request_id} samples={}", - drained.len() - ); - return Ok(false); - } - - log::info!( - "[meet-agent] turn start request_id={request_id} samples={}", - drained.len() - ); - - // ─── STT ──────────────────────────────────────────────────────── - let heard = match stt(&drained).await { - Ok(text) if text.trim().is_empty() => { - log::info!("[meet-agent] STT empty, skipping turn request_id={request_id}"); - return Ok(false); - } - Ok(text) => text, - Err(err) => { - log::warn!("[meet-agent] STT failed request_id={request_id} err={err}"); - // Record a Note so the transcript log makes the failure - // visible to whoever's looking at logs. 
- let _ = registry().with_session(request_id, |s| { - s.record_event( - SessionEventKind::Note, - format!("STT failure (using stub): {err}"), - ); - }); - stub_stt(&drained).await - } - }; - log::info!( - "[meet-agent] STT request_id={request_id} text_chars={}", - heard.chars().count() - ); - - // ─── LLM (agentic only; no basic-LLM fallback to avoid toolless hallucinations) ─ - let reply_text = match llm_meeting_agentic(&heard, request_id).await { - Ok(text) => text, - Err(agentic_err) => { - log::warn!( - "[meet-agent] STT-path agentic failed — speaking polite ack request_id={request_id} err={agentic_err}" - ); - let _ = registry().with_session(request_id, |s| { - s.record_event( - SessionEventKind::Note, - format!("agentic path failed; speaking ack: {agentic_err}"), - ); - }); - "Let me get back to you on that.".to_string() - } - }; - - // ─── TTS ──────────────────────────────────────────────────────── - let synthesized = if reply_text.trim().is_empty() { - Vec::new() - } else { - match tts(&reply_text).await { - Ok(samples) => samples, - Err(err) => { - log::warn!("[meet-agent] TTS failed request_id={request_id} err={err}"); - let _ = registry().with_session(request_id, |s| { - s.record_event( - SessionEventKind::Note, - format!("TTS failure (using stub): {err}"), - ); - }); - stub_tts(&reply_text).await - } - } - }; - - registry().with_session(request_id, |s| { - s.record_event(SessionEventKind::Heard, heard.clone()); - if !reply_text.is_empty() { - s.record_event(SessionEventKind::Spoke, reply_text.clone()); - if !synthesized.is_empty() { - s.enqueue_outbound_pcm(&synthesized, true); - } - } else { - s.record_event( - SessionEventKind::Note, - "agent declined to respond".to_string(), - ); - } - s.turn_count += 1; - })?; - - log::info!( - "[meet-agent] turn done request_id={request_id} reply_chars={} synth_samples={}", - reply_text.chars().count(), - synthesized.len() - ); - Ok(true) -} - -// ─── Real adapters 
────────────────────────────────────────────────── - -async fn stt(samples: &[i16]) -> Result { - use crate::openhuman::voice::cloud_transcribe::{transcribe_cloud, CloudTranscribeOptions}; - - let config = crate::openhuman::config::ops::load_config_with_timeout().await?; - let wav_bytes = wav::pack_pcm16le_mono_wav(samples, SAMPLE_RATE_HZ); - let audio_b64 = B64.encode(&wav_bytes); - let opts = CloudTranscribeOptions { - mime_type: Some("audio/wav".to_string()), - file_name: Some("meet-agent.wav".to_string()), - ..Default::default() - }; - let outcome = transcribe_cloud(&config, &audio_b64, &opts).await?; - let text = outcome.value.text.clone(); - Ok(text) -} - -/// System prompt for the live meeting agent. Pushes the model toward -/// (a) recognising whether the latest utterance is genuinely directed -/// at it (intent classification — emit empty string when not), and -/// (b) responding conversationally and concisely when it is. -const MEETING_SYSTEM_PROMPT: &str = "\ -You are OpenHuman, joining a live Google Meet call by voice. Every word you \ -produce will be spoken aloud over the call. The transcript shows `user` lines \ -(humans on the call, sometimes prefixed with a name) and `assistant` lines \ -(things you previously said out loud).\n\ -\n\ -STRICT OUTPUT RULES — these are non-negotiable. The output is fed DIRECTLY \ -into TTS and spoken aloud verbatim. Any meta-text becomes audible bot \ -gibberish on a live call.\n\ -1. Output ONE sentence. Maximum 25 spoken words.\n\ -2. Plain spoken English. No markdown. No bullets. No code. No emoji.\n\ -3. NO chain-of-thought. NO reasoning. NO planning. NO blocks. NO \ -preamble. NEVER write phrases like \"We need to…\", \"I should…\", \"Let me…\", \ -\"The user said…\", \"This is a greeting…\", \"So I should respond with…\", \ -\"My response is…\". Output ONLY the final answer that the user should hear.\n\ -4. Never repeat what the user said. Never narrate what you are about to do.\n\ -5. 
If the latest user line is not directly addressed to you, output the empty \ -string. Do not respond to side conversations or ambient speech.\n\ -6. Examples — good vs bad:\n\ - User: \"hello\" → GOOD: \"Hey there.\" BAD: \"The user said hello, so I should respond with a greeting.\"\n\ - User: \"what's the time\" → GOOD: \"I don't have a clock right now.\" BAD: \"We need to generate a single sentence. The user is asking the time.\"\n\ -\n\ -Address-detection: respond when the user names you (\"OpenHuman\", \"hey \ -openhuman\"), asks a direct question of you, or gives a direct command \ -(remember, summarise, look up). Otherwise stay silent.\n\ -\n\ -For unanswerable questions: say so in one sentence (\"I don't know that off \ -the top of my head\") instead of guessing or stalling.\n\ -For dictation / note requests: a 2-3 word ack (\"Got it.\", \"Noted.\"). Don't \ -read the note back.\n\ -"; - -/// Voice-frontend system-prompt directive prepended to the user -/// utterance before it reaches the orchestrator. The orchestrator -/// already has its own persona, tool catalogue, memory loader and -/// connected integrations; this addendum just tells it the answer is -/// going to be spoken aloud verbatim so it should reply in one short -/// spoken sentence with no markdown / no chain-of-thought / no -/// preamble. Wrapped in a delimiter so the orchestrator can't confuse -/// the directive with the user's actual utterance. -const MEET_VOICE_DIRECTIVE: &str = "\ -MEETING VOICE MODE — this conversation is happening live over voice in a Google Meet call.\n\ -\n\ -LANGUAGE: Respond in ENGLISH ONLY. Do not switch languages even if a user's name, prior memory, or transcript hint suggests another locale. The TTS engine is English-only; non-English output produces garbled audio.\n\ -\n\ -TOOL USE (encouraged):\n\ -- USE TOOLS whenever a tool can give a real answer. Calendar, email, slack, memory, integrations — \ -call them. 
Tool calls are invisible to the user and DO NOT count toward your reply word budget.\n\ -- If you need data from a tool to answer accurately, CALL THE TOOL. Do not guess from prior training. \ -Do not claim something is not connected before attempting to call its tool — the tool surface above \ -shows what is actually available right now.\n\ -- delegate_to_integrations_agent is your gateway to all connected provider integrations (calendar, \ -gmail, slack, etc.). Use it when the user asks about their schedule, mail, messages, or any other \ -integration-backed data.\n\ -\n\ -FINAL SPOKEN REPLY (strict — this is the only part the user hears):\n\ -- After tool work is done, output ONE short spoken sentence, max 25 words.\n\ -- Plain spoken English only. No markdown. No bullets. No code. No URLs.\n\ -- No meta-narration. Do not say \"Let me check…\", \"I will look…\", \"The user is asking…\", \ -\"We need to…\", \"I should…\". Just give the answer.\n\ -- If the user is not directly addressing you (chit-chat between humans, side conversation, your \ -name appearing inside a longer thought aimed at someone else), output an empty string and stay silent.\n\ -- For dictation / note requests (\"remember…\", \"action item…\", \"follow up on…\"), a 2-3 word \ -ack is enough (\"Got it.\", \"Noted.\").\n\ -- For genuinely unanswerable questions, say so in one short sentence rather than guessing."; - -/// First 12 chars of `request_id`, for log scoping. UUID prefixes are -/// unique enough at one-meet-at-a-time to keep transcripts apart. -fn short_id(id: &str) -> String { - id.chars().take(12).collect() -} - -/// Route the meeting utterance through the FULL orchestrator agent — -/// same path the chat UI and the webview meet handoff use. 
The -/// orchestrator inherits the user's connected integrations, memory -/// tree, MCP clients, skills, and the project-wide tool registry, so -/// "is my Friday evening free", "did anyone in #eng ping me about -/// the deploy", "remind me to mail Alice tomorrow" all answer with -/// real data — not a guess from the model's training prior. -/// -/// We rebuild the Agent per turn (cheap relative to the LLM call -/// itself, since the registry is initialised once at startup) and -/// wrap `run_single` in a 20s timeout so a slow tool iteration -/// doesn't leave the meeting participant in silence indefinitely. -/// -/// Errors propagate to the caller, which falls back to the bare -/// chat-completions path (`llm_meeting_basic`) so a config / -/// registry / token issue degrades to a polite reply instead of -/// dead air. -async fn llm_meeting_agentic(prompt: &str, request_id: &str) -> Result { - // Get-or-build the per-meet cached Agent. First wake of a meet - // builds the orchestrator once (memory tree + MCP + tools — 5-10s - // cold); subsequent wakes reuse the same instance, so its - // in-memory history accumulates and the orchestrator can recall - // earlier dialogue without disk-resume corruption tripping the - // tool_calls / tool_message API constraint. - let agent_lock = get_or_build_agent_for_meet(request_id).await?; - - // Lock for the duration of the turn. The lock is per-meet, so - // two distinct meet sessions can run agents in parallel; within - // one meet, turn_in_progress already prevents reentrancy. Held - // across run_single().await — that's why we use tokio::sync::Mutex. - let mut agent = agent_lock.lock().await; - - // Per-turn refresh of the time-context block. The voice directive - // is baked into the system prompt at build time; the clock has - // to update each turn or the bot will tell the user it's still - // 2am ten minutes later. 
Prepend the time block to the user - // utterance instead of touching the system prompt suffix (which - // we can't change without rebuilding the Agent). - let now_local = chrono::Local::now(); - let time_block = format!( - "[RIGHT-NOW CONTEXT — current local time: {} ({}), tz {}. \ - Use this directly for any time/date question; do not call a tool.]", - now_local.format("%Y-%m-%d %H:%M:%S"), - now_local.format("%A"), - now_local.format("%:z"), - ); - let user_message = format!("{time_block}\n\n{prompt}"); - - // Per-turn unique definition_name for the transcript file. The - // Agent's in-memory history persists across turns (cache); only - // the on-disk transcript filename rolls per turn so a kill - // mid-tool-call doesn't poison the next process's resume path. - let now_ms = std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_millis()) - .unwrap_or(0); - agent.set_agent_definition_name(format!( - "orchestrator_meet_{}_{now_ms}", - short_id(request_id) - )); - - log::info!( - "[meet-agent] agentic turn dispatch request_id={request_id} prompt_chars={} cached_history_msgs={}", - prompt.chars().count(), - agent.history().len(), - ); - - // Meet-agent runs during an active call — the prompt text is - // speech captured from a live meeting, which after run_grant_turn - // can include utterances from non-owner participants. Treat it as - // externally-sourced channel input (not local CLI): the gate - // routes external_effect tools through the audit-trail path - // instead of letting them run unprompted with trusted-CLI - // semantics. - let fut = crate::openhuman::agent::turn_origin::with_origin( - crate::openhuman::agent::turn_origin::AgentTurnOrigin::ExternalChannel { - channel: "meet".to_string(), - // Meet utterances don't carry a stable per-participant identity - // at this layer (the room is the addressing primitive); leave - // sender unset and let the gate fall back to the per-channel - // audit-row + TTL-deny policy. 
- sender: None, - reply_target: request_id.to_string(), - message_id: format!("meet-{request_id}-{now_ms}"), - }, - agent.run_single(&user_message), - ); - let reply = match tokio::time::timeout(Duration::from_secs(AGENTIC_TURN_TIMEOUT_SECS), fut) - .await - { - Ok(Ok(text)) => text, - Ok(Err(e)) => { - return Err(format!("[meet-agent] orchestrator run_single failed: {e}")); - } - Err(_elapsed) => { - log::warn!( - "[meet-agent] agentic turn timed out request_id={request_id} after {}s — speaking polite ack", - AGENTIC_TURN_TIMEOUT_SECS - ); - return Err(format!( - "agentic timeout after {AGENTIC_TURN_TIMEOUT_SECS}s" - )); - } - }; - - Ok(strip_for_speech(&reply)) -} - -/// Get the cached orchestrator for this meet, or build it on first -/// call. Returns an `Arc>` so the caller can lock -/// across the run_single().await. -async fn get_or_build_agent_for_meet(request_id: &str) -> Result>, String> { - { - let cache = agent_cache().lock().await; - if let Some(existing) = cache.get(request_id) { - return Ok(existing.clone()); - } - } - - // Cold build. Use the with_profile builder — same canonical path - // the web channel (chat UI) uses at channels/providers/web.rs:1570, - // which is what wires the user's connected integrations + delegation - // tools. profile_prompt_suffix carries the meet voice directive. - let config = crate::openhuman::config::ops::load_config_with_timeout().await?; - let mut agent = Agent::from_config_for_agent_with_profile( - &config, - "orchestrator", - None, - Some(MEET_VOICE_DIRECTIVE.to_string()), - ) - .map_err(|e| format!("[meet-agent] orchestrator build failed: {e}"))?; - - // Per-meet event context so the harness scopes its observability - // events to this request_id instead of colliding with the chat UI. 
- agent.set_event_context(format!("meet_{request_id}"), "meet_agent"); - agent.set_agent_definition_name(format!("orchestrator_meet_{}", short_id(request_id))); - - log::info!("[meet-agent] orchestrator built + cached for request_id={request_id}"); - - let arc = Arc::new(TokioMutex::new(agent)); - agent_cache() - .lock() - .await - .insert(request_id.to_string(), arc.clone()); - Ok(arc) -} - -/// Build a chat-completions request from rolling meeting history plus -/// the current user prompt, post it through the backend, and return -/// the assistant's reply (trimmed, possibly empty). -/// -/// Used as a fallback when the orchestrator path -/// (`llm_meeting_agentic`) cannot be built — missing config, -/// registry not initialised, no session token. The orchestrator path -/// gives memory/tool/integration access; this bare path only gets -/// the rolling caption history. Acceptable degradation so the bot -/// doesn't go silent in a config-degraded environment. -async fn llm_meeting_basic( - prompt: &str, - history: &[ConversationTurn], - system_prompt: &str, -) -> Result { - use crate::api::config::effective_backend_api_url; - use crate::api::jwt::get_session_token; - use crate::api::BackendOAuthClient; - use reqwest::Method; - - let config = crate::openhuman::config::ops::load_config_with_timeout().await?; - let token = get_session_token(&config) - .map_err(|e| e.to_string())? 
- .filter(|t| !t.trim().is_empty()) - .ok_or_else(|| "no backend session token".to_string())?; - - let api_url = effective_backend_api_url(&config.api_url); - let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; - - let mut messages: Vec = Vec::with_capacity(history.len() + 2); - messages.push(json!({ "role": "system", "content": system_prompt })); - for turn in history { - messages.push(json!({ "role": turn.role, "content": turn.content })); - } - messages.push(json!({ "role": "user", "content": prompt })); - - let body = json!({ - // chat-v1 = conversational non-reasoning model. agentic-v1 / - // reasoning-v1 leak their chain-of-thought as plain text - // ("We need to generate a single sentence…") into the response - // body when streamed without the structured thinking_delta - // channel — which TTS then reads aloud. chat-v1 produces a - // direct user-facing answer, which is what we want over voice. - "model": "chat-v1", - "temperature": 0.5, - "max_tokens": REPLY_MAX_TOKENS, - "messages": messages, - }); - - let raw = client - .authed_json( - &token, - Method::POST, - "/openai/v1/chat/completions", - Some(body), - ) - .await - .map_err(|e| e.to_string())?; - - let text = extract_chat_completion_text(&raw) - .ok_or_else(|| format!("unexpected chat completions response: {raw}"))?; - Ok(strip_for_speech(&text)) -} - -/// Trim characters that sound bad when read aloud by TTS but routinely -/// leak from a chat-completions response (markdown asterisks, fenced -/// code, leading bullets). Keep punctuation that affects prosody -/// (commas, periods, question marks) intact. -fn strip_for_speech(text: &str) -> String { - // Strip reasoning-model ... blocks before we strip - // markdown. DeepSeek / GMI / qwen-style reasoning models emit - // their internal chain-of-thought wrapped in ... - // tags ahead of the user-facing reply. 
Without this, TTS reads - // the entire monologue aloud — which on a 60s+ reasoning trace - // produces a minute of bot speech the user never asked for. - // Multiple non-overlapping blocks are stripped in sequence; an - // unclosed at the end (truncated output) drops everything - // from the tag onwards. - let mut cleaned = String::with_capacity(text.len()); - let mut rest = text; - loop { - match rest.find("") { - Some(open) => { - cleaned.push_str(&rest[..open]); - let after = &rest[open + "".len()..]; - match after.find("") { - Some(close) => { - rest = &after[close + "".len()..]; - } - None => { - // Unclosed tag → drop the rest as reasoning. - break; - } - } - } - None => { - cleaned.push_str(rest); - break; - } - } - } - let text = cleaned.trim(); - - let mut out = String::with_capacity(text.len()); - let mut in_code = false; - for line in text.lines() { - let trimmed = line.trim(); - if trimmed.starts_with("```") { - in_code = !in_code; - continue; - } - if in_code { - continue; - } - let cleaned: String = trimmed - .trim_start_matches(|c: char| c == '-' || c == '*' || c == '#' || c == '>') - .trim() - .chars() - .filter(|c| !matches!(c, '*' | '`' | '_' | '#')) - .collect(); - if cleaned.is_empty() { - continue; - } - if !out.is_empty() { - out.push(' '); - } - out.push_str(&cleaned); - } - let trimmed = out.trim().to_string(); - let de_reasoned = strip_untagged_reasoning(&trimmed); - cap_for_speech(&de_reasoned, MAX_TTS_CHARS) -} - -/// Strip reasoning-style preamble that reasoning models leak as plain -/// text (no `` tags) — phrases like "We need to generate…", -/// "I should respond with…", "The user said…", "Let me think…". -/// Heuristic: drop sentences whose lowercased trim matches a known -/// reasoning opener; if everything is reasoning, return only the last -/// sentence (final conclusion). If no signal, return input untouched. 
-fn strip_untagged_reasoning(text: &str) -> String { - if text.is_empty() { - return text.to_string(); - } - const REASONING_OPENERS: &[&str] = &[ - "we need to", - "we should", - "i need to", - "i should", - "i will", - "let me ", - "first,", - "the user said", - "the user is", - "the user asked", - "the user wants", - "this is a", - "this seems", - "so i should", - "so the response", - "so my response", - "okay, so", - "alright,", - "given that", - "since the user", - "the assistant", - "the response should", - "my response", - "to respond", - "responding with", - ]; - let sentences: Vec<&str> = text - .split_inclusive(|c: char| matches!(c, '.' | '!' | '?')) - .map(str::trim) - .filter(|s| !s.is_empty()) - .collect(); - if sentences.is_empty() { - return text.to_string(); - } - let kept: Vec<&str> = sentences - .iter() - .filter(|s| { - let lc = s.to_lowercase(); - !REASONING_OPENERS - .iter() - .any(|opener| lc.starts_with(opener)) - }) - .copied() - .collect(); - if kept.is_empty() { - // Everything was reasoning — return the last sentence as the - // probable conclusion, lower-cased openers stripped. - return sentences.last().map(|s| s.to_string()).unwrap_or_default(); - } - kept.join(" ") -} - -/// Truncate `text` to at most `max_chars` characters, preferring to -/// cut at the last sentence terminator (`.`, `!`, `?`) inside the -/// budget so the TTS doesn't trail off mid-clause. Falls back to a -/// hard char cut + ellipsis when no terminator fits. -fn cap_for_speech(text: &str, max_chars: usize) -> String { - let total = text.chars().count(); - if total <= max_chars { - return text.to_string(); - } - let prefix: String = text.chars().take(max_chars).collect(); - if let Some(idx) = prefix.rfind(['.', '!', '?']) { - let end = idx - + prefix[idx..] 
- .chars() - .next() - .map(char::len_utf8) - .unwrap_or(1); - return prefix[..end].trim_end().to_string(); - } - let mut out = prefix.trim_end().to_string(); - out.push('…'); - out -} - -/// One rolling-history entry handed to the LLM. -#[derive(Debug, Clone)] -struct ConversationTurn { - role: &'static str, - content: String, -} - -/// Pull the last `window` `Heard`/`Spoke` events from the session log -/// and shape them into chat-completions turns. `Note` events are -/// internal book-keeping (errors, wake-word matches) and are skipped. -fn recent_dialog_history(events: &[SessionEvent], window: usize) -> Vec { - let mut out: Vec = Vec::with_capacity(window); - for e in events.iter().rev() { - if out.len() >= window { - break; - } - let role = match e.kind { - SessionEventKind::Heard => "user", - SessionEventKind::Spoke => "assistant", - SessionEventKind::Note => continue, - }; - let content = e.text.trim(); - if content.is_empty() { - continue; - } - out.push(ConversationTurn { - role, - content: content.to_string(), - }); - } - out.reverse(); - out -} - -async fn tts(text: &str) -> Result, String> { - use crate::openhuman::voice::reply_speech::{synthesize_reply, ReplySpeechOptions}; - - let config = crate::openhuman::config::ops::load_config_with_timeout().await?; - // Tuned for live conversational speech, not narration: - // stability 0.4 — leave room for prosody / inflection. Higher - // values (>0.6) flatten the read into the "monotone audiobook" - // timbre the previous default produced. - // similarity_boost 0.75 — keep the chosen voice's character. - // style 0.35 — light expressiveness; too high makes punctuation - // swallow words. - // use_speaker_boost on — louder, clearer in noisy meetings. 
- let voice_settings = json!({ - "stability": 0.4, - "similarity_boost": 0.75, - "style": 0.35, - "use_speaker_boost": true, - }); - let opts = ReplySpeechOptions { - // Ask ElevenLabs (via the hosted backend) for raw PCM16LE @ - // 16 kHz so we can feed the result straight into the - // shell-side bridge with no transcoding. - output_format: Some("pcm_16000".to_string()), - model_id: Some(TTS_MODEL_ID.to_string()), - voice_settings: Some(voice_settings), - ..Default::default() - }; - let outcome = synthesize_reply(&config, text, &opts).await?; - let result = outcome.value; - let pcm_bytes = B64 - .decode(result.audio_base64.as_bytes()) - .map_err(|e| format!("decode tts base64: {e}"))?; - if !pcm_bytes.len().is_multiple_of(2) { - return Err(format!("odd byte length from tts: {}", pcm_bytes.len())); - } - Ok(pcm_bytes - .chunks_exact(2) - .map(|c| i16::from_le_bytes([c[0], c[1]])) - .collect()) -} - -fn extract_chat_completion_text(raw: &Value) -> Option { - raw.get("choices") - .and_then(|c| c.as_array()) - .and_then(|arr| arr.first()) - .and_then(|first| first.get("message")) - .and_then(|m| m.get("content")) - .and_then(|s| s.as_str()) - .map(|s| s.trim().to_string()) -} - -// ─── Stubs (fallback for tests / no-backend) ──────────────────────── - -async fn stub_stt(samples: &[i16]) -> String { - let secs = samples.len() as f32 / SAMPLE_RATE_HZ as f32; - format!("(heard ~{secs:.1}s of audio)") -} - -async fn stub_llm(_heard: &str) -> String { - "I'm listening.".to_string() -} - -async fn stub_tts(text: &str) -> Vec { - if text.is_empty() { - return Vec::new(); - } - let sample_rate = SAMPLE_RATE_HZ as f32; - let freq = 440.0_f32; - let duration_secs = 0.2_f32; - let count = (sample_rate * duration_secs) as usize; - (0..count) - .map(|i| { - let t = i as f32 / sample_rate; - (((2.0 * std::f32::consts::PI * freq * t).sin()) * (i16::MAX as f32 * 0.3)) as i16 - }) - .collect() -} - -#[cfg(test)] -#[path = "brain_tests.rs"] -mod tests; diff --git 
a/src/openhuman/meet_agent/brain/access.rs b/src/openhuman/meet_agent/brain/access.rs new file mode 100644 index 0000000000..c3d0a74c67 --- /dev/null +++ b/src/openhuman/meet_agent/brain/access.rs @@ -0,0 +1,345 @@ +//! Speaker authorization: intent classification, denial messages, +//! grant-intent detection, and the grant / soft-deny turn runners. + +use super::speech::tts; +use super::stubs::stub_tts; +use crate::openhuman::meet_agent::session::registry; +use crate::openhuman::meet_agent::types::SessionEventKind; + +// ─── Intent classification ────────────────────────────────────────── + +/// Classify a non-owner caption that tripped the wake word. The +/// gate has already decided the speaker isn't authorised; this +/// picks between a friendly hi-back (greeting / pleasantry) and +/// a polite refusal (real task ask). Matching is conservative: +/// when the post-wake tail is empty OR only contains greeting +/// words, treat it as a greeting. Anything else is assumed to be +/// a task ask. +pub(crate) fn classify_unauthorized_intent(caption_text: &str) -> UnauthorizedIntent { + // Lift the bit of text that comes after the matched wake + // phrase so we don't get fooled by the wake itself ("hey + // openhuman" obviously contains "hey"). + let lower = caption_text.to_ascii_lowercase(); + let wake_phrases = [ + "hey open human", + "hi open human", + "hello open human", + "hey openhuman", + "hi openhuman", + "hello openhuman", + "open human", + "openhuman", + ]; + let tail = wake_phrases + .iter() + .filter_map(|p| lower.find(p).map(|i| &lower[i + p.len()..])) + .next() + .unwrap_or(&lower); + // Strip punctuation / common filler so "hi there!" reduces to + // ["hi", "there"]. Keeping the word list cheap and English-only + // for v1; the locale-aware story lands with multilingual TTS. 
+ let words: Vec<&str> = tail + .split(|c: char| !c.is_ascii_alphanumeric()) + .filter(|w| !w.is_empty()) + .collect(); + if words.is_empty() { + return UnauthorizedIntent::Greeting; + } + const GREETING_WORDS: &[&str] = &[ + "hi", + "hello", + "hey", + "yo", + "sup", + "howdy", + "greetings", + "hola", + "good", + "morning", + "afternoon", + "evening", + "night", + "there", + "everyone", + "all", + "folks", + "team", + "guys", + "yall", + ]; + if words.iter().all(|w| GREETING_WORDS.contains(w)) { + UnauthorizedIntent::Greeting + } else { + UnauthorizedIntent::TaskAsk + } +} + +/// Output of `classify_unauthorized_intent`. Drives whether the +/// non-owner turn speaks a canned hi-back or routes the prompt +/// through a toolless LLM (general-knowledge + safe deflection). +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) enum UnauthorizedIntent { + /// Just a greeting — bot says hi back without offering tools. + Greeting, + /// Substantive question. Route to a toolless LLM with a strict + /// system prompt — answer general knowledge / casual chat, + /// refuse anything that would require the owner's personal + /// tools or data, and point the owner at the magic word + /// ("allow") if access is needed. + TaskAsk, +} + +// ─── Message builders ─────────────────────────────────────────────── + +/// System prompt for the non-owner branch. The LLM has no tool +/// surface attached and is told to refuse any request that would +/// need the owner's personal data. Kept short and explicit so the +/// model doesn't ad-lib a different boundary. +pub(super) fn non_owner_system_prompt(owner: &str) -> String { + let owner_label = if owner.trim().is_empty() { + "the meeting host" + } else { + owner.trim() + }; + format!( + "\ +You are openhuman, an AI participant in a live Google Meet call. 
The speaker is NOT the call \ +owner — the owner is {owner_label}.\n\ +\n\ +WHAT YOU MAY DO:\n\ +- Answer general knowledge questions (history, science, math, definitions, weather concepts).\n\ +- Casual conversation, jokes, small talk, greetings.\n\ +- Explain what you are and what you can do at a high level.\n\ +\n\ +WHAT YOU MUST REFUSE (no exceptions):\n\ +- Anything that would require {owner_label}'s personal data: their Slack, Gmail, Calendar, \ +contacts, memory notes, files, schedule, integrations, or chat history.\n\ +- Sending messages, scheduling, reminding, creating, modifying or deleting any data on their \ +behalf.\n\ +- Revealing what {owner_label} has previously told you or stored with you.\n\ +\n\ +WHEN REFUSING: respond with exactly one short sentence pointing at the magic word, e.g. \ +\"That needs {owner_label}'s permission — {owner_label}, say 'allow' if you'd like me to help.\"\n\ +\n\ +OUTPUT FORMAT (strict):\n\ +- ONE short spoken sentence, max 25 words.\n\ +- Plain English. No markdown, bullets, code fences, or URLs.\n\ +- No meta-narration (\"I should…\", \"Let me…\", \"As an AI…\"). Just answer.\n\ +- Respond in ENGLISH ONLY regardless of the speaker's language — TTS is English-only.\n\ +" + ) +} + +/// Friendly hi-back canned line when a non-owner just greets the +/// bot. Kept short and warm; doesn't mention the owner / privacy +/// gate at all — that's noise on a "hello". +pub(super) fn friendly_greeting_message(asker: &str) -> String { + let asker = asker.trim(); + if asker.is_empty() { + "Hi there! Nice to meet you.".to_string() + } else { + format!("Hi {asker}! Nice to meet you.") + } +} + +/// Spoken refusal when a non-owner trips the wake word. Built per +/// call from the configured owner display name so the audible +/// response names the actual person who has the keys, and tells +/// the owner the magic word ("allow") to grant access. Kept short +/// so it doesn't drown the conversation. 
+pub(crate) fn soft_deny_message(asker: &str, owner: &str) -> String { + let asker = asker.trim(); + let owner = owner.trim(); + match (asker.is_empty(), owner.is_empty()) { + (true, true) => "Sorry, I only respond to my owner.".to_string(), + (true, false) => format!( + "Sorry, only {owner} can ask me things in this call. {owner}, say 'allow' if you'd like me to answer." + ), + (false, true) => format!("Sorry {asker}, I only respond to my owner."), + (false, false) => format!( + "Sorry {asker}, only {owner} can ask me things here. {owner}, say 'allow' to let them in." + ), + } +} + +// ─── Grant-intent detection ───────────────────────────────────────── + +/// Recognise an "open the gate" intent from the owner's first words +/// after the wake phrase. Conservative: only fires when the prompt +/// begins with one of the canonical permit verbs so an unrelated +/// owner query that happens to contain "allow" or "yes" deeper in +/// the sentence isn't hijacked. +/// +/// Returns `true` when the owner is explicitly granting access to +/// the most-recently-refused asker. The caller still gates on +/// session-level state (`take_pending_unauthorized`) — without a +/// pending request the intent is meaningless and the prompt should +/// just run as a normal LLM turn. +pub(crate) fn looks_like_grant_intent(prompt: &str) -> bool { + let p = prompt.trim().to_ascii_lowercase(); + if p.is_empty() { + return false; + } + // Whole-prompt matches first so short approvals ("allow", "yes") + // don't collide with longer prompts that happen to start with + // the same word. 
+ matches!( + p.as_str(), + "allow" | "yes" | "ok" | "okay" | "go ahead" | "let them in" | "let them ask" | "permit" + ) || p.starts_with("allow ") + || p.starts_with("let them") + || p.starts_with("let him") + || p.starts_with("let her") + || p.starts_with("go ahead") + || p.starts_with("yes go ahead") + || p.starts_with("yes let") + || p.starts_with("permit ") + || p.starts_with("you can answer") + || p.starts_with("you can tell") +} + +// ─── Turn runners ──────────────────────────────────────────────────── + +/// Owner-grant path: the owner said "allow them" / "go ahead" / +/// "let them in" after a non-owner's wake refusal. Add the +/// previously-refused speaker to the per-call allowlist (so their +/// next wake fires through to the orchestrator), and speak a +/// short confirmation so they know they're in. +pub async fn run_grant_turn(request_id: &str, grantee: &str) -> Result { + let grantee = grantee.trim(); + let message = if grantee.is_empty() { + "Okay, you can ask me now.".to_string() + } else { + format!("Okay, {grantee} can ask me now.") + }; + log::info!("[meet-agent] grant request_id={request_id} grantee=\"{grantee}\""); + // Apply the grant on the session BEFORE speaking — if TTS races + // and the grantee re-asks during synthesis, we want their next + // wake to fire through. Also cancel any prior outbound so the + // confirmation doesn't queue behind a half-drained refusal. 
+ let _ = registry().with_session(request_id, |s| { + s.allow_speaker(grantee); + s.cancel_outbound(); + }); + let samples = match tts(&message).await { + Ok(samples) => samples, + Err(err) => { + log::warn!("[meet-agent] grant TTS failed request_id={request_id} err={err}"); + stub_tts(&message).await + } + }; + registry().with_session(request_id, |s| { + s.record_event( + SessionEventKind::Note, + format!("owner granted wake access to {grantee}"), + ); + s.record_event(SessionEventKind::Spoke, message.clone()); + if !samples.is_empty() { + s.enqueue_outbound_pcm(&samples, true); + } + // Clear the wake_active + turn_in_progress flags so the + // next caption (likely the grantee's actual question) can + // fire a new turn. Without this, the wake state from the + // owner's "allow them" prompt would coalesce the grantee's + // first real caption into a continuation of this grant turn. + s.wake_active = false; + s.turn_in_progress = false; + s.mark_turn_done(); + })?; + Ok(true) +} + +/// Soft-deny path: kick a canned-line TTS reply when the wake word +/// fires from a non-owner. Branches on intent: a bare greeting gets +/// a friendly hi-back; a substantive task ask gets the refusal that +/// tells the owner how to grant access. Does NOT touch the +/// orchestrator agent (no tool calls, no memory writes) — it's a +/// single canned line, so the failure modes are limited to TTS errors. +/// +/// `caption_text` is the full caption from `note_caption` so we can +/// classify intent here; the session has already recorded the +/// pending grant request and dispatch timestamp. +pub async fn run_soft_deny_turn( + request_id: &str, + asker: &str, + caption_text: &str, +) -> Result { + let owner = registry() + .with_session(request_id, |s| s.owner_display_name().to_string()) + .unwrap_or_default(); + let intent = classify_unauthorized_intent(caption_text); + // Greeting → canned hi (no network round-trip needed). + // TaskAsk → toolless LLM. 
The LLM has no tools attached, has + // an explicit "refuse personal-data asks" system + // prompt, and is asked to point the owner at the + // magic word when refusing. So a Q like "what's + // the capital of France" lands as a normal answer + // ("Paris"), while "read Nikhil's Slack" lands as + // the refusal. The LLM picks; we don't classify. + let message = match intent { + UnauthorizedIntent::Greeting => friendly_greeting_message(asker), + UnauthorizedIntent::TaskAsk => match llm_general_no_tools(caption_text, &owner).await { + Ok(reply) if !reply.trim().is_empty() => reply, + Ok(_) => { + // Empty reply = LLM declined silently. Fall back to + // the explicit canned refusal so the speaker hears + // *something* and knows the bot didn't crash. + log::info!( + "[meet-agent] non-owner LLM returned empty — using canned refusal request_id={request_id}" + ); + soft_deny_message(asker, &owner) + } + Err(err) => { + log::warn!("[meet-agent] non-owner LLM failed request_id={request_id} err={err}"); + soft_deny_message(asker, &owner) + } + }, + }; + log::info!( + "[meet-agent] soft-deny request_id={request_id} asker=\"{asker}\" owner=\"{owner}\" intent={intent:?}" + ); + // Cancel any prior outbound so the refusal doesn't queue behind a + // half-drained reply from a previous turn. 
+ let _ = registry().with_session(request_id, |s| s.cancel_outbound()); + let samples = match tts(&message).await { + Ok(samples) => samples, + Err(err) => { + log::warn!("[meet-agent] soft-deny TTS failed request_id={request_id} err={err}"); + stub_tts(&message).await + } + }; + registry().with_session(request_id, |s| { + let kind = match intent { + UnauthorizedIntent::Greeting => "greeting", + UnauthorizedIntent::TaskAsk => "refusal", + }; + s.record_event( + SessionEventKind::Note, + format!("soft-deny ({kind}): {asker} unauthorised wake"), + ); + s.record_event(SessionEventKind::Spoke, message.clone()); + if !samples.is_empty() { + s.enqueue_outbound_pcm(&samples, true); + } + // NB: do NOT call `mark_turn_done` here — that's the + // owner-min-turn-gap stamp, and we want the owner to be + // able to wake (e.g. say "allow them") within seconds of a + // refusal. The session's own `UNAUTHORIZED_COOLDOWN_MS` is + // what guards against a soft-deny loop from the same + // non-owner speaker. + })?; + Ok(true) +} + +// ─── Non-owner LLM path ───────────────────────────────────────────── + +/// Route a non-owner caption through the toolless chat-v1 LLM. +/// Returns the spoken text — the caller TTS's it and enqueues. +async fn llm_general_no_tools(prompt: &str, owner: &str) -> Result { + let system_prompt = non_owner_system_prompt(owner); + // No rolling history for the non-owner path — each ask is a + // fresh conversation. Sharing history between owner turns and + // non-owner turns risks leaking the owner's tool-call results + // into a stranger-facing reply. + super::llm::llm_meeting_basic(prompt, &[], &system_prompt).await +} diff --git a/src/openhuman/meet_agent/brain/constants.rs b/src/openhuman/meet_agent/brain/constants.rs new file mode 100644 index 0000000000..b99dc8a013 --- /dev/null +++ b/src/openhuman/meet_agent/brain/constants.rs @@ -0,0 +1,166 @@ +//! Compile-time constants and the process-wide agent cache shared +//! 
across all sub-modules in `brain/`. + +use std::collections::HashMap; +use std::sync::{Arc, OnceLock}; +use tokio::sync::Mutex as TokioMutex; + +use crate::openhuman::agent::harness::session::Agent; + +/// Process-wide cache of orchestrator Agents keyed by `request_id`. +/// Each meet session reuses the same Agent across all its turns so +/// the harness's in-memory `Agent.history` accumulates and the +/// orchestrator can recall prior dialogue ("did I tell you to +/// remember Friday?", "what did Alice say earlier?"). Without the +/// cache each turn builds a fresh Agent, loses the prior turn's +/// memory, and pays the 5-10s build cost every time. +/// +/// Locked with `tokio::sync::Mutex` because we hold the inner +/// `Arc>` lock across `run_single().await` — +/// std::sync::Mutex cannot be held across await without breaking +/// Send + leaking the lock on cancel. +static AGENT_CACHE: OnceLock>>>> = OnceLock::new(); + +pub(super) fn agent_cache() -> &'static TokioMutex>>> { + AGENT_CACHE.get_or_init(|| TokioMutex::new(HashMap::new())) +} + +/// Wall-clock ceiling on one agentic turn. Slack / Gmail fetches via +/// Composio + per-message filtering + iteration-2 synthesis can hit +/// 60-80s in the slow path. 90s gives the long integrations a chance +/// to land. The turn_in_progress gate blocks new wakes during the +/// wait, so the user cannot spawn parallel queries by re-asking. +pub(super) const AGENTIC_TURN_TIMEOUT_SECS: u64 = 90; + +/// Spoken filler played immediately after wake-word fires, before the +/// (possibly slow) orchestrator+tool path runs. Bridges the 30-60s +/// silence on slow integration paths. Kept short (~1s synth) so it +/// doesn't intrude on fast greetings / time questions. +pub(super) const PREROLL_ACK_PHRASE: &str = "On it."; + +/// How many of the most recent `Heard` / `Spoke` events we feed back +/// into the LLM as rolling conversation context. 
12 ≈ a few minutes of +/// captioned dialogue — enough for the model to follow a thread without +/// blowing the prompt budget. +pub(super) const CONTEXT_EVENT_WINDOW: usize = 12; + +/// Spoken-reply ceiling. Each token is roughly ¾ of a word, so 80 +/// tokens ≈ ~60 spoken words ≈ ~12 seconds. The system prompt asks for +/// one short sentence, but reasoning-style backends ignore soft length +/// hints and emit 800+ char monologues. Hard token cap keeps the bot +/// interruptible regardless of model behaviour. +pub(super) const REPLY_MAX_TOKENS: u32 = 80; + +/// ElevenLabs model. `eleven_turbo_v2_5` strikes the best +/// quality/latency balance; the older default the backend would pick +/// (`eleven_monolingual_v1`) sounds noticeably flatter. +pub(super) const TTS_MODEL_ID: &str = "eleven_turbo_v2_5"; + +/// Hard ceiling on reply characters fed to TTS. The LLM is asked to be +/// concise but reasoning models still emit 800+ char paragraphs. Cap +/// drops everything past the first sentence boundary at-or-before +/// this index, falling back to a raw char cut when no boundary fits. +/// ~25s of speech at average prosody — keeps the bot interruptible +/// and prevents the "60s monologue / can't talk over it" loop. +pub(super) const MAX_TTS_CHARS: usize = 400; + +/// Minimum samples below which we skip the brain turn entirely. +/// 250 ms @ 16 kHz — under this, VAD almost certainly fired on a +/// transient (cough, click) rather than real speech. +pub(super) const MIN_TURN_SAMPLES: usize = 4_000; + +/// Re-exported from `ops` so any drift (if we ever loosen the +/// boundary check) immediately breaks the WAV / duration math here +/// at compile time. Today the same constant is used in both places — +/// the ops boundary check rejects anything else outright. +pub(super) const SAMPLE_RATE_HZ: u32 = crate::openhuman::meet_agent::ops::REQUIRED_SAMPLE_RATE; + +/// Delay between wake-word match and prompt drain. 
Long enough that +/// 2-3 caption fragments can join up; short enough that the user +/// doesn't experience awkward silence after they stop talking. +pub(super) const CAPTION_TURN_DELAY_MS: u64 = 1_500; + +/// Prompt character threshold below which we skip the pre-roll ack. +/// Short prompts (greetings, trivial checks) are answered in 2-5s +/// without tools — they don't need an ack, and "On it. Yes, I can +/// hear you" sounds redundant. +pub(super) const PREROLL_SKIP_PROMPT_CHARS: usize = 50; + +/// Canned acknowledgements the agent speaks out loud after capturing +/// a note. Short, varied so consecutive notes don't sound robotic. +/// Selected by hashing the prompt so the same dictation reliably +/// produces the same ack (helpful for tests + debugging) while still +/// rotating across the set in a normal conversation. +pub(super) const ACK_PHRASES: &[&str] = + &["Got it.", "Noted.", "Adding that.", "On it.", "Captured."]; + +/// System prompt for the live meeting agent. Pushes the model toward +/// (a) recognising whether the latest utterance is genuinely directed +/// at it (intent classification — emit empty string when not), and +/// (b) responding conversationally and concisely when it is. +#[allow(dead_code)] +pub(super) const MEETING_SYSTEM_PROMPT: &str = "\ +You are OpenHuman, joining a live Google Meet call by voice. Every word you \ +produce will be spoken aloud over the call. The transcript shows `user` lines \ +(humans on the call, sometimes prefixed with a name) and `assistant` lines \ +(things you previously said out loud).\n\ +\n\ +STRICT OUTPUT RULES — these are non-negotiable. The output is fed DIRECTLY \ +into TTS and spoken aloud verbatim. Any meta-text becomes audible bot \ +gibberish on a live call.\n\ +1. Output ONE sentence. Maximum 25 spoken words.\n\ +2. Plain spoken English. No markdown. No bullets. No code. No emoji.\n\ +3. NO chain-of-thought. NO reasoning. NO planning. NO blocks. NO \ +preamble. 
NEVER write phrases like \"We need to…\", \"I should…\", \"Let me…\", \ +\"The user said…\", \"This is a greeting…\", \"So I should respond with…\", \ +\"My response is…\". Output ONLY the final answer that the user should hear.\n\ +4. Never repeat what the user said. Never narrate what you are about to do.\n\ +5. If the latest user line is not directly addressed to you, output the empty \ +string. Do not respond to side conversations or ambient speech.\n\ +6. Examples — good vs bad:\n\ + User: \"hello\" → GOOD: \"Hey there.\" BAD: \"The user said hello, so I should respond with a greeting.\"\n\ + User: \"what's the time\" → GOOD: \"I don't have a clock right now.\" BAD: \"We need to generate a single sentence. The user is asking the time.\"\n\ +\n\ +Address-detection: respond when the user names you (\"OpenHuman\", \"hey \ +openhuman\"), asks a direct question of you, or gives a direct command \ +(remember, summarise, look up). Otherwise stay silent.\n\ +\n\ +For unanswerable questions: say so in one sentence (\"I don't know that off \ +the top of my head\") instead of guessing or stalling.\n\ +For dictation / note requests: a 2-3 word ack (\"Got it.\", \"Noted.\"). Don't \ +read the note back.\n\ +"; + +/// Voice-frontend system-prompt directive prepended to the user +/// utterance before it reaches the orchestrator. The orchestrator +/// already has its own persona, tool catalogue, memory loader and +/// connected integrations; this addendum just tells it the answer is +/// going to be spoken aloud verbatim so it should reply in one short +/// spoken sentence with no markdown / no chain-of-thought / no +/// preamble. Wrapped in a delimiter so the orchestrator can't confuse +/// the directive with the user's actual utterance. +pub(super) const MEET_VOICE_DIRECTIVE: &str = "\ +MEETING VOICE MODE — this conversation is happening live over voice in a Google Meet call.\n\ +\n\ +LANGUAGE: Respond in ENGLISH ONLY. 
Do not switch languages even if a user's name, prior memory, or transcript hint suggests another locale. The TTS engine is English-only; non-English output produces garbled audio.\n\ +\n\ +TOOL USE (encouraged):\n\ +- USE TOOLS whenever a tool can give a real answer. Calendar, email, slack, memory, integrations — \ +call them. Tool calls are invisible to the user and DO NOT count toward your reply word budget.\n\ +- If you need data from a tool to answer accurately, CALL THE TOOL. Do not guess from prior training. \ +Do not claim something is not connected before attempting to call its tool — the tool surface above \ +shows what is actually available right now.\n\ +- delegate_to_integrations_agent is your gateway to all connected provider integrations (calendar, \ +gmail, slack, etc.). Use it when the user asks about their schedule, mail, messages, or any other \ +integration-backed data.\n\ +\n\ +FINAL SPOKEN REPLY (strict — this is the only part the user hears):\n\ +- After tool work is done, output ONE short spoken sentence, max 25 words.\n\ +- Plain spoken English only. No markdown. No bullets. No code. No URLs.\n\ +- No meta-narration. Do not say \"Let me check…\", \"I will look…\", \"The user is asking…\", \ +\"We need to…\", \"I should…\". Just give the answer.\n\ +- If the user is not directly addressing you (chit-chat between humans, side conversation, your \ +name appearing inside a longer thought aimed at someone else), output an empty string and stay silent.\n\ +- For dictation / note requests (\"remember…\", \"action item…\", \"follow up on…\"), a 2-3 word \ +ack is enough (\"Got it.\", \"Noted.\").\n\ +- For genuinely unanswerable questions, say so in one short sentence rather than guessing."; diff --git a/src/openhuman/meet_agent/brain/llm.rs b/src/openhuman/meet_agent/brain/llm.rs new file mode 100644 index 0000000000..99f259e35f --- /dev/null +++ b/src/openhuman/meet_agent/brain/llm.rs @@ -0,0 +1,248 @@ +//! 
LLM adapters: the full orchestrator path (`llm_meeting_agentic`) and +//! the bare chat-completions fallback (`llm_meeting_basic`). + +use std::sync::Arc; +use std::time::Duration; +use tokio::sync::Mutex as TokioMutex; + +use serde_json::{json, Value}; + +use super::constants::{ + agent_cache, AGENTIC_TURN_TIMEOUT_SECS, MEET_VOICE_DIRECTIVE, REPLY_MAX_TOKENS, +}; +use super::text::strip_for_speech; +use crate::openhuman::agent::harness::session::Agent; + +/// One rolling-history entry handed to the LLM. +#[derive(Debug, Clone)] +pub(super) struct ConversationTurn { + pub role: &'static str, + pub content: String, +} + +/// First 12 chars of `request_id`, for log scoping. UUID prefixes are +/// unique enough at one-meet-at-a-time to keep transcripts apart. +pub(super) fn short_id(id: &str) -> String { + id.chars().take(12).collect() +} + +/// Route the meeting utterance through the FULL orchestrator agent — +/// same path the chat UI and the webview meet handoff use. The +/// orchestrator inherits the user's connected integrations, memory +/// tree, MCP clients, skills, and the project-wide tool registry, so +/// "is my Friday evening free", "did anyone in #eng ping me about +/// the deploy", "remind me to mail Alice tomorrow" all answer with +/// real data — not a guess from the model's training prior. +/// +/// We rebuild the Agent per turn (cheap relative to the LLM call +/// itself, since the registry is initialised once at startup) and +/// wrap `run_single` in a 20s timeout so a slow tool iteration +/// doesn't leave the meeting participant in silence indefinitely. +/// +/// Errors propagate to the caller, which falls back to the bare +/// chat-completions path (`llm_meeting_basic`) so a config / +/// registry / token issue degrades to a polite reply instead of +/// dead air. +pub(super) async fn llm_meeting_agentic(prompt: &str, request_id: &str) -> Result { + // Get-or-build the per-meet cached Agent. 
First wake of a meet + // builds the orchestrator once (memory tree + MCP + tools — 5-10s + // cold); subsequent wakes reuse the same instance, so its + // in-memory history accumulates and the orchestrator can recall + // earlier dialogue without disk-resume corruption tripping the + // tool_calls / tool_message API constraint. + let agent_lock = get_or_build_agent_for_meet(request_id).await?; + + // Lock for the duration of the turn. The lock is per-meet, so + // two distinct meet sessions can run agents in parallel; within + // one meet, turn_in_progress already prevents reentrancy. Held + // across run_single().await — that's why we use tokio::sync::Mutex. + let mut agent = agent_lock.lock().await; + + // Per-turn refresh of the time-context block. The voice directive + // is baked into the system prompt at build time; the clock has + // to update each turn or the bot will tell the user it's still + // 2am ten minutes later. Prepend the time block to the user + // utterance instead of touching the system prompt suffix (which + // we can't change without rebuilding the Agent). + let now_local = chrono::Local::now(); + let time_block = format!( + "[RIGHT-NOW CONTEXT — current local time: {} ({}), tz {}. \ + Use this directly for any time/date question; do not call a tool.]", + now_local.format("%Y-%m-%d %H:%M:%S"), + now_local.format("%A"), + now_local.format("%:z"), + ); + let user_message = format!("{time_block}\n\n{prompt}"); + + // Per-turn unique definition_name for the transcript file. The + // Agent's in-memory history persists across turns (cache); only + // the on-disk transcript filename rolls per turn so a kill + // mid-tool-call doesn't poison the next process's resume path. 
+ let now_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis()) + .unwrap_or(0); + agent.set_agent_definition_name(format!( + "orchestrator_meet_{}_{now_ms}", + short_id(request_id) + )); + + log::info!( + "[meet-agent] agentic turn dispatch request_id={request_id} prompt_chars={} cached_history_msgs={}", + prompt.chars().count(), + agent.history().len(), + ); + + // Meet-agent runs during an active call — the prompt text is + // speech captured from a live meeting, which after run_grant_turn + // can include utterances from non-owner participants. Treat it as + // externally-sourced channel input (not local CLI): the gate + // routes external_effect tools through the audit-trail path + // instead of letting them run unprompted with trusted-CLI + // semantics. + let fut = crate::openhuman::agent::turn_origin::with_origin( + crate::openhuman::agent::turn_origin::AgentTurnOrigin::ExternalChannel { + channel: "meet".to_string(), + // Meet utterances don't carry a stable per-participant identity + // at this layer (the room is the addressing primitive); leave + // sender unset and let the gate fall back to the per-channel + // audit-row + TTL-deny policy. + sender: None, + reply_target: request_id.to_string(), + message_id: format!("meet-{request_id}-{now_ms}"), + }, + agent.run_single(&user_message), + ); + let reply = match tokio::time::timeout(Duration::from_secs(AGENTIC_TURN_TIMEOUT_SECS), fut) + .await + { + Ok(Ok(text)) => text, + Ok(Err(e)) => { + return Err(format!("[meet-agent] orchestrator run_single failed: {e}")); + } + Err(_elapsed) => { + log::warn!( + "[meet-agent] agentic turn timed out request_id={request_id} after {}s — speaking polite ack", + AGENTIC_TURN_TIMEOUT_SECS + ); + return Err(format!( + "agentic timeout after {AGENTIC_TURN_TIMEOUT_SECS}s" + )); + } + }; + + Ok(strip_for_speech(&reply)) +} + +/// Get the cached orchestrator for this meet, or build it on first +/// call. 
Returns an `Arc>` so the caller can lock +/// across the run_single().await. +async fn get_or_build_agent_for_meet(request_id: &str) -> Result>, String> { + { + let cache = agent_cache().lock().await; + if let Some(existing) = cache.get(request_id) { + return Ok(existing.clone()); + } + } + + // Cold build. Use the with_profile builder — same canonical path + // the web channel (chat UI) uses at channels/providers/web.rs:1570, + // which is what wires the user's connected integrations + delegation + // tools. profile_prompt_suffix carries the meet voice directive. + let config = crate::openhuman::config::ops::load_config_with_timeout().await?; + let mut agent = Agent::from_config_for_agent_with_profile( + &config, + "orchestrator", + None, + Some(MEET_VOICE_DIRECTIVE.to_string()), + ) + .map_err(|e| format!("[meet-agent] orchestrator build failed: {e}"))?; + + // Per-meet event context so the harness scopes its observability + // events to this request_id instead of colliding with the chat UI. + agent.set_event_context(format!("meet_{request_id}"), "meet_agent"); + agent.set_agent_definition_name(format!("orchestrator_meet_{}", short_id(request_id))); + + log::info!("[meet-agent] orchestrator built + cached for request_id={request_id}"); + + let arc = Arc::new(TokioMutex::new(agent)); + agent_cache() + .lock() + .await + .insert(request_id.to_string(), arc.clone()); + Ok(arc) +} + +/// Build a chat-completions request from rolling meeting history plus +/// the current user prompt, post it through the backend, and return +/// the assistant's reply (trimmed, possibly empty). +/// +/// Used as a fallback when the orchestrator path +/// (`llm_meeting_agentic`) cannot be built — missing config, +/// registry not initialised, no session token. The orchestrator path +/// gives memory/tool/integration access; this bare path only gets +/// the rolling caption history. Acceptable degradation so the bot +/// doesn't go silent in a config-degraded environment. 
+pub(super) async fn llm_meeting_basic( + prompt: &str, + history: &[ConversationTurn], + system_prompt: &str, +) -> Result { + use crate::api::config::effective_backend_api_url; + use crate::api::jwt::get_session_token; + use crate::api::BackendOAuthClient; + use reqwest::Method; + + let config = crate::openhuman::config::ops::load_config_with_timeout().await?; + let token = get_session_token(&config) + .map_err(|e| e.to_string())? + .filter(|t| !t.trim().is_empty()) + .ok_or_else(|| "no backend session token".to_string())?; + + let api_url = effective_backend_api_url(&config.api_url); + let client = BackendOAuthClient::new(&api_url).map_err(|e| e.to_string())?; + + let mut messages: Vec = Vec::with_capacity(history.len() + 2); + messages.push(json!({ "role": "system", "content": system_prompt })); + for turn in history { + messages.push(json!({ "role": turn.role, "content": turn.content })); + } + messages.push(json!({ "role": "user", "content": prompt })); + + let body = json!({ + // chat-v1 = conversational non-reasoning model. agentic-v1 / + // reasoning-v1 leak their chain-of-thought as plain text + // ("We need to generate a single sentence…") into the response + // body when streamed without the structured thinking_delta + // channel — which TTS then reads aloud. chat-v1 produces a + // direct user-facing answer, which is what we want over voice. 
+ "model": "chat-v1", + "temperature": 0.5, + "max_tokens": REPLY_MAX_TOKENS, + "messages": messages, + }); + + let raw = client + .authed_json( + &token, + Method::POST, + "/openai/v1/chat/completions", + Some(body), + ) + .await + .map_err(|e| e.to_string())?; + + let text = extract_chat_completion_text(&raw) + .ok_or_else(|| format!("unexpected chat completions response: {raw}"))?; + Ok(strip_for_speech(&text)) +} + +pub(crate) fn extract_chat_completion_text(raw: &Value) -> Option { + raw.get("choices") + .and_then(|c| c.as_array()) + .and_then(|arr| arr.first()) + .and_then(|first| first.get("message")) + .and_then(|m| m.get("content")) + .and_then(|s| s.as_str()) + .map(|s| s.trim().to_string()) +} diff --git a/src/openhuman/meet_agent/brain/mod.rs b/src/openhuman/meet_agent/brain/mod.rs new file mode 100644 index 0000000000..acb7516e32 --- /dev/null +++ b/src/openhuman/meet_agent/brain/mod.rs @@ -0,0 +1,73 @@ +//! Turn orchestration: STT → LLM → TTS. +//! +//! ## Pipeline +//! +//! When [`session::Vad`] reports `EndOfUtterance`, [`run_turn`] drains +//! the inbound buffer and runs three serial stages: +//! +//! 1. **STT** — wrap the PCM16LE samples in a WAV container and post +//! to [`crate::openhuman::voice::cloud_transcribe`]. Returns the +//! transcribed text (or `Err` on transport / auth failure). +//! +//! 2. **LLM** — send a tiny chat-completions request through +//! [`crate::api::BackendOAuthClient`] with a "live meeting agent" +//! system prompt and the transcript as the user message. Returns a +//! short reply (or empty string when the agent decides to stay +//! silent). +//! +//! 3. **TTS** — feed the reply text into +//! [`crate::openhuman::voice::reply_speech`] requesting +//! `output_format = "pcm_16000"`. Decode the base64 PCM bytes back +//! into `Vec` and enqueue on the session's outbound queue. +//! +//! ## Fallback +//! +//! When the backend session token is missing (the most common reason +//! 
a stage fails outside production: tests, no-network smoke runs), +//! we fall back to deterministic stubs so the loop still produces an +//! audible blip and the unit tests stay network-free. Real +//! transport / 5xx errors are *not* swallowed — they surface as +//! `Note` events so a real-call failure is visible in the transcript +//! log, not silently degraded to a stub. + +mod access; +mod constants; +mod llm; +mod speech; +mod stubs; +mod text; +mod turns; + +// ─── Public API (unchanged external surface) ──────────────────────── + +pub use access::{run_grant_turn, run_soft_deny_turn}; +pub use turns::{run_caption_turn, run_turn}; + +use constants::agent_cache; + +/// Drop the cached orchestrator for a meet session. Called from +/// `handle_stop_session` so a finished call doesn't leak the Agent +/// (each one carries memory tree + tool registry handles). +pub async fn forget_session_agent(request_id: &str) { + let mut guard = agent_cache().lock().await; + if guard.remove(request_id).is_some() { + log::info!("[meet-agent] dropped cached orchestrator for request_id={request_id}"); + } +} + +// ─── Test surface (items accessed by brain_tests.rs) ──────────────── +// brain_tests.rs uses `super::*` and accesses private items directly, +// so we expose what the tests need via a `#[cfg(test)]` re-export block. + +#[cfg(test)] +pub(crate) use access::{ + classify_unauthorized_intent, looks_like_grant_intent, soft_deny_message, UnauthorizedIntent, +}; +#[cfg(test)] +pub(crate) use llm::extract_chat_completion_text; +#[cfg(test)] +pub(crate) use text::{recent_dialog_history, strip_for_speech}; + +#[cfg(test)] +#[path = "../brain_tests.rs"] +mod tests; diff --git a/src/openhuman/meet_agent/brain/speech.rs b/src/openhuman/meet_agent/brain/speech.rs new file mode 100644 index 0000000000..67d003674d --- /dev/null +++ b/src/openhuman/meet_agent/brain/speech.rs @@ -0,0 +1,68 @@ +//! STT and TTS adapters (real cloud paths). 
+ +use base64::{engine::general_purpose::STANDARD as B64, Engine as _}; +use serde_json::json; + +use super::constants::{SAMPLE_RATE_HZ, TTS_MODEL_ID}; +use crate::openhuman::meet_agent::wav; + +// ─── Real STT adapter ─────────────────────────────────────────────── + +pub(super) async fn stt(samples: &[i16]) -> Result { + use crate::openhuman::voice::cloud_transcribe::{transcribe_cloud, CloudTranscribeOptions}; + + let config = crate::openhuman::config::ops::load_config_with_timeout().await?; + let wav_bytes = wav::pack_pcm16le_mono_wav(samples, SAMPLE_RATE_HZ); + let audio_b64 = B64.encode(&wav_bytes); + let opts = CloudTranscribeOptions { + mime_type: Some("audio/wav".to_string()), + file_name: Some("meet-agent.wav".to_string()), + ..Default::default() + }; + let outcome = transcribe_cloud(&config, &audio_b64, &opts).await?; + let text = outcome.value.text.clone(); + Ok(text) +} + +// ─── Real TTS adapter ─────────────────────────────────────────────── + +pub(super) async fn tts(text: &str) -> Result, String> { + use crate::openhuman::voice::reply_speech::{synthesize_reply, ReplySpeechOptions}; + + let config = crate::openhuman::config::ops::load_config_with_timeout().await?; + // Tuned for live conversational speech, not narration: + // stability 0.4 — leave room for prosody / inflection. Higher + // values (>0.6) flatten the read into the "monotone audiobook" + // timbre the previous default produced. + // similarity_boost 0.75 — keep the chosen voice's character. + // style 0.35 — light expressiveness; too high makes punctuation + // swallow words. + // use_speaker_boost on — louder, clearer in noisy meetings. + let voice_settings = json!({ + "stability": 0.4, + "similarity_boost": 0.75, + "style": 0.35, + "use_speaker_boost": true, + }); + let opts = ReplySpeechOptions { + // Ask ElevenLabs (via the hosted backend) for raw PCM16LE @ + // 16 kHz so we can feed the result straight into the + // shell-side bridge with no transcoding. 
+ output_format: Some("pcm_16000".to_string()), + model_id: Some(TTS_MODEL_ID.to_string()), + voice_settings: Some(voice_settings), + ..Default::default() + }; + let outcome = synthesize_reply(&config, text, &opts).await?; + let result = outcome.value; + let pcm_bytes = B64 + .decode(result.audio_base64.as_bytes()) + .map_err(|e| format!("decode tts base64: {e}"))?; + if !pcm_bytes.len().is_multiple_of(2) { + return Err(format!("odd byte length from tts: {}", pcm_bytes.len())); + } + Ok(pcm_bytes + .chunks_exact(2) + .map(|c| i16::from_le_bytes([c[0], c[1]])) + .collect()) +} diff --git a/src/openhuman/meet_agent/brain/stubs.rs b/src/openhuman/meet_agent/brain/stubs.rs new file mode 100644 index 0000000000..1108fd81aa --- /dev/null +++ b/src/openhuman/meet_agent/brain/stubs.rs @@ -0,0 +1,29 @@ +//! Stub fallbacks for STT/LLM/TTS used in tests and no-backend runs. + +use super::constants::SAMPLE_RATE_HZ; + +pub(super) async fn stub_stt(samples: &[i16]) -> String { + let secs = samples.len() as f32 / SAMPLE_RATE_HZ as f32; + format!("(heard ~{secs:.1}s of audio)") +} + +#[allow(dead_code)] +pub(super) async fn stub_llm(_heard: &str) -> String { + "I'm listening.".to_string() +} + +pub(super) async fn stub_tts(text: &str) -> Vec { + if text.is_empty() { + return Vec::new(); + } + let sample_rate = SAMPLE_RATE_HZ as f32; + let freq = 440.0_f32; + let duration_secs = 0.2_f32; + let count = (sample_rate * duration_secs) as usize; + (0..count) + .map(|i| { + let t = i as f32 / sample_rate; + (((2.0 * std::f32::consts::PI * freq * t).sin()) * (i16::MAX as f32 * 0.3)) as i16 + }) + .collect() +} diff --git a/src/openhuman/meet_agent/brain/text.rs b/src/openhuman/meet_agent/brain/text.rs new file mode 100644 index 0000000000..49b30d413d --- /dev/null +++ b/src/openhuman/meet_agent/brain/text.rs @@ -0,0 +1,192 @@ +//! Text post-processing: strip markdown / reasoning traces for TTS, +//! cap reply length, and build rolling dialogue history. 
+ +use super::constants::MAX_TTS_CHARS; +use super::llm::ConversationTurn; +use crate::openhuman::meet_agent::types::{SessionEvent, SessionEventKind}; + +/// Trim characters that sound bad when read aloud by TTS but routinely +/// leak from a chat-completions response (markdown asterisks, fenced +/// code, leading bullets). Keep punctuation that affects prosody +/// (commas, periods, question marks) intact. +pub(crate) fn strip_for_speech(text: &str) -> String { + // Strip reasoning-model ... blocks before we strip + // markdown. DeepSeek / GMI / qwen-style reasoning models emit + // their internal chain-of-thought wrapped in ... + // tags ahead of the user-facing reply. Without this, TTS reads + // the entire monologue aloud — which on a 60s+ reasoning trace + // produces a minute of bot speech the user never asked for. + // Multiple non-overlapping blocks are stripped in sequence; an + // unclosed at the end (truncated output) drops everything + // from the tag onwards. + let mut cleaned = String::with_capacity(text.len()); + let mut rest = text; + loop { + match rest.find("") { + Some(open) => { + cleaned.push_str(&rest[..open]); + let after = &rest[open + "".len()..]; + match after.find("") { + Some(close) => { + rest = &after[close + "".len()..]; + } + None => { + // Unclosed tag → drop the rest as reasoning. 
+ break; + } + } + } + None => { + cleaned.push_str(rest); + break; + } + } + } + let text = cleaned.trim(); + + let mut out = String::with_capacity(text.len()); + let mut in_code = false; + for line in text.lines() { + let trimmed = line.trim(); + if trimmed.starts_with("```") { + in_code = !in_code; + continue; + } + if in_code { + continue; + } + let cleaned: String = trimmed + .trim_start_matches(|c: char| c == '-' || c == '*' || c == '#' || c == '>') + .trim() + .chars() + .filter(|c| !matches!(c, '*' | '`' | '_' | '#')) + .collect(); + if cleaned.is_empty() { + continue; + } + if !out.is_empty() { + out.push(' '); + } + out.push_str(&cleaned); + } + let trimmed = out.trim().to_string(); + let de_reasoned = strip_untagged_reasoning(&trimmed); + cap_for_speech(&de_reasoned, MAX_TTS_CHARS) +} + +/// Strip reasoning-style preamble that reasoning models leak as plain +/// text (no `` tags) — phrases like "We need to generate…", +/// "I should respond with…", "The user said…", "Let me think…". +/// Heuristic: drop sentences whose lowercased trim matches a known +/// reasoning opener; if everything is reasoning, return only the last +/// sentence (final conclusion). If no signal, return input untouched. +pub(super) fn strip_untagged_reasoning(text: &str) -> String { + if text.is_empty() { + return text.to_string(); + } + const REASONING_OPENERS: &[&str] = &[ + "we need to", + "we should", + "i need to", + "i should", + "i will", + "let me ", + "first,", + "the user said", + "the user is", + "the user asked", + "the user wants", + "this is a", + "this seems", + "so i should", + "so the response", + "so my response", + "okay, so", + "alright,", + "given that", + "since the user", + "the assistant", + "the response should", + "my response", + "to respond", + "responding with", + ]; + let sentences: Vec<&str> = text + .split_inclusive(|c: char| matches!(c, '.' | '!' 
| '?')) + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect(); + if sentences.is_empty() { + return text.to_string(); + } + let kept: Vec<&str> = sentences + .iter() + .filter(|s| { + let lc = s.to_lowercase(); + !REASONING_OPENERS + .iter() + .any(|opener| lc.starts_with(opener)) + }) + .copied() + .collect(); + if kept.is_empty() { + // Everything was reasoning — return the last sentence as the + // probable conclusion, lower-cased openers stripped. + return sentences.last().map(|s| s.to_string()).unwrap_or_default(); + } + kept.join(" ") +} + +/// Truncate `text` to at most `max_chars` characters, preferring to +/// cut at the last sentence terminator (`.`, `!`, `?`) inside the +/// budget so the TTS doesn't trail off mid-clause. Falls back to a +/// hard char cut + ellipsis when no terminator fits. +pub(super) fn cap_for_speech(text: &str, max_chars: usize) -> String { + let total = text.chars().count(); + if total <= max_chars { + return text.to_string(); + } + let prefix: String = text.chars().take(max_chars).collect(); + if let Some(idx) = prefix.rfind(['.', '!', '?']) { + let end = idx + + prefix[idx..] + .chars() + .next() + .map(char::len_utf8) + .unwrap_or(1); + return prefix[..end].trim_end().to_string(); + } + let mut out = prefix.trim_end().to_string(); + out.push('…'); + out +} + +/// Pull the last `window` `Heard`/`Spoke` events from the session log +/// and shape them into chat-completions turns. `Note` events are +/// internal book-keeping (errors, wake-word matches) and are skipped. 
+pub(crate) fn recent_dialog_history( + events: &[SessionEvent], + window: usize, +) -> Vec { + let mut out: Vec = Vec::with_capacity(window); + for e in events.iter().rev() { + if out.len() >= window { + break; + } + let role = match e.kind { + SessionEventKind::Heard => "user", + SessionEventKind::Spoke => "assistant", + SessionEventKind::Note => continue, + }; + let content = e.text.trim(); + if content.is_empty() { + continue; + } + out.push(ConversationTurn { + role, + content: content.to_string(), + }); + } + out.reverse(); + out +} diff --git a/src/openhuman/meet_agent/brain/turns.rs b/src/openhuman/meet_agent/brain/turns.rs new file mode 100644 index 0000000000..f80f4618ae --- /dev/null +++ b/src/openhuman/meet_agent/brain/turns.rs @@ -0,0 +1,331 @@ +//! Turn orchestration: STT → LLM → TTS → enqueue outbound PCM. +//! +//! Two entry points: +//! - [`run_turn`]: PCM-path (VAD `EndOfUtterance`). Drains the inbound +//! buffer, runs STT, feeds the transcript to the agentic LLM, TTS's +//! the reply, and enqueues it. +//! - [`run_caption_turn`]: Caption-path. Wired to the wake-word trigger; +//! skips STT because the caption text is already available. Handles +//! the pre-roll ack, grant-intent fast path, and bare-wake greeting. + +use super::access::{looks_like_grant_intent, run_grant_turn}; +use super::constants::{ + ACK_PHRASES, CAPTION_TURN_DELAY_MS, CONTEXT_EVENT_WINDOW, MIN_TURN_SAMPLES, PREROLL_ACK_PHRASE, + PREROLL_SKIP_PROMPT_CHARS, +}; +use super::llm::llm_meeting_agentic; +use super::speech::{stt, tts}; +use super::stubs::{stub_stt, stub_tts}; +use super::text::recent_dialog_history; +use crate::openhuman::meet_agent::session::registry; +use crate::openhuman::meet_agent::types::SessionEventKind; + +/// Canned acknowledgements the agent speaks out loud after capturing +/// a note. Selected by hashing the prompt (deterministic, rotates across +/// the set in normal conversation). 
+#[allow(dead_code)] +pub(super) fn pick_ack_phrase(prompt: &str) -> &'static str { + if prompt.trim().is_empty() { + return ""; + } + let h: u32 = prompt.bytes().fold(0u32, |a, b| a.wrapping_add(b as u32)); + ACK_PHRASES[(h as usize) % ACK_PHRASES.len()] +} + +/// Fire one brain turn for the named session. Returns `Ok(true)` when a +/// turn actually ran, `Ok(false)` when the inbound buffer was below the +/// floor. +pub async fn run_turn(request_id: &str) -> Result { + let drained = registry().with_session(request_id, |s| s.drain_inbound())?; + if drained.len() < MIN_TURN_SAMPLES { + log::debug!( + "[meet-agent] skipping turn request_id={request_id} samples={}", + drained.len() + ); + return Ok(false); + } + + log::info!( + "[meet-agent] turn start request_id={request_id} samples={}", + drained.len() + ); + + // ─── STT ──────────────────────────────────────────────────────── + let heard = match stt(&drained).await { + Ok(text) if text.trim().is_empty() => { + log::info!("[meet-agent] STT empty, skipping turn request_id={request_id}"); + return Ok(false); + } + Ok(text) => text, + Err(err) => { + log::warn!("[meet-agent] STT failed request_id={request_id} err={err}"); + // Record a Note so the transcript log makes the failure + // visible to whoever's looking at logs. 
+ let _ = registry().with_session(request_id, |s| { + s.record_event( + SessionEventKind::Note, + format!("STT failure (using stub): {err}"), + ); + }); + stub_stt(&drained).await + } + }; + log::info!( + "[meet-agent] STT request_id={request_id} text_chars={}", + heard.chars().count() + ); + + // ─── LLM (agentic only; no basic-LLM fallback to avoid toolless hallucinations) ─ + let reply_text = match llm_meeting_agentic(&heard, request_id).await { + Ok(text) => text, + Err(agentic_err) => { + log::warn!( + "[meet-agent] STT-path agentic failed — speaking polite ack request_id={request_id} err={agentic_err}" + ); + let _ = registry().with_session(request_id, |s| { + s.record_event( + SessionEventKind::Note, + format!("agentic path failed; speaking ack: {agentic_err}"), + ); + }); + "Let me get back to you on that.".to_string() + } + }; + + // ─── TTS ──────────────────────────────────────────────────────── + let synthesized = if reply_text.trim().is_empty() { + Vec::new() + } else { + match tts(&reply_text).await { + Ok(samples) => samples, + Err(err) => { + log::warn!("[meet-agent] TTS failed request_id={request_id} err={err}"); + let _ = registry().with_session(request_id, |s| { + s.record_event( + SessionEventKind::Note, + format!("TTS failure (using stub): {err}"), + ); + }); + stub_tts(&reply_text).await + } + } + }; + + registry().with_session(request_id, |s| { + s.record_event(SessionEventKind::Heard, heard.clone()); + if !reply_text.is_empty() { + s.record_event(SessionEventKind::Spoke, reply_text.clone()); + if !synthesized.is_empty() { + s.enqueue_outbound_pcm(&synthesized, true); + } + } else { + s.record_event( + SessionEventKind::Note, + "agent declined to respond".to_string(), + ); + } + s.turn_count += 1; + })?; + + log::info!( + "[meet-agent] turn done request_id={request_id} reply_chars={} synth_samples={}", + reply_text.chars().count(), + synthesized.len() + ); + Ok(true) +} + +/// Caption-driven turn. 
Drains the session's pending wake-word prompt +/// (assembled by `session::note_caption`) and runs LLM → TTS → enqueue +/// outbound. Skips STT entirely — the captions are already text. +/// +/// We give the user a short window (`CAPTION_TURN_DELAY_MS`) after the +/// wake word fires so multi-caption utterances ("hey openhuman … +/// what's the weather like in paris") have a chance to assemble +/// before we hit the LLM. The shell calls this on every caption +/// push that flagged the wake word; subsequent calls before the +/// delay expires are coalesced via the session's `wake_active` flag. +pub async fn run_caption_turn(request_id: &str) -> Result { + // Wait briefly so a multi-fragment wake utterance ("hey openhuman + // what's the weather like in paris" arriving as 2-3 captions) has + // a chance to assemble before we drain the prompt. + tokio::time::sleep(std::time::Duration::from_millis(CAPTION_TURN_DELAY_MS)).await; + + // When wake fires from a bare "hey openhuman" with no tail, the + // session returns None from take_pending_prompt — there's nothing + // to feed the LLM. Previously we silently bailed (`return Ok(false)`) + // which made the bot look broken to the user. Treat empty-tail wake + // as a "say hi back" greeting cue: synthesize a short ack so the + // user gets audible proof that the caption→wake→speak loop is + // wired up end-to-end. + // + // Also: drop any queued outbound PCM from the previous turn. + // Reasoning-model replies can run 60+ seconds; if the user re-fires + // the wake mid-reply we need to stop the old speech rather than + // play the entire backlog before the new reply starts. This makes + // the bot interruptible from the user's side. + let (prompt, history, was_bare_wake) = match registry().with_session(request_id, |s| { + // Mark turn as in-flight so note_caption refuses to fire new + // wakes until run_caption_turn returns. 
Without this, the + // user's continuing speech (or growing-caption re-fires) + // spawns 20 parallel agentic turns for one question and none + // of them complete inside the timeout. + s.turn_in_progress = true; + s.cancel_outbound(); + let prompt = s.take_pending_prompt(); + let history = recent_dialog_history(s.events(), CONTEXT_EVENT_WINDOW); + (prompt, history) + })? { + (Some(p), h) => (p, h, false), + (None, h) => { + log::info!( + "[meet-agent] caption turn bare-wake (no tail) request_id={request_id} — replying with greeting ack" + ); + ("hello".to_string(), h, true) + } + }; + log::info!( + "[meet-agent] caption turn start request_id={request_id} prompt_chars={} history_msgs={} bare_wake={}", + prompt.chars().count(), + history.len(), + was_bare_wake, + ); + + // Grant-intent fast path. When the owner says "hey openhuman, + // allow them" / "let them in" / "go ahead" after a non-owner + // wake refusal, treat the turn as a single-shot session-level + // grant rather than handing the prompt to the orchestrator. + // The pending grantee was captured by `note_caption` at refusal + // time and lives on the session for `PENDING_GRANT_WINDOW_MS`. + if !was_bare_wake && looks_like_grant_intent(&prompt) { + let pending = registry() + .with_session(request_id, |s| s.take_pending_unauthorized()) + .ok() + .flatten(); + if let Some(grantee) = pending { + return run_grant_turn(request_id, &grantee).await; + } + // No pending request to grant — fall through to the normal + // LLM path. The model can interpret "allow" however it + // wants from there; without a pending grantee we have no + // session-level meaning to attach to it. + log::info!( + "[meet-agent] grant-intent prompt detected but no pending request — falling through request_id={request_id}" + ); + } + + // Pre-roll filler. The orchestrator + integration tools take + // 30–60s on slow paths (Slack / Gmail / Calendar). 
Without an + // immediate acoustic cue, the user assumes the bot is broken and + // re-asks (which the turn_in_progress gate now blocks but still + // burns the call atmosphere). Speak a 2-word ack right away and + // enqueue with done=false so the real reply appends cleanly when + // it lands. + // + // Skip pre-roll on short prompts: greetings ("hi"), checks ("can + // you hear me", "are you there"), time questions ("what's the + // time"), and other trivial asks the agent answers in 2-5s + // without tools — those don't need the ack, and "On it. Yes, I + // can hear you" sounds redundant. The 50-char threshold is a + // rough proxy; real second-brain questions ("am I free Friday + // afternoon for a 30 min slot") are almost always longer. + if !was_bare_wake && prompt.chars().count() > PREROLL_SKIP_PROMPT_CHARS { + if let Ok(ack_pcm) = tts(PREROLL_ACK_PHRASE).await { + let _ = registry().with_session(request_id, |s| { + s.enqueue_outbound_pcm(&ack_pcm, false); + }); + log::info!( + "[meet-agent] pre-roll ack queued request_id={request_id} samples={}", + ack_pcm.len() + ); + } else { + log::debug!( + "[meet-agent] pre-roll ack synth failed request_id={request_id} — skipping pre-roll" + ); + } + } + + // Route the turn through the FULL orchestrator agent first — it + // owns the user's connected integrations, memory tree, MCP + // clients and skills, so it can actually answer "is my Friday + // free", "what did Alice say about the deploy", etc. Falls back + // to the bare chat-completions path on orchestrator build / + // timeout / RPC error so a config-degraded environment still + // produces audible output instead of dead air. + let reply_text = match llm_meeting_agentic(&prompt, request_id).await { + Ok(text) => text, + Err(agentic_err) => { + // Do NOT fall back to basic LLM. 
The basic path has no + // tool access, so on a calendar/slack/gmail question it + // confidently hallucinates "I don't have access" — which + // is the WRONG answer and worse than silence. Speak a + // short canned "let me get back to you" ack so the user + // knows the question was heard but the bot couldn't + // resolve it in time, then drop the prompt. The user + // can re-ask (turn_in_progress gate clears as we exit). + log::warn!( + "[meet-agent] agentic turn failed — speaking polite ack instead of toolless fallback request_id={request_id} err={agentic_err}" + ); + let _ = registry().with_session(request_id, |s| { + s.record_event( + SessionEventKind::Note, + format!("agentic path failed; speaking ack: {agentic_err}"), + ); + }); + "Let me get back to you on that.".to_string() + } + }; + + let synthesized = if reply_text.trim().is_empty() { + Vec::new() + } else { + match tts(&reply_text).await { + Ok(samples) => samples, + Err(err) => { + log::warn!( + "[meet-agent] caption-turn TTS failed request_id={request_id} err={err}" + ); + let _ = registry().with_session(request_id, |s| { + s.record_event( + SessionEventKind::Note, + format!("TTS failure (using stub): {err}"), + ); + }); + stub_tts(&reply_text).await + } + } + }; + + registry().with_session(request_id, |s| { + s.record_event(SessionEventKind::Heard, prompt.clone()); + if !reply_text.is_empty() { + s.record_event(SessionEventKind::Spoke, reply_text.clone()); + if !synthesized.is_empty() { + s.enqueue_outbound_pcm(&synthesized, true); + } + } else { + s.record_event( + SessionEventKind::Note, + "agent declined to respond".to_string(), + ); + } + s.turn_count += 1; + // Clear the in-flight gate so the next wake can fire. Done + // inside the same with_session so it lands in one critical + // section with the reply enqueue, even if the caller drops + // the future after this point. 
+ s.turn_in_progress = false; + // Stamp turn-done time so note_caption's min-turn-gap + // backstop can suppress wakes that fire within 15s of this + // turn's completion (caption residue / repeat questions). + s.mark_turn_done(); + })?; + + log::info!( + "[meet-agent] caption turn done request_id={request_id} reply_chars={} synth_samples={} reply_preview={:?}", + reply_text.chars().count(), + synthesized.len(), + reply_text.chars().take(120).collect::(), + ); + Ok(true) +} diff --git a/src/openhuman/meet_agent/brain_tests.rs b/src/openhuman/meet_agent/brain_tests.rs index 7bf96d8763..40a00236d6 100644 --- a/src/openhuman/meet_agent/brain_tests.rs +++ b/src/openhuman/meet_agent/brain_tests.rs @@ -1,5 +1,7 @@ use super::*; use crate::openhuman::meet_agent::session::registry; +use crate::openhuman::meet_agent::types::{SessionEvent, SessionEventKind}; +use serde_json::json; #[tokio::test] async fn run_turn_skips_short_buffers() { diff --git a/src/openhuman/memory/README.md b/src/openhuman/memory/README.md index d2aeba170b..836d985950 100644 --- a/src/openhuman/memory/README.md +++ b/src/openhuman/memory/README.md @@ -44,8 +44,8 @@ one job. memory orchestrates and routes between them. | [`ingestion/`](ingestion/) | Document ingestion queue + extraction (entities, relations, embeddings) — feeds UnifiedMemory documents. | | [`canonicalize/`](../memory_sync/canonicalize/) | Source → canonical markdown (chat / email / document). Implemented in `memory_sync/canonicalize` and used at ingest time. | | [`chat/`](chat.rs) | Chat-source canonicalisation helpers. | -| [`read_rpc.rs`](read_rpc.rs) | RPC handlers for memory reads. | -| [`schemas/`](schemas/) + [`schema.rs`](schema.rs) | Controller schema definitions for the memory + memory_tree RPC namespaces. | +| [`read_rpc/`](read_rpc/) | RPC handlers for memory reads. | +| [`schemas/`](schemas/) + [`schema/`](schema/) | Controller schema definitions for the memory + memory_tree RPC namespaces. 
| | [`sync_status/`](../memory_sync/sync_status/) | Sync freshness tracking + RPC. | | [`ops/`](ops/) | RPC operation handlers + the shared `active_memory_client` helper. | | [`preferences.rs`](preferences.rs) | User preference read/write helpers. | diff --git a/src/openhuman/memory/query/smart_walk.rs b/src/openhuman/memory/query/smart_walk.rs deleted file mode 100644 index e05bcaf438..0000000000 --- a/src/openhuman/memory/query/smart_walk.rs +++ /dev/null @@ -1,2192 +0,0 @@ -//! E2GraphRAG-inspired smart memory retrieval. -//! -//! Unlike the basic `walk` module which only navigates the time-based summary -//! tree, smart_walk combines multiple retrieval strategies: -//! -//! 1. **Vector search** — semantic similarity across all stored content -//! 2. **Keyword search** — pattern matching across raw content files on disk -//! 3. **Entity search** — find entities and follow relationships -//! 4. **Tree browse** — navigate wiki summary hierarchies -//! 5. **Content read** — read specific files (raw/wiki/document/episodic) -//! 6. **Source listing** — discover available sources and content types -//! -//! The walker LLM (defaulting to DeepSeek Flash) plans which strategies to -//! use, collects evidence snippets, then synthesizes a cited answer. 
- -use crate::openhuman::config::rpc as config_rpc; -use crate::openhuman::config::Config; -use crate::openhuman::inference::provider::traits::{ChatMessage, Provider}; -use crate::openhuman::memory::chat::{build_chat_provider, ChatPrompt}; -use crate::openhuman::memory_store::chunks::types::SourceKind; -use crate::openhuman::memory_tree::retrieval; -use crate::openhuman::memory_tree::score::extract::EntityKind; -use crate::openhuman::memory_tree::tree_runtime::store::{read_children, read_node}; -use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolCategory, ToolResult}; -use async_trait::async_trait; -use serde_json::json; -use std::path::{Path, PathBuf}; - -const SMART_WALK_TEMP: f64 = 0.2; -const HARD_MAX_TURNS: usize = 25; -const MAX_EVIDENCE_ITEMS: usize = 30; -const MAX_KEYWORD_RESULTS: usize = 15; -const MAX_FILE_READ_BYTES: usize = 8000; - -fn truncate_chars(value: &str, max_chars: usize) -> String { - value.chars().take(max_chars).collect() -} - -// ── Public output types ───────────────────────────────────────────────────── - -#[derive(Debug, Clone)] -pub struct SmartWalkOptions { - pub max_turns: usize, - pub namespace: String, - /// Provider string override (e.g. "deepseek:deepseek-chat"). - pub model: Option, - /// Content root override. Defaults to config.memory_tree_content_root(). 
- pub content_root: Option, -} - -impl Default for SmartWalkOptions { - fn default() -> Self { - Self { - max_turns: 12, - namespace: "default".into(), - model: None, - content_root: None, - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum SmartWalkStopReason { - Answered, - MaxTurnsReached, - LlmGaveUp, - Error(String), -} - -#[derive(Debug, Clone)] -pub struct SmartWalkStep { - pub turn: usize, - pub action: String, - pub args_summary: String, - pub result_preview: String, -} - -#[derive(Debug, Clone)] -pub struct Evidence { - pub source_path: String, - pub snippet: String, - pub relevance: String, -} - -#[derive(Debug, Clone)] -pub struct SmartWalkOutcome { - pub answer: String, - pub evidence: Vec, - pub trace: Vec, - pub turns_used: usize, - pub stopped_reason: SmartWalkStopReason, -} - -// ── Tool ──────────────────────────────────────────────────────────────────── - -pub struct SmartMemoryWalkTool; - -#[async_trait] -impl Tool for SmartMemoryWalkTool { - fn name(&self) -> &str { - "memory_smart_walk" - } - - fn description(&self) -> &str { - "Smart memory retrieval — combines vector search, keyword search, \ - entity lookup, and tree browsing to answer queries about the user's \ - memory. More capable than the basic walk: searches across raw files, \ - wiki summaries, documents, and episodic memories." - } - - fn parameters_schema(&self) -> serde_json::Value { - json!({ - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "Natural-language question to answer by searching memory." - }, - "namespace": { - "type": "string", - "description": "Memory namespace. Default: \"default\"." - }, - "max_turns": { - "type": "integer", - "description": "Max LLM turns. Default 12, hard cap 25." - }, - "model": { - "type": "string", - "description": "Provider:model override (e.g. 'deepseek:deepseek-chat')." 
- } - }, - "required": ["query"] - }) - } - - fn category(&self) -> ToolCategory { - ToolCategory::System - } - - fn permission_level(&self) -> PermissionLevel { - PermissionLevel::ReadOnly - } - - fn is_concurrency_safe(&self, _args: &serde_json::Value) -> bool { - true - } - - async fn execute(&self, args: serde_json::Value) -> anyhow::Result { - let query = args - .get("query") - .and_then(|v| v.as_str()) - .ok_or_else(|| anyhow::anyhow!("memory_smart_walk: `query` is required"))? - .to_string(); - - let namespace = args - .get("namespace") - .and_then(|v| v.as_str()) - .unwrap_or("default") - .to_string(); - - let max_turns = args - .get("max_turns") - .and_then(|v| v.as_u64()) - .map(|n| (n as usize).min(HARD_MAX_TURNS)) - .unwrap_or(12); - - let model = args - .get("model") - .and_then(|v| v.as_str()) - .map(|s| s.to_string()); - - let cfg = config_rpc::load_config_with_timeout() - .await - .map_err(|e| anyhow::anyhow!("memory_smart_walk: load config failed: {e}"))?; - - let opts = SmartWalkOptions { - max_turns, - namespace, - model, - content_root: None, - }; - - let chat_provider = build_chat_provider(&cfg) - .map_err(|e| anyhow::anyhow!("memory_smart_walk: build chat provider failed: {e}"))?; - let adapter = ChatProviderAdapter { - inner: chat_provider, - }; - - let outcome = run_smart_walk(&cfg, &adapter, &query, opts).await?; - - let mut out = format!("{}\n", outcome.answer); - - if !outcome.evidence.is_empty() { - out.push_str("\n## Evidence\n"); - for (i, ev) in outcome.evidence.iter().enumerate() { - out.push_str(&format!( - "{}. 
**{}** — {}\n > {}\n", - i + 1, - ev.source_path, - ev.relevance, - truncate_chars(&ev.snippet, 200) - )); - } - } - - out.push_str("\n## Trace\n"); - for step in &outcome.trace { - out.push_str(&format!( - "- **Turn {}** `{}` {}: {}\n", - step.turn, step.action, step.args_summary, step.result_preview - )); - } - out.push_str(&format!( - "\n*Stop reason: {:?}, turns used: {}*\n", - outcome.stopped_reason, outcome.turns_used - )); - - Ok(ToolResult::success(out)) - } -} - -// ── Main loop ─────────────────────────────────────────────────────────────── - -pub async fn run_smart_walk( - config: &Config, - provider: &dyn Provider, - query: &str, - opts: SmartWalkOptions, -) -> anyhow::Result { - let max_turns = opts.max_turns.min(HARD_MAX_TURNS); - let model = opts - .model - .clone() - .unwrap_or_else(|| resolve_walk_model(config)); - - let content_root = opts - .content_root - .clone() - .unwrap_or_else(|| config.memory_tree_content_root()); - - log::debug!( - "[smart_walk] starting query_len={} namespace={} max_turns={} model={} content_root={}", - query.len(), - opts.namespace, - max_turns, - model, - content_root.display() - ); - - let system = build_system_prompt(); - let inner_tools = build_inner_tools_text(); - - let cr = content_root.clone(); - let inventory = tokio::task::spawn_blocking(move || build_content_inventory(&cr)) - .await - .unwrap_or_else(|_| "error building content inventory".into()); - - let mut history: Vec = vec![ - ChatMessage::system(format!("{system}\n\n{inner_tools}")), - ChatMessage::user(format!( - "Query: {query}\n\n## Available content\n{inventory}" - )), - ]; - - let mut trace: Vec = Vec::new(); - let mut evidence: Vec = Vec::new(); - - for turn in 1..=max_turns { - log::debug!("[smart_walk] turn={turn} evidence_count={}", evidence.len()); - - let response = match provider - .chat_with_history(&history, &model, SMART_WALK_TEMP) - .await - { - Ok(r) => r, - Err(e) => { - log::warn!("[smart_walk] provider error on turn={turn}: {e:#}"); 
- let err_msg = format!("Provider error on turn {turn}: {e}"); - return Ok(SmartWalkOutcome { - answer: format!( - "Walk failed: {err_msg}\n\nPartial from {} turn(s).", - trace.len() - ), - evidence, - trace, - turns_used: turn, - stopped_reason: SmartWalkStopReason::Error(err_msg), - }); - } - }; - - log::debug!("[smart_walk] turn={turn} response_len={}", response.len()); - - let (text_before, calls) = parse_tool_calls(&response); - - if calls.is_empty() { - let trimmed = response.trim().to_string(); - if trimmed.is_empty() { - log::debug!("[smart_walk] turn={turn} LLM gave up (empty response)"); - return Ok(SmartWalkOutcome { - answer: synthesize_fallback(&trace, &evidence), - evidence, - trace, - turns_used: turn, - stopped_reason: SmartWalkStopReason::LlmGaveUp, - }); - } - log::debug!("[smart_walk] turn={turn} no tool calls — treating as answer"); - return Ok(SmartWalkOutcome { - answer: trimmed, - evidence, - trace, - turns_used: turn, - stopped_reason: SmartWalkStopReason::Answered, - }); - } - - history.push(ChatMessage::assistant(response.clone())); - - // Process ALL tool calls in this turn (not just the first). 
- let mut combined_results = Vec::new(); - for call in &calls { - log::debug!( - "[smart_walk] turn={turn} action={} args={}", - call.name, - call.args - ); - - let (args_summary, tool_result, is_answer, answer_text) = - dispatch_call(config, &opts.namespace, &content_root, call, &mut evidence).await; - - let result_preview: String = tool_result.chars().take(200).collect(); - trace.push(SmartWalkStep { - turn, - action: call.name.clone(), - args_summary, - result_preview: result_preview.clone(), - }); - - if is_answer { - log::debug!("[smart_walk] turn={turn} answer action — stopping"); - return Ok(SmartWalkOutcome { - answer: answer_text, - evidence, - trace, - turns_used: turn, - stopped_reason: SmartWalkStopReason::Answered, - }); - } - - combined_results.push(format!( - "{}", - call.name, tool_result - )); - } - - let evidence_summary = if evidence.is_empty() { - String::new() - } else { - format!( - "\n\nEvidence collected so far ({} items):\n{}", - evidence.len(), - evidence - .iter() - .enumerate() - .map(|(i, e)| format!(" {}. 
[{}] {}", i + 1, e.source_path, e.relevance)) - .collect::>() - .join("\n") - ) - }; - - let result_msg = format!("{}{}", combined_results.join("\n"), evidence_summary); - history.push(ChatMessage::user(result_msg)); - - if !text_before.trim().is_empty() { - log::debug!( - "[smart_walk] turn={turn} text before tool calls: {}", - truncate_chars(&text_before, 80) - ); - } - } - - log::debug!("[smart_walk] max_turns={max_turns} reached"); - Ok(SmartWalkOutcome { - answer: synthesize_fallback(&trace, &evidence), - evidence, - trace, - turns_used: max_turns, - stopped_reason: SmartWalkStopReason::MaxTurnsReached, - }) -} - -// ── ChatProviderAdapter ───────────────────────────────────────────────────── - -struct ChatProviderAdapter { - inner: std::sync::Arc, -} - -#[async_trait] -impl Provider for ChatProviderAdapter { - async fn chat_with_system( - &self, - system: Option<&str>, - message: &str, - _model: &str, - temperature: f64, - ) -> anyhow::Result { - let prompt = ChatPrompt { - system: system.unwrap_or("").to_string(), - user: message.to_string(), - temperature, - kind: "memory_smart_walk", - }; - self.inner.chat_for_text(&prompt).await - } - - async fn chat_with_history( - &self, - messages: &[ChatMessage], - model: &str, - temperature: f64, - ) -> anyhow::Result { - let system = messages - .iter() - .find(|m| m.role == "system") - .map(|m| m.content.as_str()); - let user: String = messages - .iter() - .filter(|m| m.role != "system") - .map(|m| m.content.as_str()) - .collect::>() - .join("\n"); - self.chat_with_system(system, &user, model, temperature) - .await - } -} - -// ── Inner call types ──────────────────────────────────────────────────────── - -#[derive(Clone)] -struct InnerCall { - name: String, - args: serde_json::Value, -} - -// ── Dispatch ──────────────────────────────────────────────────────────────── - -async fn dispatch_call( - config: &Config, - namespace: &str, - content_root: &Path, - call: &InnerCall, - evidence: &mut Vec, -) -> (String, 
String, bool, String) { - match call.name.as_str() { - "keyword_search" => { - let cr = content_root.to_path_buf(); - let c = call.clone(); - tokio::task::spawn_blocking(move || dispatch_keyword_search(&cr, &c)) - .await - .unwrap_or_else(|e| (String::new(), format!("error: {e}"), false, String::new())) - } - "entity_search" => dispatch_entity_search(config, call).await, - "list_sources" => { - let cr = content_root.to_path_buf(); - let c = call.clone(); - tokio::task::spawn_blocking(move || dispatch_list_sources(&cr, &c)) - .await - .unwrap_or_else(|e| (String::new(), format!("error: {e}"), false, String::new())) - } - "read_content" => { - let cr = content_root.to_path_buf(); - let c = call.clone(); - tokio::task::spawn_blocking(move || dispatch_read_content(&cr, &c)) - .await - .unwrap_or_else(|e| (String::new(), format!("error: {e}"), false, String::new())) - } - "browse_tree" => dispatch_browse_tree(config, namespace, call).await, - "collect_evidence" => dispatch_collect_evidence(call, evidence), - "answer" => dispatch_answer(call), - "vector_search" => dispatch_vector_search(config, call).await, - other => { - log::warn!("[smart_walk] unknown action: {other}"); - ( - format!("action={other}"), - format!( - "unknown action '{other}'. 
Valid: keyword_search, entity_search, \ - list_sources, read_content, browse_tree, vector_search, \ - collect_evidence, answer" - ), - false, - String::new(), - ) - } - } -} - -// ── keyword_search ────────────────────────────────────────────────────────── - -fn dispatch_keyword_search( - content_root: &Path, - call: &InnerCall, -) -> (String, String, bool, String) { - let pattern = call - .args - .get("pattern") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let content_type = call - .args - .get("content_type") - .and_then(|v| v.as_str()) - .unwrap_or("all"); - - if pattern.is_empty() { - return ( - "pattern=".into(), - "error: keyword_search requires a non-empty pattern".into(), - false, - String::new(), - ); - } - - log::debug!( - "[smart_walk] keyword_search pattern={} content_type={}", - pattern, - content_type - ); - - let args_summary = format!("pattern=\"{}\" type={}", pattern, content_type); - - let search_dirs: Vec = match content_type { - "raw" => vec![content_root.join("raw")], - "wiki" => vec![content_root.join("wiki")], - "document" => vec![content_root.join("document")], - "episodic" => vec![content_root.join("episodic")], - _ => vec![ - content_root.join("raw"), - content_root.join("wiki"), - content_root.join("document"), - content_root.join("episodic"), - ], - }; - - let pattern_lower = pattern.to_lowercase(); - let mut results: Vec = Vec::new(); - - for dir in &search_dirs { - if !dir.exists() { - continue; - } - search_dir_recursive(dir, &pattern_lower, &mut results, content_root); - if results.len() >= MAX_KEYWORD_RESULTS { - break; - } - } - - results.truncate(MAX_KEYWORD_RESULTS); - - if results.is_empty() { - ( - args_summary, - format!("no matches for pattern \"{}\"", pattern), - false, - String::new(), - ) - } else { - let count = results.len(); - ( - args_summary, - format!("{count} matches:\n{}", results.join("\n")), - false, - String::new(), - ) - } -} - -fn search_dir_recursive(dir: &Path, pattern: &str, results: 
&mut Vec, content_root: &Path) { - let entries = match std::fs::read_dir(dir) { - Ok(e) => e, - Err(_) => return, - }; - - for entry in entries.flatten() { - if results.len() >= MAX_KEYWORD_RESULTS { - return; - } - - let path = entry.path(); - if path.is_dir() { - search_dir_recursive(&path, pattern, results, content_root); - } else if path.extension().map_or(false, |e| e == "md") { - if let Ok(content) = std::fs::read_to_string(&path) { - if content.to_lowercase().contains(pattern) { - let rel = path - .strip_prefix(content_root) - .unwrap_or(&path) - .to_string_lossy() - .to_string(); - - let line_match = content - .lines() - .find(|l| l.to_lowercase().contains(pattern)) - .unwrap_or("") - .trim(); - let preview: String = line_match.chars().take(120).collect(); - results.push(format!(" [{rel}] {preview}")); - } - } - } - } -} - -// ── entity_search ─────────────────────────────────────────────────────────── - -async fn dispatch_entity_search( - config: &Config, - call: &InnerCall, -) -> (String, String, bool, String) { - let query = call - .args - .get("query") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let kinds: Option> = - call.args - .get("kinds") - .and_then(|v| v.as_array()) - .map(|arr| { - arr.iter() - .filter_map(|v| v.as_str()) - .filter_map(|s| EntityKind::parse(s).ok()) - .collect() - }); - - if query.is_empty() { - return ( - "query=".into(), - "error: entity_search requires a non-empty query".into(), - false, - String::new(), - ); - } - - log::debug!( - "[smart_walk] entity_search query={} kinds={:?}", - query, - kinds - .as_ref() - .map(|ks| ks.iter().map(|k| k.as_str()).collect::>()) - ); - let args_summary = format!( - "query=\"{}\" kinds={:?}", - query, - kinds - .as_ref() - .map(|ks| ks.iter().map(|k| k.as_str()).collect::>()) - ); - - match retrieval::search_entities(config, &query, kinds, 10).await { - Ok(matches) => { - if matches.is_empty() { - ( - args_summary, - format!("no entities matching \"{}\"", query), - 
false, - String::new(), - ) - } else { - let formatted: Vec = matches - .iter() - .map(|m| { - format!( - " [{}] kind={} surface=\"{}\" mentions={} last_seen={}", - m.canonical_id, - m.kind.as_str(), - m.surface, - m.mention_count, - m.last_seen_ms - ) - }) - .collect(); - ( - args_summary, - format!( - "{} entities found:\n{}", - formatted.len(), - formatted.join("\n") - ), - false, - String::new(), - ) - } - } - Err(e) => ( - args_summary, - format!("entity search error: {e}"), - false, - String::new(), - ), - } -} - -// ── list_sources ──────────────────────────────────────────────────────────── - -fn dispatch_list_sources(content_root: &Path, call: &InnerCall) -> (String, String, bool, String) { - let content_type = call - .args - .get("content_type") - .and_then(|v| v.as_str()) - .unwrap_or("all"); - - log::debug!("[smart_walk] list_sources type={}", content_type); - let args_summary = format!("type={}", content_type); - - let mut listing = Vec::new(); - - let types_to_scan: Vec<&str> = match content_type { - "all" => vec!["raw", "wiki", "document", "episodic"], - t => vec![t], - }; - - for ctype in types_to_scan { - let dir = content_root.join(ctype); - if !dir.exists() { - listing.push(format!(" {ctype}/: (empty)")); - continue; - } - - match std::fs::read_dir(&dir) { - Ok(entries) => { - let mut subdirs: Vec = entries - .flatten() - .filter(|e| e.path().is_dir()) - .filter_map(|e| e.file_name().into_string().ok()) - .collect(); - subdirs.sort(); - - if subdirs.is_empty() { - listing.push(format!(" {ctype}/: (no subdirectories)")); - } else { - let count = subdirs.len(); - let preview: Vec<&str> = subdirs.iter().map(|s| s.as_str()).take(10).collect(); - listing.push(format!( - " {ctype}/ ({count} sources): {}{}", - preview.join(", "), - if count > 10 { ", ..." 
} else { "" } - )); - } - } - Err(e) => listing.push(format!(" {ctype}/: error: {e}")), - } - } - - ( - args_summary, - format!("Content sources:\n{}", listing.join("\n")), - false, - String::new(), - ) -} - -// ── read_content ──────────────────────────────────────────────────────────── - -fn dispatch_read_content(content_root: &Path, call: &InnerCall) -> (String, String, bool, String) { - let path_str = call - .args - .get("path") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - if path_str.is_empty() { - return ( - "path=".into(), - "error: read_content requires a non-empty path".into(), - false, - String::new(), - ); - } - - let requested = Path::new(&path_str); - if requested.is_absolute() || path_str.contains("..") { - return ( - format!("path={path_str}"), - "error: path must stay within the content root".into(), - false, - String::new(), - ); - } - - log::debug!("[smart_walk] read_content path={}", path_str); - - let full_path = content_root.join(requested); - if !full_path.exists() { - return ( - format!("path={path_str}"), - format!("file not found: {path_str}"), - false, - String::new(), - ); - } - - let canonical_root = match content_root.canonicalize() { - Ok(p) => p, - Err(e) => { - return ( - format!("path={path_str}"), - format!("error resolving content root: {e}"), - false, - String::new(), - ); - } - }; - let canonical_path = match full_path.canonicalize() { - Ok(p) => p, - Err(e) => { - return ( - format!("path={path_str}"), - format!("error resolving path: {e}"), - false, - String::new(), - ); - } - }; - if !canonical_path.starts_with(&canonical_root) { - return ( - format!("path={path_str}"), - "error: path escapes content root".into(), - false, - String::new(), - ); - } - - match std::fs::read_to_string(&canonical_path) { - Ok(content) => { - let truncated: String = content.chars().take(MAX_FILE_READ_BYTES).collect(); - let was_truncated = content.len() > MAX_FILE_READ_BYTES; - let suffix = if was_truncated { - 
format!("\n\n[...truncated, {} total chars]", content.len()) - } else { - String::new() - }; - ( - format!("path={path_str}"), - format!("{truncated}{suffix}"), - false, - String::new(), - ) - } - Err(e) => ( - format!("path={path_str}"), - format!("error reading: {e}"), - false, - String::new(), - ), - } -} - -// ── browse_tree ───────────────────────────────────────────────────────────── - -async fn dispatch_browse_tree( - config: &Config, - namespace: &str, - call: &InnerCall, -) -> (String, String, bool, String) { - let node_id = call - .args - .get("node_id") - .and_then(|v| v.as_str()) - .unwrap_or("root") - .to_string(); - - log::debug!("[smart_walk] browse_tree node_id={}", node_id); - - let config_owned = config.clone(); - let ns_owned = namespace.to_string(); - let id_owned = node_id.clone(); - - let result = tokio::task::spawn_blocking(move || { - let node = match read_node(&config_owned, &ns_owned, &id_owned) { - Ok(Some(n)) => n, - Ok(None) => return format!("unknown node: {id_owned}"), - Err(e) => return format!("error reading node {id_owned}: {e}"), - }; - - let children = match read_children(&config_owned, &ns_owned, &id_owned) { - Ok(c) => c, - Err(_) => vec![], - }; - - let mut out = format!( - "Node: {} (level={:?})\nSummary: {}\n", - node.node_id, node.level, node.summary - ); - - if children.is_empty() { - out.push_str("Children: (none — leaf node)\n"); - } else { - out.push_str(&format!("Children ({}):\n", children.len())); - for c in &children { - let preview: String = c.summary.chars().take(100).collect(); - out.push_str(&format!( - " - id={} level={:?}: {}\n", - c.node_id, c.level, preview - )); - } - } - out - }) - .await - .unwrap_or_else(|_| format!("error building context for node {node_id}")); - - (format!("node_id={node_id}"), result, false, String::new()) -} - -// ── vector_search ─────────────────────────────────────────────────────────── - -async fn dispatch_vector_search( - config: &Config, - call: &InnerCall, -) -> (String, 
String, bool, String) { - let query = call - .args - .get("query") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - - let source_kind = call - .args - .get("source_kind") - .and_then(|v| v.as_str()) - .and_then(|s| match s { - "chat" => Some(SourceKind::Chat), - "email" => Some(SourceKind::Email), - "document" => Some(SourceKind::Document), - _ => None, - }); - - let time_window_days = call - .args - .get("time_window_days") - .and_then(|v| v.as_u64()) - .map(|n| n as u32); - - if query.is_empty() { - return ( - "query=".into(), - "error: vector_search requires a non-empty query".into(), - false, - String::new(), - ); - } - - log::debug!( - "[smart_walk] vector_search query={} source_kind={:?} window_days={:?}", - query, - source_kind, - time_window_days - ); - let args_summary = format!( - "query=\"{}\" kind={:?} window={:?}", - truncate_chars(&query, 40), - source_kind, - time_window_days - ); - - match retrieval::query_source( - config, - None, - source_kind, - time_window_days, - Some(&query), - 10, - ) - .await - { - Ok(resp) => { - if resp.hits.is_empty() { - ( - args_summary, - format!("no vector matches for \"{}\"", query), - false, - String::new(), - ) - } else { - let formatted: Vec = resp - .hits - .iter() - .map(|h| { - let preview: String = h.content.chars().take(120).collect(); - format!(" [{}] (score={:.2}) {}", h.node_id, h.score, preview) - }) - .collect(); - ( - args_summary, - format!( - "{} semantic matches:\n{}", - formatted.len(), - formatted.join("\n") - ), - false, - String::new(), - ) - } - } - Err(e) => ( - args_summary, - format!("vector search error: {e}"), - false, - String::new(), - ), - } -} - -// ── collect_evidence ──────────────────────────────────────────────────────── - -fn dispatch_collect_evidence( - call: &InnerCall, - evidence: &mut Vec, -) -> (String, String, bool, String) { - let items = call - .args - .get("items") - .and_then(|v| v.as_array()) - .cloned() - .unwrap_or_default(); - - if items.is_empty() { - 
return ( - "items=[]".into(), - "error: collect_evidence requires non-empty items array".into(), - false, - String::new(), - ); - } - - let mut added = 0; - for item in &items { - if evidence.len() >= MAX_EVIDENCE_ITEMS { - break; - } - let source_path = item - .get("source") - .and_then(|v| v.as_str()) - .unwrap_or("unknown") - .to_string(); - let snippet = item - .get("snippet") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - let relevance = item - .get("relevance") - .and_then(|v| v.as_str()) - .unwrap_or("relevant") - .to_string(); - - if !snippet.is_empty() { - evidence.push(Evidence { - source_path, - snippet, - relevance, - }); - added += 1; - } - } - - log::debug!( - "[smart_walk] collect_evidence added={} total={}", - added, - evidence.len() - ); - - ( - format!("{added} items"), - format!( - "collected {added} evidence items (total: {})", - evidence.len() - ), - false, - String::new(), - ) -} - -// ── answer ────────────────────────────────────────────────────────────────── - -fn dispatch_answer(call: &InnerCall) -> (String, String, bool, String) { - let text = call - .args - .get("text") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(); - log::debug!("[smart_walk] answer text_len={}", text.len()); - ("(final answer)".into(), text.clone(), true, text) -} - -// ── Prompts ───────────────────────────────────────────────────────────────── - -fn build_system_prompt() -> String { - r#"You are a smart memory retrieval agent. Your task is to answer queries by -searching through a user's personal memory — which includes raw files (emails, -chats, commits, documents), wiki summaries, episodic conversation memories, -and document archives. - -## Strategy - -Use a multi-strategy approach inspired by graph-based retrieval: - -1. **Start broad**: Use `list_sources` to understand what content is available, - then `keyword_search` or `vector_search` to find relevant starting points. - -2. 
**Follow connections**: When you find a relevant entity or topic, use - `entity_search` to find related entities and follow the connections. - -3. **Drill into details**: Use `read_content` to read specific files for - full context. Use `browse_tree` to navigate wiki summary hierarchies. - -4. **Collect evidence**: As you find relevant information, use `collect_evidence` - to save snippets. This builds your citation buffer for the final answer. - -5. **Synthesize**: When you have enough evidence, use `answer` to provide a - comprehensive response with citations. - -## Rules - -- Be efficient: don't re-search for things you already found. -- Prefer vector_search for semantic/conceptual queries. -- Prefer keyword_search for specific names, IDs, or exact phrases. -- Use entity_search when the query mentions people, projects, or organizations. -- Always collect_evidence before answering, so your answer has citations. -- Use tags with JSON content for actions. Format: - {"name":"tool_name","arguments":{"param":"value"}} -- You can call multiple tools in one turn by including multiple blocks. - -## Example turn - -I'll search for recent emails about the project. - -{"name":"list_sources","arguments":{"content_type":"all"}} -{"name":"keyword_search","arguments":{"pattern":"project","content_type":"raw"}} -"# - .into() -} - -fn build_inner_tools_text() -> String { - r#"## Available tools - -**keyword_search** `{"pattern": "", "content_type": "all|raw|wiki|document|episodic"}` -Search for a text pattern (case-insensitive) across memory files. Returns matching file paths and line previews. - -**vector_search** `{"query": "", "source_kind": "chat|email|document", "time_window_days": 30}` -Semantic similarity search over indexed summaries. All params except query are optional. - -**entity_search** `{"query": "", "kinds": ["person", "email", "url", "handle"]}` -Find entities (people, emails, URLs, handles) in the entity index. kinds is optional. 
- -**list_sources** `{"content_type": "all|raw|wiki|document|episodic"}` -List available content sources and their subdirectories. - -**read_content** `{"path": ""}` -Read a specific content file. Path is relative to the content root (e.g. "raw/github-com-example/commits/123.md"). - -**browse_tree** `{"node_id": "root"}` -Navigate the wiki summary tree. Returns node summary and children. Use "root" to start. - -**collect_evidence** `{"items": [{"source": "", "snippet": "", "relevance": ""}]}` -Save evidence snippets for citation in your final answer. Call this as you find relevant information. - -**answer** `{"text": ""}` -Return your final answer. Reference collected evidence by source path."# - .into() -} - -// ── Content inventory ─────────────────────────────────────────────────────── - -fn build_content_inventory(content_root: &Path) -> String { - let mut parts = Vec::new(); - - for (label, subdir) in &[ - ("Raw content", "raw"), - ("Wiki summaries", "wiki"), - ("Documents", "document"), - ("Episodic memories", "episodic"), - ] { - let dir = content_root.join(subdir); - if dir.exists() { - let count = count_files_recursive(&dir); - if count > 0 { - parts.push(format!("- **{label}** ({subdir}/): {count} files")); - } - } - } - - if parts.is_empty() { - "No content files found.".into() - } else { - parts.join("\n") - } -} - -fn count_files_recursive(dir: &Path) -> usize { - let mut count = 0; - if let Ok(entries) = std::fs::read_dir(dir) { - for entry in entries.flatten() { - let path = entry.path(); - if path.is_dir() { - count += count_files_recursive(&path); - } else if path.extension().map_or(false, |e| e == "md") { - count += 1; - } - } - } - count -} - -// ── Model resolution ──────────────────────────────────────────────────────── - -const DEFAULT_SMART_WALK_MODEL: &str = "hint:summarization"; - -fn resolve_walk_model(config: &Config) -> String { - // 1. 
Explicit smart_walk_model config takes priority - if let Some(ref swm) = config.memory_tree.smart_walk_model { - if !swm.is_empty() { - return swm.clone(); - } - } - // 2. Default to summarization-v1 (routed through the OpenHuman backend) - DEFAULT_SMART_WALK_MODEL.to_string() -} - -// ── Tool call parser ──────────────────────────────────────────────────────── - -fn parse_tool_calls(response: &str) -> (String, Vec) { - let mut calls: Vec = Vec::new(); - let mut text_parts: Vec<&str> = Vec::new(); - let mut remaining: &str = response; - - const OPEN: &str = ""; - const CLOSE: &str = ""; - - loop { - match remaining.find(OPEN) { - None => { - if !remaining.trim().is_empty() && calls.is_empty() { - text_parts.push(remaining); - } - break; - } - Some(start) => { - let before = &remaining[..start]; - if !before.trim().is_empty() { - text_parts.push(before); - } - let after_open = &remaining[start + OPEN.len()..]; - match after_open.find(CLOSE) { - None => break, - Some(close_idx) => { - let inner = after_open[..close_idx].trim(); - if let Some(call) = parse_single_tool_call(inner) { - calls.push(call); - } - remaining = &after_open[close_idx + CLOSE.len()..]; - } - } - } - } - } - - let text_before = text_parts.concat(); - (text_before, calls) -} - -fn parse_single_tool_call(inner: &str) -> Option { - // Primary: JSON format {"name":"...","arguments":{...}} - if let Ok(val) = serde_json::from_str::(inner) { - if let Some(name) = val.get("name").and_then(|v| v.as_str()) { - let args = val - .get("arguments") - .cloned() - .unwrap_or(serde_json::Value::Object(Default::default())); - log::debug!( - "[smart_walk::parse_single_tool_call] json path: tool={} args_keys={}", - name, - args.as_object().map(|m| m.len()).unwrap_or(0) - ); - return Some(InnerCall { - name: name.to_string(), - args, - }); - } - } - // Fallback: XML-style nameJSON - if let (Some(name), args) = ( - extract_xml_tag(inner, "tool_name"), - extract_xml_tag(inner, "parameters"), - ) { - log::debug!( - 
"[smart_walk::parse_single_tool_call] xml fallback path: tool={} has_params={}", - name.trim(), - args.is_some() - ); - let parsed_args = args - .and_then(|a| serde_json::from_str::(a.trim()).ok()) - .unwrap_or_else(|| { - // Parameters might be XML key-value pairs; parse them heuristically - let mut map = serde_json::Map::new(); - for line in inner.lines() { - let trimmed = line.trim(); - if trimmed.starts_with('<') - && !trimmed.starts_with("') { - let tag = &trimmed[1..tag_end]; - if let Some(close) = trimmed.find(&format!("")) { - let value = &trimmed[tag_end + 1..close]; - map.insert( - tag.to_string(), - serde_json::Value::String(value.to_string()), - ); - } - } - } - } - serde_json::Value::Object(map) - }); - return Some(InnerCall { - name: name.trim().to_string(), - args: parsed_args, - }); - } - None -} - -fn extract_xml_tag<'a>(text: &'a str, tag: &str) -> Option<&'a str> { - let open = format!("<{tag}>"); - let close = format!(""); - let start = text.find(&open)? + open.len(); - let end = text[start..].find(&close)? + start; - Some(&text[start..end]) -} - -// ── Fallback synthesis ────────────────────────────────────────────────────── - -fn synthesize_fallback(trace: &[SmartWalkStep], evidence: &[Evidence]) -> String { - let mut out = String::new(); - - if !evidence.is_empty() { - out.push_str("Based on the evidence collected:\n\n"); - for (i, ev) in evidence.iter().enumerate() { - out.push_str(&format!( - "{}. [{}] {}: {}\n", - i + 1, - ev.source_path, - ev.relevance, - truncate_chars(&ev.snippet, 150) - )); - } - } else if !trace.is_empty() { - out.push_str("Could not converge on an answer. 
Steps taken:\n\n"); - for s in trace { - out.push_str(&format!( - "- Turn {}: {} → {}\n", - s.turn, - s.action, - truncate_chars(&s.result_preview, 100) - )); - } - } else { - out.push_str("Could not converge on an answer — no steps taken."); - } - out -} - -// ── Tests ─────────────────────────────────────────────────────────────────── - -#[cfg(test)] -mod tests { - use super::*; - use crate::openhuman::inference::provider::traits::ChatMessage; - use async_trait::async_trait; - use std::sync::Mutex; - use tempfile::TempDir; - - struct StubProvider { - responses: Mutex>, - } - - impl StubProvider { - fn new(responses: Vec<&str>) -> Self { - Self { - responses: Mutex::new(responses.into_iter().map(|s| s.to_string()).collect()), - } - } - } - - #[async_trait] - impl Provider for StubProvider { - async fn chat_with_system( - &self, - _system: Option<&str>, - _message: &str, - _model: &str, - _temp: f64, - ) -> anyhow::Result { - let mut responses = self.responses.lock().unwrap(); - if responses.is_empty() { - return Err(anyhow::anyhow!("StubProvider: no more responses")); - } - Ok(responses.remove(0)) - } - - async fn chat_with_history( - &self, - _messages: &[ChatMessage], - _model: &str, - _temp: f64, - ) -> anyhow::Result { - let mut responses = self.responses.lock().unwrap(); - if responses.is_empty() { - return Err(anyhow::anyhow!("StubProvider: no more responses")); - } - Ok(responses.remove(0)) - } - } - - fn test_config(tmp: &TempDir) -> Config { - let mut cfg = Config::default(); - cfg.workspace_dir = tmp.path().join("workspace"); - std::fs::create_dir_all(&cfg.workspace_dir).unwrap(); - cfg - } - - fn seed_content(content_root: &Path) { - let raw_dir = content_root.join("raw").join("test-source").join("commits"); - std::fs::create_dir_all(&raw_dir).unwrap(); - std::fs::write( - raw_dir.join("123_abc.md"), - "---\nsource_kind: document\n---\n# Test Commit\nFixed the login bug in auth module.\n", - ) - .unwrap(); - - let doc_dir = 
content_root.join("document").join("test-doc"); - std::fs::create_dir_all(&doc_dir).unwrap(); - std::fs::write( - doc_dir.join("readme.md"), - "---\nsource_kind: document\n---\n# README\nProject documentation for the auth system.\n", - ) - .unwrap(); - - let wiki_dir = content_root - .join("wiki") - .join("summaries") - .join("source-test"); - std::fs::create_dir_all(wiki_dir.join("L1")).unwrap(); - std::fs::write( - wiki_dir.join("L1").join("summary-001.md"), - "---\nkind: summary\nlevel: 1\n---\nSummary of auth changes in May 2026.\n", - ) - .unwrap(); - } - - #[tokio::test] - async fn smart_walk_keyword_search_and_answer() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_content(&content_root); - - let provider = StubProvider::new(vec![ - // Turn 1: keyword search for "login" - r#"{"name":"keyword_search","arguments":{"pattern":"login","content_type":"all"}}"#, - // Turn 2: read the matching file - r#"{"name":"read_content","arguments":{"path":"raw/test-source/commits/123_abc.md"}}"#, - // Turn 3: collect evidence and answer - r#"{"name":"collect_evidence","arguments":{"items":[{"source":"raw/test-source/commits/123_abc.md","snippet":"Fixed the login bug in auth module.","relevance":"directly mentions login fix"}]}} -{"name":"answer","arguments":{"text":"The login bug was fixed in the auth module, as documented in commit 123_abc."}}"#, - ]); - - let opts = SmartWalkOptions { - max_turns: 10, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, &provider, "What happened with the login bug?", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - assert!(outcome.answer.contains("login")); - assert_eq!(outcome.evidence.len(), 1); - assert!(outcome.evidence[0].snippet.contains("login bug")); - } - - #[tokio::test] - async fn 
smart_walk_list_sources() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_content(&content_root); - - let provider = StubProvider::new(vec![ - // Turn 1: list sources - r#"{"name":"list_sources","arguments":{"content_type":"all"}}"#, - // Turn 2: answer - r#"{"name":"answer","arguments":{"text":"Found raw, document, and wiki content."}}"#, - ]); - - let opts = SmartWalkOptions { - max_turns: 5, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, &provider, "What sources are available?", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - assert!(outcome.answer.contains("raw")); - } - - #[tokio::test] - async fn smart_walk_max_turns() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_content(&content_root); - - let provider = StubProvider::new(vec![ - r#"{"name":"list_sources","arguments":{"content_type":"all"}}"#, - r#"{"name":"list_sources","arguments":{"content_type":"raw"}}"#, - r#"{"name":"list_sources","arguments":{"content_type":"wiki"}}"#, - ]); - - let opts = SmartWalkOptions { - max_turns: 3, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, &provider, "loop test", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::MaxTurnsReached); - assert_eq!(outcome.turns_used, 3); - } - - #[test] - fn parse_multiple_tool_calls() { - let response = r#"Let me search. 
-{"name":"keyword_search","arguments":{"pattern":"test"}} -{"name":"entity_search","arguments":{"query":"Alice"}}"#; - - let (text, calls) = parse_tool_calls(response); - assert_eq!(calls.len(), 2); - assert_eq!(calls[0].name, "keyword_search"); - assert_eq!(calls[1].name, "entity_search"); - assert!(text.contains("Let me search")); - } - - #[test] - fn content_inventory_counts_files() { - let tmp = TempDir::new().unwrap(); - let content_root = tmp.path().join("content"); - seed_content(&content_root); - - let inventory = build_content_inventory(&content_root); - assert!(inventory.contains("Raw content")); - assert!(inventory.contains("Documents")); - assert!(inventory.contains("Wiki summaries")); - } - - // ── Staging integration tests (run with --ignored) ──────────────── - - fn staging_content_root() -> Option { - let path = std::path::PathBuf::from( - "/Users/enamakel/.openhuman-staging/users/69d9cb73e61f755583c3671f/workspace/memory_tree/content", - ); - if path.exists() { - Some(path) - } else { - None - } - } - - #[test] - #[ignore] - fn staging_keyword_search_finds_steven() { - let content_root = staging_content_root().expect("staging content not available"); - let mut results = Vec::new(); - search_dir_recursive( - &content_root.join("raw"), - "steven", - &mut results, - &content_root, - ); - println!("keyword 'steven': {} results", results.len()); - for r in results.iter().take(5) { - println!(" {}", r); - } - assert!( - !results.is_empty(), - "should find 'steven' in staging raw content" - ); - } - - #[test] - #[ignore] - fn staging_content_inventory() { - let content_root = staging_content_root().expect("staging content not available"); - let inventory = build_content_inventory(&content_root); - println!("Inventory:\n{}", inventory); - assert!(inventory.contains("Raw content")); - assert!(inventory.contains("Documents")); - } - - #[test] - #[ignore] - fn staging_list_sources_shows_github() { - let content_root = staging_content_root().expect("staging 
content not available"); - let call = InnerCall { - name: "list_sources".into(), - args: serde_json::json!({"content_type": "all"}), - }; - let (_, result, _, _) = dispatch_list_sources(&content_root, &call); - println!("list_sources:\n{}", result); - assert!(result.contains("raw/"), "should list raw sources"); - } - - #[test] - #[ignore] - fn staging_read_wiki_summary() { - let content_root = staging_content_root().expect("staging content not available"); - let wiki_dir = content_root.join("wiki").join("summaries"); - if !wiki_dir.exists() { - println!("no wiki summaries found — skipping"); - return; - } - // Find first summary file - let first = walkdir_first_md(&wiki_dir); - if let Some(path) = first { - let rel = path - .strip_prefix(&content_root) - .unwrap() - .to_string_lossy() - .to_string(); - println!("Reading wiki: {}", rel); - let call = InnerCall { - name: "read_content".into(), - args: serde_json::json!({"path": rel}), - }; - let (_, result, _, _) = dispatch_read_content(&content_root, &call); - println!("Content preview: {}", &result[..result.len().min(300)]); - assert!( - !result.starts_with("error"), - "should read wiki file without error" - ); - } - } - - #[test] - #[ignore] - fn staging_read_episodic_memory() { - let content_root = staging_content_root().expect("staging content not available"); - let ep_dir = content_root.join("episodic"); - if !ep_dir.exists() { - println!("no episodic memories — skipping"); - return; - } - let first = walkdir_first_md(&ep_dir); - if let Some(path) = first { - let rel = path - .strip_prefix(&content_root) - .unwrap() - .to_string_lossy() - .to_string(); - println!("Reading episodic: {}", rel); - let call = InnerCall { - name: "read_content".into(), - args: serde_json::json!({"path": rel}), - }; - let (_, result, _, _) = dispatch_read_content(&content_root, &call); - println!("Content preview: {}", &result[..result.len().min(300)]); - assert!( - !result.starts_with("error"), - "should read episodic file without 
error" - ); - } - } - - #[test] - #[ignore] - fn staging_full_smart_walk_keyword_pipeline() { - let content_root = staging_content_root().expect("staging content not available"); - - // Simulate the pipeline: list_sources → keyword_search → read_content - let call = InnerCall { - name: "list_sources".into(), - args: serde_json::json!({"content_type": "raw"}), - }; - let (_, sources, _, _) = dispatch_list_sources(&content_root, &call); - println!("Step 1 - Sources:\n{}", sources); - - let call = InnerCall { - name: "keyword_search".into(), - args: serde_json::json!({"pattern": "memory", "content_type": "all"}), - }; - let (_, search_result, _, _) = dispatch_keyword_search(&content_root, &call); - println!("Step 2 - Search 'memory':\n{}", search_result); - - if search_result.contains("[") { - // Extract first file path from results - if let Some(path_start) = search_result.find('[') { - if let Some(path_end) = search_result[path_start + 1..].find(']') { - let file_path = &search_result[path_start + 1..path_start + 1 + path_end]; - println!("Step 3 - Reading: {}", file_path); - let call = InnerCall { - name: "read_content".into(), - args: serde_json::json!({"path": file_path}), - }; - let (_, content, _, _) = dispatch_read_content(&content_root, &call); - println!( - "Step 3 - Content ({} chars): {}", - content.len(), - &content[..content.len().min(200)] - ); - assert!( - !content.starts_with("error"), - "pipeline should complete without errors" - ); - } - } - } - } - - // ── Parser tests: XML-format tool calls ──────────────────────────── - - #[test] - fn parse_xml_style_tool_call() { - let response = r#"I'll browse the tree. 
- -browse_tree -{"node_id":"root"} -"#; - - let (text, calls) = parse_tool_calls(response); - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].name, "browse_tree"); - assert_eq!(calls[0].args["node_id"], "root"); - assert!(text.contains("browse the tree")); - } - - #[test] - fn parse_xml_style_with_xml_params() { - let response = r#" -keyword_search - -project status -raw - -"#; - - let (_text, calls) = parse_tool_calls(response); - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].name, "keyword_search"); - assert_eq!(calls[0].args["pattern"], "project status"); - assert_eq!(calls[0].args["content_type"], "raw"); - } - - #[test] - fn parse_mixed_json_and_xml_tool_calls() { - let response = r#"Searching... -{"name":"list_sources","arguments":{"content_type":"all"}} - -keyword_search -{"pattern":"email","content_type":"raw"} -"#; - - let (_, calls) = parse_tool_calls(response); - assert_eq!(calls.len(), 2); - assert_eq!(calls[0].name, "list_sources"); - assert_eq!(calls[1].name, "keyword_search"); - assert_eq!(calls[1].args["pattern"], "email"); - } - - #[test] - fn parse_xml_no_parameters_tag() { - let response = r#" -list_sources -"#; - - let (_, calls) = parse_tool_calls(response); - // No tag → extract_xml_tag returns None for parameters - // parse_single_tool_call requires tool_name match but parameters is - // Option, so it should still parse with empty args - assert_eq!(calls.len(), 1); - assert_eq!(calls[0].name, "list_sources"); - } - - #[test] - fn extract_xml_tag_basic() { - assert_eq!(extract_xml_tag("hello", "name"), Some("hello")); - assert_eq!(extract_xml_tag("no tags here", "name"), None); - assert_eq!(extract_xml_tag("12", "b"), Some("2")); - } - - #[test] - fn parse_single_tool_call_json() { - let call = - parse_single_tool_call(r#"{"name":"keyword_search","arguments":{"pattern":"test"}}"#); - assert!(call.is_some()); - let call = call.unwrap(); - assert_eq!(call.name, "keyword_search"); - assert_eq!(call.args["pattern"], "test"); - } - - #[test] 
- fn parse_single_tool_call_xml() { - let call = parse_single_tool_call( - "read_content\n{\"path\":\"raw/email/test.md\"}", - ); - assert!(call.is_some()); - let call = call.unwrap(); - assert_eq!(call.name, "read_content"); - assert_eq!(call.args["path"], "raw/email/test.md"); - } - - #[test] - fn parse_single_tool_call_garbage_returns_none() { - assert!(parse_single_tool_call("just some text").is_none()); - assert!(parse_single_tool_call("").is_none()); - } - - // ── E2E walk tests with rich seeded content ───────────────────────── - - fn seed_synced_memory(content_root: &Path) { - // Raw email content - let email_dir = content_root.join("raw").join("email").join("inbox"); - std::fs::create_dir_all(&email_dir).unwrap(); - std::fs::write( - email_dir.join("001_meeting.md"), - "---\nsource_kind: email\nauthor: alice@example.com\ndate: 2026-06-01\n---\n\ - # Team standup notes\n\n\ - Action items:\n\ - - Deploy the auth service refactor by Friday\n\ - - Review PR #342 for the billing module\n\ - - Schedule security audit with external team\n", - ) - .unwrap(); - std::fs::write( - email_dir.join("002_project.md"), - "---\nsource_kind: email\nauthor: bob@example.com\ndate: 2026-06-02\n---\n\ - # Project Phoenix status update\n\n\ - The migration is 80% complete. Remaining:\n\ - - Database schema changes (blocked on DBA review)\n\ - - API versioning for backward compatibility\n\ - - Load testing the new endpoints\n", - ) - .unwrap(); - std::fs::write( - email_dir.join("003_personal.md"), - "---\nsource_kind: email\nauthor: carol@example.com\ndate: 2026-06-03\n---\n\ - # Lunch plans\n\n\ - Hey, want to grab sushi on Thursday? The new place on 5th street \ - got great reviews.\n", - ) - .unwrap(); - - // Episodic memories - let ep_dir = content_root.join("episodic").join("daily"); - std::fs::create_dir_all(&ep_dir).unwrap(); - std::fs::write( - ep_dir.join("2026-06-01.md"), - "---\nkind: episodic\ndate: 2026-06-01\n---\n\ - Worked on the auth service refactor. 
Had a productive standup.\n\ - Identified three blockers for Project Phoenix.\n", - ) - .unwrap(); - - // Wiki summaries - let wiki_dir = content_root - .join("wiki") - .join("summaries") - .join("email-inbox"); - std::fs::create_dir_all(wiki_dir.join("L1")).unwrap(); - std::fs::write( - wiki_dir.join("L1").join("summary-week-22.md"), - "---\nkind: summary\nlevel: 1\n---\n\ - Week 22 summary: Team focused on Project Phoenix migration \ - and auth service refactor. Key contacts: alice@example.com (standup), \ - bob@example.com (project status), carol@example.com (social).\n", - ) - .unwrap(); - - // Document content - let doc_dir = content_root.join("document").join("notes"); - std::fs::create_dir_all(&doc_dir).unwrap(); - std::fs::write( - doc_dir.join("project-phoenix.md"), - "---\nsource_kind: document\n---\n\ - # Project Phoenix\n\n\ - ## Overview\n\ - Migration from legacy monolith to microservices.\n\n\ - ## Status\n\ - Phase 2 of 3 — data migration and API versioning.\n\n\ - ## Key risks\n\ - - Data integrity during cutover\n\ - - Backward compatibility for mobile clients\n", - ) - .unwrap(); - } - - #[tokio::test] - async fn walk_synced_email_with_keyword_and_evidence() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_synced_memory(&content_root); - - let provider = StubProvider::new(vec![ - // Turn 1: list sources to discover content - r#"{"name":"list_sources","arguments":{"content_type":"all"}}"#, - // Turn 2: keyword search for "project phoenix" - r#"{"name":"keyword_search","arguments":{"pattern":"project phoenix","content_type":"all"}}"#, - // Turn 3: read the project email and project doc - r#"{"name":"read_content","arguments":{"path":"raw/email/inbox/002_project.md"}} -{"name":"read_content","arguments":{"path":"document/notes/project-phoenix.md"}}"#, - // Turn 4: collect evidence + answer - concat!( - 
r#"{"name":"collect_evidence","arguments":{"items":["#, - r#"{"source":"raw/email/inbox/002_project.md","snippet":"Migration is 80% complete. Remaining: DB schema, API versioning, load testing.","relevance":"direct project status"},"#, - r#"{"source":"document/notes/project-phoenix.md","snippet":"Phase 2 of 3 — data migration and API versioning.","relevance":"project overview doc"}"#, - r#"]}}"#, - "\n", - r#"{"name":"answer","arguments":{"text":"Project Phoenix is 80% complete (Phase 2 of 3). Remaining work: database schema changes (blocked on DBA review), API versioning for backward compatibility, and load testing new endpoints. Key risks include data integrity during cutover and backward compatibility for mobile clients."}}"#, - ), - ]); - - let opts = SmartWalkOptions { - max_turns: 10, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk( - &cfg, - &provider, - "What is the status of Project Phoenix?", - opts, - ) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - assert!(outcome.answer.contains("80%")); - assert!(outcome.answer.contains("Phoenix")); - assert_eq!(outcome.evidence.len(), 2); - assert!(outcome.evidence[0].source_path.contains("002_project")); - assert!(outcome.evidence[1].source_path.contains("project-phoenix")); - assert_eq!(outcome.turns_used, 4); - } - - #[tokio::test] - async fn walk_with_xml_format_tool_calls() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_synced_memory(&content_root); - - // Simulate the bug scenario: LLM outputs XML-style tool calls - let provider = StubProvider::new(vec![ - // Turn 1: XML-style list_sources - "\nlist_sources\n{\"content_type\":\"all\"}\n", - // Turn 2: XML-style keyword_search - "\nkeyword_search\n{\"pattern\":\"auth service\",\"content_type\":\"raw\"}\n", - // Turn 3: 
read + answer (JSON this time — mixed is fine) - r#"{"name":"read_content","arguments":{"path":"raw/email/inbox/001_meeting.md"}}"#, - // Turn 4: answer - r#"{"name":"answer","arguments":{"text":"The auth service refactor needs to be deployed by Friday, as discussed in the team standup."}}"#, - ]); - - let opts = SmartWalkOptions { - max_turns: 10, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk( - &cfg, - &provider, - "What do I need to work on for the auth service?", - opts, - ) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - assert!(outcome.answer.contains("auth service")); - // Verify the XML tool calls were parsed — we should have 4 turns, - // not 1 (which would happen if XML calls were silently dropped) - assert_eq!(outcome.turns_used, 4); - assert!(outcome.trace.len() >= 3); - assert_eq!(outcome.trace[0].action, "list_sources"); - assert_eq!(outcome.trace[1].action, "keyword_search"); - } - - #[tokio::test] - async fn walk_reads_across_content_types() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_synced_memory(&content_root); - - let provider = StubProvider::new(vec![ - // Turn 1: search for "standup" - r#"{"name":"keyword_search","arguments":{"pattern":"standup","content_type":"all"}}"#, - // Turn 2: read the email and the episodic memory - r#"{"name":"read_content","arguments":{"path":"raw/email/inbox/001_meeting.md"}} -{"name":"read_content","arguments":{"path":"episodic/daily/2026-06-01.md"}}"#, - // Turn 3: also read the wiki summary - r#"{"name":"read_content","arguments":{"path":"wiki/summaries/email-inbox/L1/summary-week-22.md"}}"#, - // Turn 4: collect from all 3 sources + answer - concat!( - r#"{"name":"collect_evidence","arguments":{"items":["#, - 
r#"{"source":"raw/email/inbox/001_meeting.md","snippet":"Deploy auth service refactor by Friday","relevance":"action item from standup"},"#, - r#"{"source":"episodic/daily/2026-06-01.md","snippet":"Had a productive standup","relevance":"episodic record"},"#, - r#"{"source":"wiki/summaries/email-inbox/L1/summary-week-22.md","snippet":"Team focused on Project Phoenix migration","relevance":"weekly summary"}"#, - r#"]}}"#, - "\n", - r#"{"name":"answer","arguments":{"text":"The standup covered Project Phoenix migration progress, auth service refactor deadlines, and identified three blockers."}}"#, - ), - ]); - - let opts = SmartWalkOptions { - max_turns: 10, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, &provider, "What happened in the standup?", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - assert_eq!(outcome.evidence.len(), 3); - // Evidence from all three content types - let sources: Vec<&str> = outcome - .evidence - .iter() - .map(|e| e.source_path.as_str()) - .collect(); - assert!(sources.iter().any(|s| s.contains("raw/"))); - assert!(sources.iter().any(|s| s.contains("episodic/"))); - assert!(sources.iter().any(|s| s.contains("wiki/"))); - } - - #[tokio::test] - async fn walk_llm_gives_up_uses_fallback() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_synced_memory(&content_root); - - let provider = StubProvider::new(vec![ - // Turn 1: search finds nothing - r#"{"name":"keyword_search","arguments":{"pattern":"quantum computing","content_type":"all"}}"#, - // Turn 2: LLM gives up with empty response - "", - ]); - - let opts = SmartWalkOptions { - max_turns: 5, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, 
&provider, "Tell me about quantum computing", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::LlmGaveUp); - assert!(outcome.evidence.is_empty()); - assert!(outcome.answer.contains("Could not converge")); - } - - #[tokio::test] - async fn walk_direct_answer_without_tools() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_synced_memory(&content_root); - - let provider = StubProvider::new(vec![ - // LLM directly answers without using any tools - "I don't have enough context to answer that question from your memory.", - ]); - - let opts = SmartWalkOptions { - max_turns: 5, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, &provider, "What's the meaning of life?", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - assert!(outcome.answer.contains("don't have enough context")); - assert_eq!(outcome.turns_used, 1); - assert!(outcome.evidence.is_empty()); - } - - #[tokio::test] - async fn walk_collect_evidence_deduplicates_within_limit() { - let tmp = TempDir::new().unwrap(); - let cfg = test_config(&tmp); - let content_root = cfg.workspace_dir.join("memory_tree").join("content"); - seed_synced_memory(&content_root); - - let provider = StubProvider::new(vec![ - // Turn 1: collect a batch of evidence - concat!( - r#"{"name":"collect_evidence","arguments":{"items":["#, - r#"{"source":"raw/email/inbox/001_meeting.md","snippet":"Deploy auth","relevance":"task"},"#, - r#"{"source":"raw/email/inbox/002_project.md","snippet":"Migration 80%","relevance":"status"}"#, - r#"]}}"#, - ), - // Turn 2: collect more evidence (including a duplicate of the first source) - concat!( - r#"{"name":"collect_evidence","arguments":{"items":["#, - r#"{"source":"document/notes/project-phoenix.md","snippet":"Phase 2 
of 3","relevance":"doc"},"#, - r#"{"source":"raw/email/inbox/001_meeting.md","snippet":"Deploy auth (duplicate)","relevance":"task"}"#, - r#"]}}"#, - ), - // Turn 3: answer - r#"{"name":"answer","arguments":{"text":"Summary with evidence items including duplicate source."}}"#, - ]); - - let opts = SmartWalkOptions { - max_turns: 10, - namespace: "default".into(), - model: Some("test-model".into()), - content_root: Some(content_root), - }; - - let outcome = run_smart_walk(&cfg, &provider, "Summarize everything", opts) - .await - .unwrap(); - - assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); - // 2 items from turn 1 + 2 items from turn 2 (one of which duplicates a turn-1 source); - // collect_evidence does not deduplicate, so all 4 items are present. - assert_eq!(outcome.evidence.len(), 4); - } - - fn walkdir_first_md(dir: &std::path::Path) -> Option { - fn recurse(dir: &std::path::Path) -> Option { - for entry in std::fs::read_dir(dir).ok()?.flatten() { - let path = entry.path(); - if path.is_dir() { - if let Some(found) = recurse(&path) { - return Some(found); - } - } else if path.extension().map_or(false, |e| e == "md") { - return Some(path); - } - } - None - } - recurse(dir) - } -} diff --git a/src/openhuman/memory/query/smart_walk/dispatch.rs b/src/openhuman/memory/query/smart_walk/dispatch.rs new file mode 100644 index 0000000000..777ff84513 --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/dispatch.rs @@ -0,0 +1,687 @@ +//! Tool call dispatch for the smart_walk inner loop. +//! +//! Each `dispatch_*` function handles one named inner tool and returns +//! `(args_summary, result_text, is_final_answer, answer_text)`. 
+ +use crate::openhuman::config::Config; +use crate::openhuman::memory::query::smart_walk::prompts::InnerCall; +use crate::openhuman::memory::query::smart_walk::types::{ + Evidence, MAX_EVIDENCE_ITEMS, MAX_FILE_READ_BYTES, MAX_KEYWORD_RESULTS, +}; +use crate::openhuman::memory_store::chunks::types::SourceKind; +use crate::openhuman::memory_tree::retrieval; +use crate::openhuman::memory_tree::score::extract::EntityKind; +use crate::openhuman::memory_tree::tree_runtime::store::{read_children, read_node}; +use std::path::{Path, PathBuf}; + +// ── Top-level dispatcher ───────────────────────────────────────────────────── + +pub(crate) async fn dispatch_call( + config: &Config, + namespace: &str, + content_root: &Path, + call: &InnerCall, + evidence: &mut Vec, +) -> (String, String, bool, String) { + match call.name.as_str() { + "keyword_search" => { + let cr = content_root.to_path_buf(); + let c = call.clone(); + tokio::task::spawn_blocking(move || dispatch_keyword_search(&cr, &c)) + .await + .unwrap_or_else(|e| (String::new(), format!("error: {e}"), false, String::new())) + } + "entity_search" => dispatch_entity_search(config, call).await, + "list_sources" => { + let cr = content_root.to_path_buf(); + let c = call.clone(); + tokio::task::spawn_blocking(move || dispatch_list_sources(&cr, &c)) + .await + .unwrap_or_else(|e| (String::new(), format!("error: {e}"), false, String::new())) + } + "read_content" => { + let cr = content_root.to_path_buf(); + let c = call.clone(); + tokio::task::spawn_blocking(move || dispatch_read_content(&cr, &c)) + .await + .unwrap_or_else(|e| (String::new(), format!("error: {e}"), false, String::new())) + } + "browse_tree" => dispatch_browse_tree(config, namespace, call).await, + "collect_evidence" => dispatch_collect_evidence(call, evidence), + "answer" => dispatch_answer(call), + "vector_search" => dispatch_vector_search(config, call).await, + other => { + log::warn!("[smart_walk] unknown action: {other}"); + ( + format!("action={other}"), 
+ format!( + "unknown action '{other}'. Valid: keyword_search, entity_search, \ + list_sources, read_content, browse_tree, vector_search, \ + collect_evidence, answer" + ), + false, + String::new(), + ) + } + } +} + +// ── keyword_search ─────────────────────────────────────────────────────────── + +pub(crate) fn dispatch_keyword_search( + content_root: &Path, + call: &InnerCall, +) -> (String, String, bool, String) { + let pattern = call + .args + .get("pattern") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let content_type = call + .args + .get("content_type") + .and_then(|v| v.as_str()) + .unwrap_or("all"); + + if pattern.is_empty() { + return ( + "pattern=".into(), + "error: keyword_search requires a non-empty pattern".into(), + false, + String::new(), + ); + } + + log::debug!( + "[smart_walk] keyword_search pattern={} content_type={}", + pattern, + content_type + ); + + let args_summary = format!("pattern=\"{}\" type={}", pattern, content_type); + + let search_dirs: Vec = match content_type { + "raw" => vec![content_root.join("raw")], + "wiki" => vec![content_root.join("wiki")], + "document" => vec![content_root.join("document")], + "episodic" => vec![content_root.join("episodic")], + _ => vec![ + content_root.join("raw"), + content_root.join("wiki"), + content_root.join("document"), + content_root.join("episodic"), + ], + }; + + let pattern_lower = pattern.to_lowercase(); + let mut results: Vec = Vec::new(); + + for dir in &search_dirs { + if !dir.exists() { + continue; + } + search_dir_recursive(dir, &pattern_lower, &mut results, content_root); + if results.len() >= MAX_KEYWORD_RESULTS { + break; + } + } + + results.truncate(MAX_KEYWORD_RESULTS); + + if results.is_empty() { + ( + args_summary, + format!("no matches for pattern \"{}\"", pattern), + false, + String::new(), + ) + } else { + let count = results.len(); + ( + args_summary, + format!("{count} matches:\n{}", results.join("\n")), + false, + String::new(), + ) + } +} + +pub(crate) 
fn search_dir_recursive( + dir: &Path, + pattern: &str, + results: &mut Vec, + content_root: &Path, +) { + let entries = match std::fs::read_dir(dir) { + Ok(e) => e, + Err(_) => return, + }; + + for entry in entries.flatten() { + if results.len() >= MAX_KEYWORD_RESULTS { + return; + } + + let path = entry.path(); + if path.is_dir() { + search_dir_recursive(&path, pattern, results, content_root); + } else if path.extension().map_or(false, |e| e == "md") { + if let Ok(content) = std::fs::read_to_string(&path) { + if content.to_lowercase().contains(pattern) { + let rel = path + .strip_prefix(content_root) + .unwrap_or(&path) + .to_string_lossy() + .to_string(); + + let line_match = content + .lines() + .find(|l| l.to_lowercase().contains(pattern)) + .unwrap_or("") + .trim(); + let preview: String = line_match.chars().take(120).collect(); + results.push(format!(" [{rel}] {preview}")); + } + } + } + } +} + +// ── entity_search ──────────────────────────────────────────────────────────── + +async fn dispatch_entity_search( + config: &Config, + call: &InnerCall, +) -> (String, String, bool, String) { + let query = call + .args + .get("query") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let kinds: Option> = + call.args + .get("kinds") + .and_then(|v| v.as_array()) + .map(|arr| { + arr.iter() + .filter_map(|v| v.as_str()) + .filter_map(|s| EntityKind::parse(s).ok()) + .collect() + }); + + if query.is_empty() { + return ( + "query=".into(), + "error: entity_search requires a non-empty query".into(), + false, + String::new(), + ); + } + + log::debug!( + "[smart_walk] entity_search query={} kinds={:?}", + query, + kinds + .as_ref() + .map(|ks| ks.iter().map(|k| k.as_str()).collect::>()) + ); + let args_summary = format!( + "query=\"{}\" kinds={:?}", + query, + kinds + .as_ref() + .map(|ks| ks.iter().map(|k| k.as_str()).collect::>()) + ); + + match retrieval::search_entities(config, &query, kinds, 10).await { + Ok(matches) => { + if matches.is_empty() { + ( 
+ args_summary, + format!("no entities matching \"{}\"", query), + false, + String::new(), + ) + } else { + let formatted: Vec = matches + .iter() + .map(|m| { + format!( + " [{}] kind={} surface=\"{}\" mentions={} last_seen={}", + m.canonical_id, + m.kind.as_str(), + m.surface, + m.mention_count, + m.last_seen_ms + ) + }) + .collect(); + ( + args_summary, + format!( + "{} entities found:\n{}", + formatted.len(), + formatted.join("\n") + ), + false, + String::new(), + ) + } + } + Err(e) => ( + args_summary, + format!("entity search error: {e}"), + false, + String::new(), + ), + } +} + +// ── list_sources ───────────────────────────────────────────────────────────── + +pub(crate) fn dispatch_list_sources( + content_root: &Path, + call: &InnerCall, +) -> (String, String, bool, String) { + let content_type = call + .args + .get("content_type") + .and_then(|v| v.as_str()) + .unwrap_or("all"); + + log::debug!("[smart_walk] list_sources type={}", content_type); + let args_summary = format!("type={}", content_type); + + let mut listing = Vec::new(); + + let types_to_scan: Vec<&str> = match content_type { + "all" => vec!["raw", "wiki", "document", "episodic"], + t => vec![t], + }; + + for ctype in types_to_scan { + let dir = content_root.join(ctype); + if !dir.exists() { + listing.push(format!(" {ctype}/: (empty)")); + continue; + } + + match std::fs::read_dir(&dir) { + Ok(entries) => { + let mut subdirs: Vec = entries + .flatten() + .filter(|e| e.path().is_dir()) + .filter_map(|e| e.file_name().into_string().ok()) + .collect(); + subdirs.sort(); + + if subdirs.is_empty() { + listing.push(format!(" {ctype}/: (no subdirectories)")); + } else { + let count = subdirs.len(); + let preview: Vec<&str> = subdirs.iter().map(|s| s.as_str()).take(10).collect(); + listing.push(format!( + " {ctype}/ ({count} sources): {}{}", + preview.join(", "), + if count > 10 { ", ..." 
} else { "" } + )); + } + } + Err(e) => listing.push(format!(" {ctype}/: error: {e}")), + } + } + + ( + args_summary, + format!("Content sources:\n{}", listing.join("\n")), + false, + String::new(), + ) +} + +// ── read_content ───────────────────────────────────────────────────────────── + +pub(crate) fn dispatch_read_content( + content_root: &Path, + call: &InnerCall, +) -> (String, String, bool, String) { + let path_str = call + .args + .get("path") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + if path_str.is_empty() { + return ( + "path=".into(), + "error: read_content requires a non-empty path".into(), + false, + String::new(), + ); + } + + let requested = Path::new(&path_str); + if requested.is_absolute() || path_str.contains("..") { + return ( + format!("path={path_str}"), + "error: path must stay within the content root".into(), + false, + String::new(), + ); + } + + log::debug!("[smart_walk] read_content path={}", path_str); + + let full_path = content_root.join(requested); + if !full_path.exists() { + return ( + format!("path={path_str}"), + format!("file not found: {path_str}"), + false, + String::new(), + ); + } + + let canonical_root = match content_root.canonicalize() { + Ok(p) => p, + Err(e) => { + return ( + format!("path={path_str}"), + format!("error resolving content root: {e}"), + false, + String::new(), + ); + } + }; + let canonical_path = match full_path.canonicalize() { + Ok(p) => p, + Err(e) => { + return ( + format!("path={path_str}"), + format!("error resolving path: {e}"), + false, + String::new(), + ); + } + }; + if !canonical_path.starts_with(&canonical_root) { + return ( + format!("path={path_str}"), + "error: path escapes content root".into(), + false, + String::new(), + ); + } + + match std::fs::read_to_string(&canonical_path) { + Ok(content) => { + let truncated: String = content.chars().take(MAX_FILE_READ_BYTES).collect(); + let was_truncated = content.len() > MAX_FILE_READ_BYTES; + let suffix = if was_truncated 
{ + format!("\n\n[...truncated, {} total chars]", content.len()) + } else { + String::new() + }; + ( + format!("path={path_str}"), + format!("{truncated}{suffix}"), + false, + String::new(), + ) + } + Err(e) => ( + format!("path={path_str}"), + format!("error reading: {e}"), + false, + String::new(), + ), + } +} + +// ── browse_tree ────────────────────────────────────────────────────────────── + +async fn dispatch_browse_tree( + config: &Config, + namespace: &str, + call: &InnerCall, +) -> (String, String, bool, String) { + let node_id = call + .args + .get("node_id") + .and_then(|v| v.as_str()) + .unwrap_or("root") + .to_string(); + + log::debug!("[smart_walk] browse_tree node_id={}", node_id); + + let config_owned = config.clone(); + let ns_owned = namespace.to_string(); + let id_owned = node_id.clone(); + + let result = tokio::task::spawn_blocking(move || { + let node = match read_node(&config_owned, &ns_owned, &id_owned) { + Ok(Some(n)) => n, + Ok(None) => return format!("unknown node: {id_owned}"), + Err(e) => return format!("error reading node {id_owned}: {e}"), + }; + + let children = match read_children(&config_owned, &ns_owned, &id_owned) { + Ok(c) => c, + Err(_) => vec![], + }; + + let mut out = format!( + "Node: {} (level={:?})\nSummary: {}\n", + node.node_id, node.level, node.summary + ); + + if children.is_empty() { + out.push_str("Children: (none — leaf node)\n"); + } else { + out.push_str(&format!("Children ({}):\n", children.len())); + for c in &children { + let preview: String = c.summary.chars().take(100).collect(); + out.push_str(&format!( + " - id={} level={:?}: {}\n", + c.node_id, c.level, preview + )); + } + } + out + }) + .await + .unwrap_or_else(|_| format!("error building context for node {node_id}")); + + (format!("node_id={node_id}"), result, false, String::new()) +} + +// ── vector_search ──────────────────────────────────────────────────────────── + +async fn dispatch_vector_search( + config: &Config, + call: &InnerCall, +) -> (String, 
String, bool, String) { + use crate::openhuman::memory::query::smart_walk::types::truncate_chars; + + let query = call + .args + .get("query") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + let source_kind = call + .args + .get("source_kind") + .and_then(|v| v.as_str()) + .and_then(|s| match s { + "chat" => Some(SourceKind::Chat), + "email" => Some(SourceKind::Email), + "document" => Some(SourceKind::Document), + _ => None, + }); + + let time_window_days = call + .args + .get("time_window_days") + .and_then(|v| v.as_u64()) + .map(|n| n as u32); + + if query.is_empty() { + return ( + "query=".into(), + "error: vector_search requires a non-empty query".into(), + false, + String::new(), + ); + } + + log::debug!( + "[smart_walk] vector_search query={} source_kind={:?} window_days={:?}", + query, + source_kind, + time_window_days + ); + let args_summary = format!( + "query=\"{}\" kind={:?} window={:?}", + truncate_chars(&query, 40), + source_kind, + time_window_days + ); + + match retrieval::query_source( + config, + None, + source_kind, + time_window_days, + Some(&query), + 10, + ) + .await + { + Ok(resp) => { + if resp.hits.is_empty() { + ( + args_summary, + format!("no vector matches for \"{}\"", query), + false, + String::new(), + ) + } else { + let formatted: Vec = resp + .hits + .iter() + .map(|h| { + let preview: String = h.content.chars().take(120).collect(); + format!(" [{}] (score={:.2}) {}", h.node_id, h.score, preview) + }) + .collect(); + ( + args_summary, + format!( + "{} semantic matches:\n{}", + formatted.len(), + formatted.join("\n") + ), + false, + String::new(), + ) + } + } + Err(e) => ( + args_summary, + format!("vector search error: {e}"), + false, + String::new(), + ), + } +} + +// ── collect_evidence ───────────────────────────────────────────────────────── + +pub(crate) fn dispatch_collect_evidence( + call: &InnerCall, + evidence: &mut Vec, +) -> (String, String, bool, String) { + let items = call + .args + .get("items") + 
.and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + if items.is_empty() { + return ( + "items=[]".into(), + "error: collect_evidence requires non-empty items array".into(), + false, + String::new(), + ); + } + + let mut added = 0; + for item in &items { + if evidence.len() >= MAX_EVIDENCE_ITEMS { + break; + } + let source_path = item + .get("source") + .and_then(|v| v.as_str()) + .unwrap_or("unknown") + .to_string(); + let snippet = item + .get("snippet") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + let relevance = item + .get("relevance") + .and_then(|v| v.as_str()) + .unwrap_or("relevant") + .to_string(); + + if !snippet.is_empty() { + evidence.push(Evidence { + source_path, + snippet, + relevance, + }); + added += 1; + } + } + + log::debug!( + "[smart_walk] collect_evidence added={} total={}", + added, + evidence.len() + ); + + ( + format!("{added} items"), + format!( + "collected {added} evidence items (total: {})", + evidence.len() + ), + false, + String::new(), + ) +} + +// ── answer ─────────────────────────────────────────────────────────────────── + +pub(crate) fn dispatch_answer(call: &InnerCall) -> (String, String, bool, String) { + let text = call + .args + .get("text") + .and_then(|v| v.as_str()) + .unwrap_or("") + .to_string(); + + log::debug!("[smart_walk] answer text_len={}", text.len()); + ("(final answer)".into(), text.clone(), true, text) +} diff --git a/src/openhuman/memory/query/smart_walk/mod.rs b/src/openhuman/memory/query/smart_walk/mod.rs new file mode 100644 index 0000000000..7c896fa64c --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/mod.rs @@ -0,0 +1,29 @@ +//! E2GraphRAG-inspired smart memory retrieval. +//! +//! Unlike the basic `walk` module which only navigates the time-based summary +//! tree, smart_walk combines multiple retrieval strategies: +//! +//! 1. **Vector search** — semantic similarity across all stored content +//! 2. 
**Keyword search** — pattern matching across raw content files on disk +//! 3. **Entity search** — find entities and follow relationships +//! 4. **Tree browse** — navigate wiki summary hierarchies +//! 5. **Content read** — read specific files (raw/wiki/document/episodic) +//! 6. **Source listing** — discover available sources and content types +//! +//! The walker LLM (defaulting to DeepSeek Flash) plans which strategies to +//! use, collects evidence snippets, then synthesizes a cited answer. + +mod dispatch; +mod prompts; +mod runner; +mod tool; +pub mod types; + +#[cfg(test)] +mod smart_walk_tests; + +// ── Public re-exports ──────────────────────────────────────────────────────── + +pub use runner::run_smart_walk; +pub use tool::SmartMemoryWalkTool; +pub use types::{Evidence, SmartWalkOptions, SmartWalkOutcome, SmartWalkStep, SmartWalkStopReason}; diff --git a/src/openhuman/memory/query/smart_walk/prompts.rs b/src/openhuman/memory/query/smart_walk/prompts.rs new file mode 100644 index 0000000000..6bdc359378 --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/prompts.rs @@ -0,0 +1,292 @@ +//! Prompt construction, content inventory, model resolution, tool-call parsing, +//! and fallback synthesis for smart_walk. + +use crate::openhuman::config::Config; +use crate::openhuman::memory::query::smart_walk::types::{truncate_chars, Evidence, SmartWalkStep}; +use std::path::Path; + +// ── Inner call type (used by parser and dispatch) ─────────────────────────── + +#[derive(Clone)] +pub(crate) struct InnerCall { + pub(crate) name: String, + pub(crate) args: serde_json::Value, +} + +// ── System prompt ──────────────────────────────────────────────────────────── + +pub(crate) fn build_system_prompt() -> String { + r#"You are a smart memory retrieval agent. 
Your task is to answer queries by +searching through a user's personal memory — which includes raw files (emails, +chats, commits, documents), wiki summaries, episodic conversation memories, +and document archives. + +## Strategy + +Use a multi-strategy approach inspired by graph-based retrieval: + +1. **Start broad**: Use `list_sources` to understand what content is available, + then `keyword_search` or `vector_search` to find relevant starting points. + +2. **Follow connections**: When you find a relevant entity or topic, use + `entity_search` to find related entities and follow the connections. + +3. **Drill into details**: Use `read_content` to read specific files for + full context. Use `browse_tree` to navigate wiki summary hierarchies. + +4. **Collect evidence**: As you find relevant information, use `collect_evidence` + to save snippets. This builds your citation buffer for the final answer. + +5. **Synthesize**: When you have enough evidence, use `answer` to provide a + comprehensive response with citations. + +## Rules + +- Be efficient: don't re-search for things you already found. +- Prefer vector_search for semantic/conceptual queries. +- Prefer keyword_search for specific names, IDs, or exact phrases. +- Use entity_search when the query mentions people, projects, or organizations. +- Always collect_evidence before answering, so your answer has citations. +- Use tags with JSON content for actions. Format: + {"name":"tool_name","arguments":{"param":"value"}} +- You can call multiple tools in one turn by including multiple blocks. + +## Example turn + +I'll search for recent emails about the project. 
+ +{"name":"list_sources","arguments":{"content_type":"all"}} +{"name":"keyword_search","arguments":{"pattern":"project","content_type":"raw"}} +"# + .into() +} + +pub(crate) fn build_inner_tools_text() -> String { + r#"## Available tools + +**keyword_search** `{"pattern": "", "content_type": "all|raw|wiki|document|episodic"}` +Search for a text pattern (case-insensitive) across memory files. Returns matching file paths and line previews. + +**vector_search** `{"query": "", "source_kind": "chat|email|document", "time_window_days": 30}` +Semantic similarity search over indexed summaries. All params except query are optional. + +**entity_search** `{"query": "", "kinds": ["person", "email", "url", "handle"]}` +Find entities (people, emails, URLs, handles) in the entity index. kinds is optional. + +**list_sources** `{"content_type": "all|raw|wiki|document|episodic"}` +List available content sources and their subdirectories. + +**read_content** `{"path": ""}` +Read a specific content file. Path is relative to the content root (e.g. "raw/github-com-example/commits/123.md"). + +**browse_tree** `{"node_id": "root"}` +Navigate the wiki summary tree. Returns node summary and children. Use "root" to start. + +**collect_evidence** `{"items": [{"source": "", "snippet": "", "relevance": ""}]}` +Save evidence snippets for citation in your final answer. Call this as you find relevant information. + +**answer** `{"text": ""}` +Return your final answer. 
Reference collected evidence by source path."# + .into() +} + +// ── Content inventory ──────────────────────────────────────────────────────── + +pub(crate) fn build_content_inventory(content_root: &Path) -> String { + let mut parts = Vec::new(); + + for (label, subdir) in &[ + ("Raw content", "raw"), + ("Wiki summaries", "wiki"), + ("Documents", "document"), + ("Episodic memories", "episodic"), + ] { + let dir = content_root.join(subdir); + if dir.exists() { + let count = count_files_recursive(&dir); + if count > 0 { + parts.push(format!("- **{label}** ({subdir}/): {count} files")); + } + } + } + + if parts.is_empty() { + "No content files found.".into() + } else { + parts.join("\n") + } +} + +pub(crate) fn count_files_recursive(dir: &Path) -> usize { + let mut count = 0; + if let Ok(entries) = std::fs::read_dir(dir) { + for entry in entries.flatten() { + let path = entry.path(); + if path.is_dir() { + count += count_files_recursive(&path); + } else if path.extension().map_or(false, |e| e == "md") { + count += 1; + } + } + } + count +} + +// ── Model resolution ───────────────────────────────────────────────────────── + +const DEFAULT_SMART_WALK_MODEL: &str = "hint:summarization"; + +pub(crate) fn resolve_walk_model(config: &Config) -> String { + // 1. Explicit smart_walk_model config takes priority + if let Some(ref swm) = config.memory_tree.smart_walk_model { + if !swm.is_empty() { + return swm.clone(); + } + } + // 2. 
Default to summarization-v1 (routed through the OpenHuman backend) + DEFAULT_SMART_WALK_MODEL.to_string() +} + +// ── Tool call parser ───────────────────────────────────────────────────────── + +pub(crate) fn parse_tool_calls(response: &str) -> (String, Vec) { + let mut calls: Vec = Vec::new(); + let mut text_parts: Vec<&str> = Vec::new(); + let mut remaining: &str = response; + + const OPEN: &str = ""; + const CLOSE: &str = ""; + + loop { + match remaining.find(OPEN) { + None => { + if !remaining.trim().is_empty() && calls.is_empty() { + text_parts.push(remaining); + } + break; + } + Some(start) => { + let before = &remaining[..start]; + if !before.trim().is_empty() { + text_parts.push(before); + } + let after_open = &remaining[start + OPEN.len()..]; + match after_open.find(CLOSE) { + None => break, + Some(close_idx) => { + let inner = after_open[..close_idx].trim(); + if let Some(call) = parse_single_tool_call(inner) { + calls.push(call); + } + remaining = &after_open[close_idx + CLOSE.len()..]; + } + } + } + } + } + + let text_before = text_parts.concat(); + (text_before, calls) +} + +fn parse_single_tool_call(inner: &str) -> Option { + // Primary: JSON format {"name":"...","arguments":{...}} + if let Ok(val) = serde_json::from_str::(inner) { + if let Some(name) = val.get("name").and_then(|v| v.as_str()) { + let args = val + .get("arguments") + .cloned() + .unwrap_or(serde_json::Value::Object(Default::default())); + log::debug!( + "[smart_walk::parse_single_tool_call] json path: tool={} args_keys={}", + name, + args.as_object().map(|m| m.len()).unwrap_or(0) + ); + return Some(InnerCall { + name: name.to_string(), + args, + }); + } + } + // Fallback: XML-style nameJSON + if let (Some(name), args) = ( + extract_xml_tag(inner, "tool_name"), + extract_xml_tag(inner, "parameters"), + ) { + log::debug!( + "[smart_walk::parse_single_tool_call] xml fallback path: tool={} has_params={}", + name.trim(), + args.is_some() + ); + let parsed_args = args + .and_then(|a| 
serde_json::from_str::(a.trim()).ok()) + .unwrap_or_else(|| { + let mut map = serde_json::Map::new(); + for line in inner.lines() { + let trimmed = line.trim(); + if trimmed.starts_with('<') + && !trimmed.starts_with("') { + let tag = &trimmed[1..tag_end]; + if let Some(close) = trimmed.find(&format!("")) { + let value = &trimmed[tag_end + 1..close]; + map.insert( + tag.to_string(), + serde_json::Value::String(value.to_string()), + ); + } + } + } + } + serde_json::Value::Object(map) + }); + return Some(InnerCall { + name: name.trim().to_string(), + args: parsed_args, + }); + } + None +} + +fn extract_xml_tag<'a>(text: &'a str, tag: &str) -> Option<&'a str> { + let open = format!("<{tag}>"); + let close = format!(""); + let start = text.find(&open)? + open.len(); + let end = text[start..].find(&close)? + start; + Some(&text[start..end]) +} + +// ── Fallback synthesis ─────────────────────────────────────────────────────── + +pub(crate) fn synthesize_fallback(trace: &[SmartWalkStep], evidence: &[Evidence]) -> String { + let mut out = String::new(); + + if !evidence.is_empty() { + out.push_str("Based on the evidence collected:\n\n"); + for (i, ev) in evidence.iter().enumerate() { + out.push_str(&format!( + "{}. [{}] {}: {}\n", + i + 1, + ev.source_path, + ev.relevance, + truncate_chars(&ev.snippet, 150) + )); + } + } else if !trace.is_empty() { + out.push_str("Could not converge on an answer. Steps taken:\n\n"); + for s in trace { + out.push_str(&format!( + "- Turn {}: {} → {}\n", + s.turn, + s.action, + truncate_chars(&s.result_preview, 100) + )); + } + } else { + out.push_str("Could not converge on an answer — no steps taken."); + } + out +} diff --git a/src/openhuman/memory/query/smart_walk/runner.rs b/src/openhuman/memory/query/smart_walk/runner.rs new file mode 100644 index 0000000000..33b3bfb240 --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/runner.rs @@ -0,0 +1,182 @@ +//! Main agentic loop for smart_walk. 
+ +use crate::openhuman::config::Config; +use crate::openhuman::inference::provider::traits::{ChatMessage, Provider}; +use crate::openhuman::memory::query::smart_walk::dispatch::dispatch_call; +use crate::openhuman::memory::query::smart_walk::prompts::{ + build_content_inventory, build_inner_tools_text, build_system_prompt, parse_tool_calls, + resolve_walk_model, synthesize_fallback, +}; +use crate::openhuman::memory::query::smart_walk::types::{ + truncate_chars, Evidence, SmartWalkOptions, SmartWalkOutcome, SmartWalkStep, + SmartWalkStopReason, HARD_MAX_TURNS, SMART_WALK_TEMP, +}; + +pub async fn run_smart_walk( + config: &Config, + provider: &dyn Provider, + query: &str, + opts: SmartWalkOptions, +) -> anyhow::Result { + let max_turns = opts.max_turns.min(HARD_MAX_TURNS); + let model = opts + .model + .clone() + .unwrap_or_else(|| resolve_walk_model(config)); + + let content_root = opts + .content_root + .clone() + .unwrap_or_else(|| config.memory_tree_content_root()); + + log::debug!( + "[smart_walk] starting query_len={} namespace={} max_turns={} model={} content_root={}", + query.len(), + opts.namespace, + max_turns, + model, + content_root.display() + ); + + let system = build_system_prompt(); + let inner_tools = build_inner_tools_text(); + + let cr = content_root.clone(); + let inventory = tokio::task::spawn_blocking(move || build_content_inventory(&cr)) + .await + .unwrap_or_else(|_| "error building content inventory".into()); + + let mut history: Vec = vec![ + ChatMessage::system(format!("{system}\n\n{inner_tools}")), + ChatMessage::user(format!( + "Query: {query}\n\n## Available content\n{inventory}" + )), + ]; + + let mut trace: Vec = Vec::new(); + let mut evidence: Vec = Vec::new(); + + for turn in 1..=max_turns { + log::debug!("[smart_walk] turn={turn} evidence_count={}", evidence.len()); + + let response = match provider + .chat_with_history(&history, &model, SMART_WALK_TEMP) + .await + { + Ok(r) => r, + Err(e) => { + log::warn!("[smart_walk] provider 
error on turn={turn}: {e:#}"); + let err_msg = format!("Provider error on turn {turn}: {e}"); + return Ok(SmartWalkOutcome { + answer: format!( + "Walk failed: {err_msg}\n\nPartial from {} turn(s).", + trace.len() + ), + evidence, + trace, + turns_used: turn, + stopped_reason: SmartWalkStopReason::Error(err_msg), + }); + } + }; + + log::debug!("[smart_walk] turn={turn} response_len={}", response.len()); + + let (text_before, calls) = parse_tool_calls(&response); + + if calls.is_empty() { + let trimmed = response.trim().to_string(); + if trimmed.is_empty() { + log::debug!("[smart_walk] turn={turn} LLM gave up (empty response)"); + return Ok(SmartWalkOutcome { + answer: synthesize_fallback(&trace, &evidence), + evidence, + trace, + turns_used: turn, + stopped_reason: SmartWalkStopReason::LlmGaveUp, + }); + } + log::debug!("[smart_walk] turn={turn} no tool calls — treating as answer"); + return Ok(SmartWalkOutcome { + answer: trimmed, + evidence, + trace, + turns_used: turn, + stopped_reason: SmartWalkStopReason::Answered, + }); + } + + history.push(ChatMessage::assistant(response.clone())); + + // Process ALL tool calls in this turn (not just the first). 
+ let mut combined_results = Vec::new(); + for call in &calls { + log::debug!( + "[smart_walk] turn={turn} action={} args={}", + call.name, + call.args + ); + + let (args_summary, tool_result, is_answer, answer_text) = + dispatch_call(config, &opts.namespace, &content_root, call, &mut evidence).await; + + let result_preview: String = tool_result.chars().take(200).collect(); + trace.push(SmartWalkStep { + turn, + action: call.name.clone(), + args_summary, + result_preview: result_preview.clone(), + }); + + if is_answer { + log::debug!("[smart_walk] turn={turn} answer action — stopping"); + return Ok(SmartWalkOutcome { + answer: answer_text, + evidence, + trace, + turns_used: turn, + stopped_reason: SmartWalkStopReason::Answered, + }); + } + + combined_results.push(format!( + "{}", + call.name, tool_result + )); + } + + let evidence_summary = if evidence.is_empty() { + String::new() + } else { + format!( + "\n\nEvidence collected so far ({} items):\n{}", + evidence.len(), + evidence + .iter() + .enumerate() + .map(|(i, e)| format!(" {}. [{}] {}", i + 1, e.source_path, e.relevance)) + .collect::>() + .join("\n") + ) + }; + + let result_msg = format!("{}{}", combined_results.join("\n"), evidence_summary); + history.push(ChatMessage::user(result_msg)); + + if !text_before.trim().is_empty() { + log::debug!( + "[smart_walk] turn={turn} text before tool calls: {}", + truncate_chars(&text_before, 80) + ); + } + } + + log::debug!("[smart_walk] max_turns={max_turns} reached"); + Ok(SmartWalkOutcome { + answer: synthesize_fallback(&trace, &evidence), + evidence, + trace, + turns_used: max_turns, + stopped_reason: SmartWalkStopReason::MaxTurnsReached, + }) +} diff --git a/src/openhuman/memory/query/smart_walk/smart_walk_tests.rs b/src/openhuman/memory/query/smart_walk/smart_walk_tests.rs new file mode 100644 index 0000000000..b851303c80 --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/smart_walk_tests.rs @@ -0,0 +1,693 @@ +//! Tests for the smart_walk module. 
+ +#[cfg(test)] +mod tests { + use crate::openhuman::config::Config; + use crate::openhuman::inference::provider::traits::{ChatMessage, Provider}; + use crate::openhuman::memory::query::smart_walk::dispatch::{ + dispatch_keyword_search, dispatch_list_sources, dispatch_read_content, search_dir_recursive, + }; + use crate::openhuman::memory::query::smart_walk::prompts::{ + build_content_inventory, parse_tool_calls, InnerCall, + }; + use crate::openhuman::memory::query::smart_walk::runner::run_smart_walk; + use crate::openhuman::memory::query::smart_walk::types::{ + SmartWalkOptions, SmartWalkStopReason, + }; + use async_trait::async_trait; + use std::path::Path; + use std::sync::Mutex; + use tempfile::TempDir; + + struct StubProvider { + responses: Mutex>, + } + + impl StubProvider { + fn new(responses: Vec<&str>) -> Self { + Self { + responses: Mutex::new(responses.into_iter().map(|s| s.to_string()).collect()), + } + } + } + + #[async_trait] + impl Provider for StubProvider { + async fn chat_with_system( + &self, + _system: Option<&str>, + _message: &str, + _model: &str, + _temp: f64, + ) -> anyhow::Result { + let mut responses = self.responses.lock().unwrap(); + if responses.is_empty() { + return Err(anyhow::anyhow!("StubProvider: no more responses")); + } + Ok(responses.remove(0)) + } + + async fn chat_with_history( + &self, + _messages: &[ChatMessage], + _model: &str, + _temp: f64, + ) -> anyhow::Result { + let mut responses = self.responses.lock().unwrap(); + if responses.is_empty() { + return Err(anyhow::anyhow!("StubProvider: no more responses")); + } + Ok(responses.remove(0)) + } + } + + fn test_config(tmp: &TempDir) -> Config { + let mut cfg = Config::default(); + cfg.workspace_dir = tmp.path().join("workspace"); + std::fs::create_dir_all(&cfg.workspace_dir).unwrap(); + cfg + } + + fn seed_content(content_root: &Path) { + let raw_dir = content_root.join("raw").join("test-source").join("commits"); + std::fs::create_dir_all(&raw_dir).unwrap(); + 
std::fs::write( + raw_dir.join("123_abc.md"), + "---\nsource_kind: document\n---\n# Test Commit\nFixed the login bug in auth module.\n", + ) + .unwrap(); + + let doc_dir = content_root.join("document").join("test-doc"); + std::fs::create_dir_all(&doc_dir).unwrap(); + std::fs::write( + doc_dir.join("readme.md"), + "---\nsource_kind: document\n---\n# README\nProject documentation for the auth system.\n", + ) + .unwrap(); + + let wiki_dir = content_root + .join("wiki") + .join("summaries") + .join("source-test"); + std::fs::create_dir_all(wiki_dir.join("L1")).unwrap(); + std::fs::write( + wiki_dir.join("L1").join("summary-001.md"), + "---\nkind: summary\nlevel: 1\n---\nSummary of auth changes in May 2026.\n", + ) + .unwrap(); + } + + #[tokio::test] + async fn smart_walk_keyword_search_and_answer() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_content(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: keyword search for "login" + r#"{"name":"keyword_search","arguments":{"pattern":"login","content_type":"all"}}"#, + // Turn 2: read the matching file + r#"{"name":"read_content","arguments":{"path":"raw/test-source/commits/123_abc.md"}}"#, + // Turn 3: collect evidence and answer + r#"{"name":"collect_evidence","arguments":{"items":[{"source":"raw/test-source/commits/123_abc.md","snippet":"Fixed the login bug in auth module.","relevance":"directly mentions login fix"}]}} +{"name":"answer","arguments":{"text":"The login bug was fixed in the auth module, as documented in commit 123_abc."}}"#, + ]); + + let opts = SmartWalkOptions { + max_turns: 10, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "What happened with the login bug?", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); + 
assert!(outcome.answer.contains("login")); + assert_eq!(outcome.evidence.len(), 1); + assert!(outcome.evidence[0].snippet.contains("login bug")); + } + + #[tokio::test] + async fn smart_walk_list_sources() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_content(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: list sources + r#"{"name":"list_sources","arguments":{"content_type":"all"}}"#, + // Turn 2: answer + r#"{"name":"answer","arguments":{"text":"Found raw, document, and wiki content."}}"#, + ]); + + let opts = SmartWalkOptions { + max_turns: 5, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "What sources are available?", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); + assert!(outcome.answer.contains("raw")); + } + + #[tokio::test] + async fn smart_walk_max_turns() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_content(&content_root); + + let provider = StubProvider::new(vec![ + r#"{"name":"list_sources","arguments":{"content_type":"all"}}"#, + r#"{"name":"list_sources","arguments":{"content_type":"raw"}}"#, + r#"{"name":"list_sources","arguments":{"content_type":"wiki"}}"#, + ]); + + let opts = SmartWalkOptions { + max_turns: 3, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "loop test", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::MaxTurnsReached); + assert_eq!(outcome.turns_used, 3); + } + + #[test] + fn parse_multiple_tool_calls() { + let response = r#"Let me search. 
+{"name":"keyword_search","arguments":{"pattern":"test"}} +{"name":"entity_search","arguments":{"query":"Alice"}}"#; + + let (text, calls) = parse_tool_calls(response); + assert_eq!(calls.len(), 2); + assert_eq!(calls[0].name, "keyword_search"); + assert_eq!(calls[1].name, "entity_search"); + assert!(text.contains("Let me search")); + } + + #[test] + fn content_inventory_counts_files() { + let tmp = TempDir::new().unwrap(); + let content_root = tmp.path().join("content"); + seed_content(&content_root); + + let inventory = build_content_inventory(&content_root); + assert!(inventory.contains("Raw content")); + assert!(inventory.contains("Documents")); + assert!(inventory.contains("Wiki summaries")); + } + + // ── Staging integration tests (run with --ignored) ──────────────── + + fn staging_content_root() -> Option { + let path = std::path::PathBuf::from( + "/Users/enamakel/.openhuman-staging/users/69d9cb73e61f755583c3671f/workspace/memory_tree/content", + ); + if path.exists() { + Some(path) + } else { + None + } + } + + #[test] + #[ignore] + fn staging_keyword_search_finds_steven() { + let content_root = staging_content_root().expect("staging content not available"); + let mut results = Vec::new(); + search_dir_recursive( + &content_root.join("raw"), + "steven", + &mut results, + &content_root, + ); + println!("keyword 'steven': {} results", results.len()); + for r in results.iter().take(5) { + println!(" {}", r); + } + assert!( + !results.is_empty(), + "should find 'steven' in staging raw content" + ); + } + + #[test] + #[ignore] + fn staging_content_inventory() { + let content_root = staging_content_root().expect("staging content not available"); + let inventory = build_content_inventory(&content_root); + println!("Inventory:\n{}", inventory); + assert!(inventory.contains("Raw content")); + assert!(inventory.contains("Documents")); + } + + #[test] + #[ignore] + fn staging_list_sources_shows_github() { + let content_root = staging_content_root().expect("staging 
content not available"); + let call = InnerCall { + name: "list_sources".into(), + args: serde_json::json!({"content_type": "all"}), + }; + let (_, result, _, _) = dispatch_list_sources(&content_root, &call); + println!("list_sources:\n{}", result); + assert!(result.contains("raw/"), "should list raw sources"); + } + + #[test] + #[ignore] + fn staging_read_wiki_summary() { + let content_root = staging_content_root().expect("staging content not available"); + let wiki_dir = content_root.join("wiki").join("summaries"); + if !wiki_dir.exists() { + println!("no wiki summaries found — skipping"); + return; + } + // Find first summary file + let first = walkdir_first_md(&wiki_dir); + if let Some(path) = first { + let rel = path + .strip_prefix(&content_root) + .unwrap() + .to_string_lossy() + .to_string(); + println!("Reading wiki: {}", rel); + let call = InnerCall { + name: "read_content".into(), + args: serde_json::json!({"path": rel}), + }; + let (_, result, _, _) = dispatch_read_content(&content_root, &call); + println!("Content preview: {}", &result[..result.len().min(300)]); + assert!( + !result.starts_with("error"), + "should read wiki file without error" + ); + } + } + + #[test] + #[ignore] + fn staging_read_episodic_memory() { + let content_root = staging_content_root().expect("staging content not available"); + let ep_dir = content_root.join("episodic"); + if !ep_dir.exists() { + println!("no episodic memories — skipping"); + return; + } + let first = walkdir_first_md(&ep_dir); + if let Some(path) = first { + let rel = path + .strip_prefix(&content_root) + .unwrap() + .to_string_lossy() + .to_string(); + println!("Reading episodic: {}", rel); + let call = InnerCall { + name: "read_content".into(), + args: serde_json::json!({"path": rel}), + }; + let (_, result, _, _) = dispatch_read_content(&content_root, &call); + println!("Content preview: {}", &result[..result.len().min(300)]); + assert!( + !result.starts_with("error"), + "should read episodic file without 
error" + ); + } + } + + #[test] + #[ignore] + fn staging_full_smart_walk_keyword_pipeline() { + let content_root = staging_content_root().expect("staging content not available"); + + // Simulate the pipeline: list_sources → keyword_search → read_content + let call = InnerCall { + name: "list_sources".into(), + args: serde_json::json!({"content_type": "raw"}), + }; + let (_, sources, _, _) = dispatch_list_sources(&content_root, &call); + println!("Step 1 - Sources:\n{}", sources); + + let call = InnerCall { + name: "keyword_search".into(), + args: serde_json::json!({"pattern": "memory", "content_type": "all"}), + }; + let (_, search_result, _, _) = dispatch_keyword_search(&content_root, &call); + println!("Step 2 - Search 'memory':\n{}", search_result); + + if search_result.contains('[') { + // Extract first file path from results + if let Some(path_start) = search_result.find('[') { + if let Some(path_end) = search_result[path_start + 1..].find(']') { + let file_path = &search_result[path_start + 1..path_start + 1 + path_end]; + println!("Step 3 - Reading: {}", file_path); + let call = InnerCall { + name: "read_content".into(), + args: serde_json::json!({"path": file_path}), + }; + let (_, content, _, _) = dispatch_read_content(&content_root, &call); + println!( + "Step 3 - Content ({} chars): {}", + content.len(), + &content[..content.len().min(200)] + ); + assert!( + !content.starts_with("error"), + "pipeline should complete without errors" + ); + } + } + } + } + + fn walkdir_first_md(dir: &std::path::Path) -> Option { + fn recurse(dir: &std::path::Path) -> Option { + for entry in std::fs::read_dir(dir).ok()?.flatten() { + let path = entry.path(); + if path.is_dir() { + if let Some(found) = recurse(&path) { + return Some(found); + } + } else if path.extension().map_or(false, |e| e == "md") { + return Some(path); + } + } + None + } + recurse(dir) + } + + fn seed_synced_memory(content_root: &Path) { + // Raw email content + let email_dir = 
content_root.join("raw").join("email").join("inbox"); + std::fs::create_dir_all(&email_dir).unwrap(); + std::fs::write( + email_dir.join("001_meeting.md"), + "---\nsource_kind: email\nauthor: alice@example.com\ndate: 2026-06-01\n---\n\ + # Team standup notes\n\n\ + Action items:\n\ + - Deploy the auth service refactor by Friday\n\ + - Review PR #342 for the billing module\n\ + - Schedule security audit with external team\n", + ) + .unwrap(); + std::fs::write( + email_dir.join("002_project.md"), + "---\nsource_kind: email\nauthor: bob@example.com\ndate: 2026-06-02\n---\n\ + # Project Phoenix status update\n\n\ + The migration is 80% complete. Remaining:\n\ + - Database schema changes (blocked on DBA review)\n\ + - API versioning for backward compatibility\n\ + - Load testing the new endpoints\n", + ) + .unwrap(); + std::fs::write( + email_dir.join("003_personal.md"), + "---\nsource_kind: email\nauthor: carol@example.com\ndate: 2026-06-03\n---\n\ + # Lunch plans\n\n\ + Hey, want to grab sushi on Thursday? The new place on 5th street \ + got great reviews.\n", + ) + .unwrap(); + + // Episodic memories + let ep_dir = content_root.join("episodic").join("daily"); + std::fs::create_dir_all(&ep_dir).unwrap(); + std::fs::write( + ep_dir.join("2026-06-01.md"), + "---\nkind: episodic\ndate: 2026-06-01\n---\n\ + Worked on the auth service refactor. Had a productive standup.\n\ + Identified three blockers for Project Phoenix.\n", + ) + .unwrap(); + + // Wiki summaries + let wiki_dir = content_root + .join("wiki") + .join("summaries") + .join("email-inbox"); + std::fs::create_dir_all(wiki_dir.join("L1")).unwrap(); + std::fs::write( + wiki_dir.join("L1").join("summary-week-22.md"), + "---\nkind: summary\nlevel: 1\n---\n\ + Week 22 summary: Team focused on Project Phoenix migration \ + and auth service refactor. 
Key contacts: alice@example.com (standup), \ + bob@example.com (project status), carol@example.com (social).\n", + ) + .unwrap(); + + // Document content + let doc_dir = content_root.join("document").join("notes"); + std::fs::create_dir_all(&doc_dir).unwrap(); + std::fs::write( + doc_dir.join("project-phoenix.md"), + "---\nsource_kind: document\n---\n\ + # Project Phoenix\n\n\ + ## Overview\n\ + Migration from legacy monolith to microservices.\n\n\ + ## Status\n\ + Phase 2 of 3 — data migration and API versioning.\n\n\ + ## Key risks\n\ + - Data integrity during cutover\n\ + - Backward compatibility for mobile clients\n", + ) + .unwrap(); + } + + #[tokio::test] + async fn walk_synced_email_with_keyword_and_evidence() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_synced_memory(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: keyword search + r#"{"name":"keyword_search","arguments":{"pattern":"auth service","content_type":"all"}}"#, + // Turn 2: collect evidence + concat!( + r#"{"name":"collect_evidence","arguments":{"items":["#, + r#"{"source":"raw/email/inbox/001_meeting.md","snippet":"Deploy the auth service refactor by Friday","relevance":"action item"}"#, + r#"]}}"#, + ), + // Turn 3: answer + r#"{"name":"answer","arguments":{"text":"The auth service refactor needs to be deployed by Friday."}}"#, + ]); + + let opts = SmartWalkOptions { + max_turns: 5, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk( + &cfg, + &provider, + "What's happening with the auth service?", + opts, + ) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); + assert!(outcome.answer.contains("auth service")); + assert!(!outcome.evidence.is_empty()); + } + + #[tokio::test] + async fn walk_with_xml_format_tool_calls() { + let tmp = 
TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_synced_memory(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: XML-formatted tool call + concat!( + "", + "keyword_search", + "{\"pattern\": \"project phoenix\", \"content_type\": \"all\"}", + "", + ), + // Turn 2: JSON-formatted answer + r#"{"name":"answer","arguments":{"text":"Project Phoenix is in phase 2 of 3."}}"#, + ]); + + let opts = SmartWalkOptions { + max_turns: 5, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk( + &cfg, + &provider, + "What's the status of Project Phoenix?", + opts, + ) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); + assert!(outcome.answer.contains("phase 2")); + } + + #[tokio::test] + async fn walk_reads_across_content_types() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_synced_memory(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: list sources + r#"{"name":"list_sources","arguments":{"content_type":"all"}}"#, + // Turn 2: read document + r#"{"name":"read_content","arguments":{"path":"document/notes/project-phoenix.md"}}"#, + // Turn 3: read episodic + r#"{"name":"read_content","arguments":{"path":"episodic/daily/2026-06-01.md"}}"#, + // Turn 4: collect + answer + concat!( + r#"{"name":"collect_evidence","arguments":{"items":["#, + r#"{"source":"document/notes/project-phoenix.md","snippet":"Phase 2 of 3","relevance":"status"},"#, + r#"{"source":"episodic/daily/2026-06-01.md","snippet":"Identified three blockers","relevance":"context"}"#, + r#"]}}"#, + r#"{"name":"answer","arguments":{"text":"Project Phoenix: Phase 2/3 with 3 blockers identified."}}"#, + ), + ]); + + let opts = SmartWalkOptions { + max_turns: 5, 
+ namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "Summarize Project Phoenix status", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); + assert_eq!(outcome.evidence.len(), 2); + } + + #[tokio::test] + async fn walk_llm_gives_up_uses_fallback() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_synced_memory(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: search finds nothing + r#"{"name":"keyword_search","arguments":{"pattern":"quantum computing","content_type":"all"}}"#, + // Turn 2: LLM gives up with empty response + "", + ]); + + let opts = SmartWalkOptions { + max_turns: 5, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "Tell me about quantum computing", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::LlmGaveUp); + assert!(outcome.evidence.is_empty()); + assert!(outcome.answer.contains("Could not converge")); + } + + #[tokio::test] + async fn walk_direct_answer_without_tools() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_synced_memory(&content_root); + + let provider = StubProvider::new(vec![ + // LLM directly answers without using any tools + "I don't have enough context to answer that question from your memory.", + ]); + + let opts = SmartWalkOptions { + max_turns: 5, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "What's the meaning of life?", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, 
SmartWalkStopReason::Answered); + assert!(outcome.answer.contains("don't have enough context")); + assert_eq!(outcome.turns_used, 1); + assert!(outcome.evidence.is_empty()); + } + + #[tokio::test] + async fn walk_collect_evidence_deduplicates_within_limit() { + let tmp = TempDir::new().unwrap(); + let cfg = test_config(&tmp); + let content_root = cfg.workspace_dir.join("memory_tree").join("content"); + seed_synced_memory(&content_root); + + let provider = StubProvider::new(vec![ + // Turn 1: collect a batch of evidence + concat!( + r#"{"name":"collect_evidence","arguments":{"items":["#, + r#"{"source":"raw/email/inbox/001_meeting.md","snippet":"Deploy auth","relevance":"task"},"#, + r#"{"source":"raw/email/inbox/002_project.md","snippet":"Migration 80%","relevance":"status"}"#, + r#"]}}"#, + ), + // Turn 2: collect more evidence (including a duplicate of the first source) + concat!( + r#"{"name":"collect_evidence","arguments":{"items":["#, + r#"{"source":"document/notes/project-phoenix.md","snippet":"Phase 2 of 3","relevance":"doc"},"#, + r#"{"source":"raw/email/inbox/001_meeting.md","snippet":"Deploy auth (duplicate)","relevance":"task"}"#, + r#"]}}"#, + ), + // Turn 3: answer + r#"{"name":"answer","arguments":{"text":"Summary with evidence items including duplicate source."}}"#, + ]); + + let opts = SmartWalkOptions { + max_turns: 10, + namespace: "default".into(), + model: Some("test-model".into()), + content_root: Some(content_root), + }; + + let outcome = run_smart_walk(&cfg, &provider, "Summarize everything", opts) + .await + .unwrap(); + + assert_eq!(outcome.stopped_reason, SmartWalkStopReason::Answered); + // 2 items from turn 1 + 2 items from turn 2 (one of which duplicates a turn-1 source); + // collect_evidence does not deduplicate, so all 4 items are present. 
+ assert_eq!(outcome.evidence.len(), 4); + } +} diff --git a/src/openhuman/memory/query/smart_walk/tool.rs b/src/openhuman/memory/query/smart_walk/tool.rs new file mode 100644 index 0000000000..69bfb5c922 --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/tool.rs @@ -0,0 +1,187 @@ +//! `SmartMemoryWalkTool` — the agent-facing tool wrapper, plus the +//! `ChatProviderAdapter` that bridges the memory chat provider to the +//! inference `Provider` trait. + +use crate::openhuman::config::rpc as config_rpc; +use crate::openhuman::inference::provider::traits::{ChatMessage, Provider}; +use crate::openhuman::memory::chat::{build_chat_provider, ChatPrompt}; +use crate::openhuman::memory::query::smart_walk::runner::run_smart_walk; +use crate::openhuman::memory::query::smart_walk::types::{ + truncate_chars, SmartWalkOptions, HARD_MAX_TURNS, +}; +use crate::openhuman::tools::traits::{PermissionLevel, Tool, ToolCategory, ToolResult}; +use async_trait::async_trait; +use serde_json::json; + +// ── Tool ───────────────────────────────────────────────────────────────────── + +pub struct SmartMemoryWalkTool; + +#[async_trait] +impl Tool for SmartMemoryWalkTool { + fn name(&self) -> &str { + "memory_smart_walk" + } + + fn description(&self) -> &str { + "Smart memory retrieval — combines vector search, keyword search, \ + entity lookup, and tree browsing to answer queries about the user's \ + memory. More capable than the basic walk: searches across raw files, \ + wiki summaries, documents, and episodic memories." + } + + fn parameters_schema(&self) -> serde_json::Value { + json!({ + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "Natural-language question to answer by searching memory." + }, + "namespace": { + "type": "string", + "description": "Memory namespace. Default: \"default\"." + }, + "max_turns": { + "type": "integer", + "description": "Max LLM turns. Default 12, hard cap 25." 
+ }, + "model": { + "type": "string", + "description": "Provider:model override (e.g. 'deepseek:deepseek-chat')." + } + }, + "required": ["query"] + }) + } + + fn category(&self) -> ToolCategory { + ToolCategory::System + } + + fn permission_level(&self) -> PermissionLevel { + PermissionLevel::ReadOnly + } + + fn is_concurrency_safe(&self, _args: &serde_json::Value) -> bool { + true + } + + async fn execute(&self, args: serde_json::Value) -> anyhow::Result { + let query = args + .get("query") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("memory_smart_walk: `query` is required"))? + .to_string(); + + let namespace = args + .get("namespace") + .and_then(|v| v.as_str()) + .unwrap_or("default") + .to_string(); + + let max_turns = args + .get("max_turns") + .and_then(|v| v.as_u64()) + .map(|n| (n as usize).min(HARD_MAX_TURNS)) + .unwrap_or(12); + + let model = args + .get("model") + .and_then(|v| v.as_str()) + .map(|s| s.to_string()); + + let cfg = config_rpc::load_config_with_timeout() + .await + .map_err(|e| anyhow::anyhow!("memory_smart_walk: load config failed: {e}"))?; + + let opts = SmartWalkOptions { + max_turns, + namespace, + model, + content_root: None, + }; + + let chat_provider = build_chat_provider(&cfg) + .map_err(|e| anyhow::anyhow!("memory_smart_walk: build chat provider failed: {e}"))?; + let adapter = ChatProviderAdapter { + inner: chat_provider, + }; + + let outcome = run_smart_walk(&cfg, &adapter, &query, opts).await?; + + let mut out = format!("{}\n", outcome.answer); + + if !outcome.evidence.is_empty() { + out.push_str("\n## Evidence\n"); + for (i, ev) in outcome.evidence.iter().enumerate() { + out.push_str(&format!( + "{}. 
**{}** — {}\n > {}\n", + i + 1, + ev.source_path, + ev.relevance, + truncate_chars(&ev.snippet, 200) + )); + } + } + + out.push_str("\n## Trace\n"); + for step in &outcome.trace { + out.push_str(&format!( + "- **Turn {}** `{}` {}: {}\n", + step.turn, step.action, step.args_summary, step.result_preview + )); + } + out.push_str(&format!( + "\n*Stop reason: {:?}, turns used: {}*\n", + outcome.stopped_reason, outcome.turns_used + )); + + Ok(ToolResult::success(out)) + } +} + +// ── ChatProviderAdapter ─────────────────────────────────────────────────────── + +pub(crate) struct ChatProviderAdapter { + pub(crate) inner: std::sync::Arc, +} + +#[async_trait] +impl Provider for ChatProviderAdapter { + async fn chat_with_system( + &self, + system: Option<&str>, + message: &str, + _model: &str, + temperature: f64, + ) -> anyhow::Result { + let prompt = ChatPrompt { + system: system.unwrap_or("").to_string(), + user: message.to_string(), + temperature, + kind: "memory_smart_walk", + }; + self.inner.chat_for_text(&prompt).await + } + + async fn chat_with_history( + &self, + messages: &[ChatMessage], + model: &str, + temperature: f64, + ) -> anyhow::Result { + let system = messages + .iter() + .find(|m| m.role == "system") + .map(|m| m.content.as_str()); + let user: String = messages + .iter() + .filter(|m| m.role != "system") + .map(|m| m.content.as_str()) + .collect::>() + .join("\n"); + self.chat_with_system(system, &user, model, temperature) + .await + } +} diff --git a/src/openhuman/memory/query/smart_walk/types.rs b/src/openhuman/memory/query/smart_walk/types.rs new file mode 100644 index 0000000000..94e55109bd --- /dev/null +++ b/src/openhuman/memory/query/smart_walk/types.rs @@ -0,0 +1,66 @@ +//! Public output types and shared constants for smart_walk. 
/// Sampling temperature used for smart-walk provider calls.
pub(crate) const SMART_WALK_TEMP: f64 = 0.2;
/// Upper bound on LLM turns, regardless of the requested `max_turns`.
pub(crate) const HARD_MAX_TURNS: usize = 25;
// The three limits below are not referenced in this file; presumably they are
// consumed by the dispatch helpers — confirm there.
pub(crate) const MAX_EVIDENCE_ITEMS: usize = 30;
pub(crate) const MAX_KEYWORD_RESULTS: usize = 15;
pub(crate) const MAX_FILE_READ_BYTES: usize = 8000;

/// Returns at most the first `max_chars` characters of `value`.
///
/// Counts `char`s, not bytes, so the result always ends on a UTF-8 boundary.
pub(crate) fn truncate_chars(value: &str, max_chars: usize) -> String {
    value.chars().take(max_chars).collect()
}

// ── Public output types ─────────────────────────────────────────────────────

/// Caller-supplied knobs for one smart walk.
#[derive(Debug, Clone)]
pub struct SmartWalkOptions {
    /// Requested turn budget.
    pub max_turns: usize,
    /// Memory namespace the walk operates in.
    pub namespace: String,
    /// Provider string override (e.g. "deepseek:deepseek-chat").
    pub model: Option<String>,
    /// Content root override. Defaults to config.memory_tree_content_root().
    pub content_root: Option<std::path::PathBuf>,
}

impl Default for SmartWalkOptions {
    /// 12 turns in the `"default"` namespace, with no overrides.
    fn default() -> Self {
        Self {
            max_turns: 12,
            namespace: "default".into(),
            model: None,
            content_root: None,
        }
    }
}

/// Why a walk ended.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SmartWalkStopReason {
    Answered,
    MaxTurnsReached,
    LlmGaveUp,
    /// Carries the error message that stopped the walk.
    Error(String),
}

/// One dispatched tool call, recorded for the trace.
#[derive(Debug, Clone)]
pub struct SmartWalkStep {
    pub turn: usize,
    pub action: String,
    pub args_summary: String,
    pub result_preview: String,
}

/// One piece of supporting evidence collected during a walk.
#[derive(Debug, Clone)]
pub struct Evidence {
    pub source_path: String,
    pub snippet: String,
    pub relevance: String,
}

/// Final result of a walk: the answer plus everything gathered on the way.
#[derive(Debug, Clone)]
pub struct SmartWalkOutcome {
    pub answer: String,
    pub evidence: Vec<Evidence>,
    pub trace: Vec<SmartWalkStep>,
    pub turns_used: usize,
    pub stopped_reason: SmartWalkStopReason,
}

// ─────────────────────────────────────────────────────────────────────────────
// File boundary in the source diff (deleted file):
//   src/openhuman/memory/read_rpc.rs
// Its removed module documentation begins:
//   Read RPCs that back the new Memory tab UI.
//
//   Distinct from [`super::rpc`] (write/ingest) and [`super::retrieval::rpc`]
//   (LLM-callable retrieval primitives), this module exposes a small set of
"list / inspect / search / recall / score-for / delete" methods designed -//! for a human-facing dashboard — not for an LLM tool loop. -//! -//! All methods are scoped under the existing `memory_tree` JSON-RPC -//! namespace so they share authentication, telemetry, and discovery with -//! the other memory-tree RPCs. -//! -//! Coverage: -//! - `memory_tree_list_chunks` — paginated chunk listing with filters -//! - `memory_tree_list_sources` — distinct sources + chunk counts -//! - `memory_tree_search` — keyword search returning chunks -//! - `memory_tree_recall` — semantic recall (via Phase 4 rerank) -//! - `memory_tree_entity_index_for` — entities attached to one chunk -//! - `memory_tree_top_entities` — most-frequent canonical entities -//! - `memory_tree_chunk_score` — score breakdown for one chunk -//! - `memory_tree_delete_chunk` — purge one chunk + dependent rows -//! -//! The `Source.display_name` un-slugs the SQL `source_id` so a UI can show -//! a human-friendly label (e.g. `gmail:enamakel@..|sanil@..` → -//! `Enamakel ↔ Sanil`). When the workspace has surfaced the user's primary -//! email via app_state, we also strip it from the display so the user sees -//! the *other* party. 
- -use anyhow::{Context, Result}; -use rusqlite::params; -use serde::{Deserialize, Serialize}; -use std::io::Write; - -use crate::openhuman::config::Config; -use crate::openhuman::memory_store::chunks::store::{self as chunk_store, with_connection}; -use crate::openhuman::memory_store::chunks::types::SourceKind; -use crate::openhuman::memory_store::content::obsidian_registry; -use crate::openhuman::memory_store::content::read as content_read; -use crate::openhuman::memory_tree::retrieval::types::NodeKind; -use crate::openhuman::memory_tree::score::store as score_store; -use crate::rpc::RpcOutcome; - -const PREVIEW_MAX_CHARS: usize = 500; -const DEFAULT_LIST_LIMIT: u32 = 50; -const MAX_LIST_LIMIT: u32 = 1_000; - -// ── Wire types ─────────────────────────────────────────────────────────── - -/// Wire-shape chunk returned by the read RPCs. -/// -/// Distinct from [`crate::openhuman::memory_store::chunks::types::Chunk`] in two -/// ways: serialised timestamps are ms-since-epoch (matches the rest of the -/// JSON-RPC surface) and the body is replaced with a `≤500-char preview` -/// + a flag indicating whether the row has an embedding. UIs needing the -/// full body call back via `memory_tree_get_chunk`. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ChunkRow { - pub id: String, - pub source_kind: String, - pub source_id: String, - #[serde(default)] - pub source_ref: Option, - pub owner: String, - pub timestamp_ms: i64, - pub token_count: u32, - pub lifecycle_status: String, - #[serde(default)] - pub content_path: Option, - #[serde(default)] - pub content_preview: Option, - pub has_embedding: bool, - #[serde(default)] - pub tags: Vec, -} - -/// Filter shape for [`list_chunks`]. All fields are optional. 
-#[derive(Clone, Debug, Default, Serialize, Deserialize)] -pub struct ChunkFilter { - #[serde(default)] - pub source_kinds: Option>, - #[serde(default)] - pub source_ids: Option>, - #[serde(default)] - pub entity_ids: Option>, - #[serde(default)] - pub since_ms: Option, - #[serde(default)] - pub until_ms: Option, - #[serde(default)] - pub query: Option, - #[serde(default)] - pub limit: Option, - #[serde(default)] - pub offset: Option, -} - -/// Response shape for [`list_chunks`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ListChunksResponse { - pub chunks: Vec, - pub total: u64, -} - -/// Distinct ingest source plus chunk counts. Returned by [`list_sources`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct Source { - pub source_id: String, - /// Computed display name (un-slug + strip user email when known). - pub display_name: String, - pub source_kind: String, - pub chunk_count: u32, - pub most_recent_ms: i64, -} - -/// Lightweight reference to a canonical entity. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct EntityRef { - /// Canonical id (e.g. `email:alice@example.com`, `topic:phoenix`). - pub entity_id: String, - pub kind: String, - pub surface: String, - pub count: u32, -} - -/// Per-signal weight + raw value pair. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ScoreSignal { - pub name: String, - pub weight: f32, - pub value: f32, -} - -/// Score rationale returned by [`chunk_score`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ScoreBreakdown { - pub signals: Vec, - pub total: f32, - pub threshold: f32, - pub kept: bool, - pub llm_consulted: bool, -} - -// ── list_chunks ────────────────────────────────────────────────────────── - -/// `memory_tree_list_chunks` — paginated chunk listing with filters. 
-pub async fn list_chunks_rpc( - config: &Config, - filter: ChunkFilter, -) -> Result, String> { - let cfg = config.clone(); - let resp = tokio::task::spawn_blocking(move || -> Result { - list_chunks_blocking(&cfg, &filter) - }) - .await - .map_err(|e| format!("list_chunks join error: {e}"))? - .map_err(|e| format!("list_chunks: {e:#}"))?; - - let n = resp.chunks.len(); - let total = resp.total; - Ok(RpcOutcome::single_log( - resp, - format!("memory_tree::read: list_chunks n={n} total={total}"), - )) -} - -fn list_chunks_blocking(config: &Config, filter: &ChunkFilter) -> Result { - let limit = filter - .limit - .unwrap_or(DEFAULT_LIST_LIMIT) - .clamp(1, MAX_LIST_LIMIT); - let offset = filter.offset.unwrap_or(0); - - with_connection(config, |conn| { - // Build SQL with bound parameters. `entity_ids` requires an inner - // join via `mem_tree_entity_index`; the rest stay on `mem_tree_chunks`. - let mut sql = String::from( - "SELECT DISTINCT - c.id, c.source_kind, c.source_id, c.source_ref, c.owner, - c.timestamp_ms, c.token_count, c.lifecycle_status, - c.content_path, c.content, c.tags_json, - CASE WHEN c.embedding IS NULL THEN 0 ELSE 1 END AS has_embedding - FROM mem_tree_chunks c", - ); - let mut where_clauses: Vec = vec![]; - let mut params_owned: Vec> = Vec::new(); - - if let Some(eids) = &filter.entity_ids { - if !eids.is_empty() { - sql.push_str(" INNER JOIN mem_tree_entity_index ei ON ei.node_id = c.id"); - let placeholders: Vec = (0..eids.len()).map(|_| "?".to_string()).collect(); - where_clauses.push(format!("ei.entity_id IN ({})", placeholders.join(", "))); - for eid in eids { - params_owned.push(Box::new(eid.clone())); - } - } - } - if let Some(kinds) = &filter.source_kinds { - if !kinds.is_empty() { - let placeholders: Vec = (0..kinds.len()).map(|_| "?".to_string()).collect(); - where_clauses.push(format!("c.source_kind IN ({})", placeholders.join(", "))); - for k in kinds { - params_owned.push(Box::new(k.clone())); - } - } - } - if let Some(sids) = 
&filter.source_ids { - if !sids.is_empty() { - let placeholders: Vec = (0..sids.len()).map(|_| "?".to_string()).collect(); - where_clauses.push(format!("c.source_id IN ({})", placeholders.join(", "))); - for s in sids { - params_owned.push(Box::new(s.clone())); - } - } - } - if let Some(since) = filter.since_ms { - where_clauses.push("c.timestamp_ms >= ?".into()); - params_owned.push(Box::new(since)); - } - if let Some(until) = filter.until_ms { - where_clauses.push("c.timestamp_ms <= ?".into()); - params_owned.push(Box::new(until)); - } - if let Some(query) = &filter.query { - let q = query.trim(); - if !q.is_empty() { - // NOTE: `c.content` is the ≤500-char preview kept in - // SQLite, not the canonical body — that lives on disk - // at `c.content_path`. This means search currently - // misses any chunk whose match is past the first 500 - // chars. Acceptable for v1 (most matches land in the - // first paragraph anyway); a follow-up should swap to - // a full-text index over the on-disk body. - where_clauses.push("c.content LIKE ?".into()); - params_owned.push(Box::new(format!("%{}%", q))); - } - } - - if !where_clauses.is_empty() { - sql.push_str(" WHERE "); - sql.push_str(&where_clauses.join(" AND ")); - } - // total count for pagination — do it before applying limit/offset. - let count_sql = format!( - "SELECT COUNT(*) FROM ({}) AS sub", - sql.replacen( - "SELECT DISTINCT\n c.id, c.source_kind, c.source_id, c.source_ref, c.owner,\n c.timestamp_ms, c.token_count, c.lifecycle_status,\n c.content_path, c.content, c.tags_json,\n CASE WHEN c.embedding IS NULL THEN 0 ELSE 1 END AS has_embedding", - "SELECT DISTINCT c.id", - 1 - ) - ); - - sql.push_str(" ORDER BY c.timestamp_ms DESC, c.seq_in_source ASC LIMIT ? OFFSET ?"); - params_owned.push(Box::new(limit as i64)); - params_owned.push(Box::new(offset as i64)); - - // Execute count query — use the WHERE-bound params (without LIMIT/OFFSET). 
- let count_params: Vec<&dyn rusqlite::ToSql> = params_owned - .iter() - .take(params_owned.len() - 2) - .map(|b| b.as_ref() as &dyn rusqlite::ToSql) - .collect(); - let total: i64 = conn - .query_row(&count_sql, count_params.as_slice(), |r| r.get(0)) - .context("count chunks")?; - - // Execute list query. - let mut stmt = conn.prepare(&sql).context("prepare list_chunks")?; - let param_refs: Vec<&dyn rusqlite::ToSql> = params_owned - .iter() - .map(|b| b.as_ref() as &dyn rusqlite::ToSql) - .collect(); - let rows = stmt - .query_map(param_refs.as_slice(), |row| { - let id: String = row.get(0)?; - let source_kind: String = row.get(1)?; - let source_id: String = row.get(2)?; - let source_ref: Option = row.get(3)?; - let owner: String = row.get(4)?; - let timestamp_ms: i64 = row.get(5)?; - let token_count: i64 = row.get(6)?; - let lifecycle_status: String = row.get(7)?; - let content_path: Option = row.get(8)?; - let content: String = row.get(9)?; - let tags_json: String = row.get(10)?; - let has_embedding: i64 = row.get(11)?; - let preview: String = content.chars().take(PREVIEW_MAX_CHARS).collect(); - let tags: Vec = serde_json::from_str(&tags_json).unwrap_or_default(); - Ok(ChunkRow { - id, - source_kind, - source_id, - source_ref, - owner, - timestamp_ms, - token_count: token_count.max(0) as u32, - lifecycle_status, - content_path, - content_preview: if preview.is_empty() { - None - } else { - Some(preview) - }, - has_embedding: has_embedding != 0, - tags, - }) - })? - .collect::>>() - .context("collect list_chunks rows")?; - - Ok(ListChunksResponse { - chunks: rows, - total: total.max(0) as u64, - }) - }) -} - -// ── list_sources ───────────────────────────────────────────────────────── - -/// `memory_tree_list_sources` — distinct (source_kind, source_id) pairs -/// with aggregate chunk counts and most-recent timestamps. 
Display name is -/// computed from the `source_id` (un-slug; user email stripping where the -/// caller can supply the user's primary email via `user_email_hint`). -pub async fn list_sources_rpc( - config: &Config, - user_email_hint: Option, -) -> Result>, String> { - let cfg = config.clone(); - let sources = tokio::task::spawn_blocking(move || -> Result> { - list_sources_blocking(&cfg, user_email_hint.as_deref()) - }) - .await - .map_err(|e| format!("list_sources join error: {e}"))? - .map_err(|e| format!("list_sources: {e:#}"))?; - - let n = sources.len(); - Ok(RpcOutcome::single_log( - sources, - format!("memory_tree::read: list_sources n={n}"), - )) -} - -fn list_sources_blocking(config: &Config, user_email_hint: Option<&str>) -> Result> { - with_connection(config, |conn| { - let mut stmt = conn.prepare( - "SELECT source_kind, source_id, COUNT(*) AS n, MAX(timestamp_ms) AS most_recent - FROM mem_tree_chunks - GROUP BY source_kind, source_id - ORDER BY most_recent DESC", - )?; - let rows = stmt - .query_map([], |row| { - let source_kind: String = row.get(0)?; - let source_id: String = row.get(1)?; - let n: i64 = row.get(2)?; - let most_recent: i64 = row.get(3)?; - let display_name = display_name_for_source(&source_id, user_email_hint); - Ok(Source { - source_id, - display_name, - source_kind, - chunk_count: n.max(0) as u32, - most_recent_ms: most_recent, - }) - })? - .collect::>>() - .context("collect list_sources rows")?; - Ok(rows) - }) -} - -/// Compute the display name for a source. Pure / table-driven so the unit -/// tests can lock in the un-slug behaviour. 
-/// -/// Examples: -/// - `slack:#engineering` → `#engineering` (slack channel) -/// - `gmail:alice@example.com|bob@example.com` (user is alice) → `bob@example.com` -/// - `gmail:alice@example.com|bob@example.com` (user unknown) → -/// `alice@example.com ↔ bob@example.com` -/// - `notion:page-id-1234` → `page-id-1234` -fn display_name_for_source(source_id: &str, user_email_hint: Option<&str>) -> String { - // Drop the platform prefix if there is one. - let body = match source_id.split_once(':') { - Some((_platform, rest)) => rest, - None => source_id, - }; - // Email-thread ids often look like `a@x|b@y`. If the user's email is - // surfaced and matches one side, return only the other side. - if body.contains('|') { - let parts: Vec<&str> = body.split('|').collect(); - if let Some(user) = user_email_hint { - let user_lc = user.trim().to_ascii_lowercase(); - let others: Vec<&str> = parts - .iter() - .copied() - .filter(|p| p.trim().to_ascii_lowercase() != user_lc) - .collect(); - if !others.is_empty() && others.len() < parts.len() { - return others.join(", "); - } - } - // No user hint or no match — show all parties separated by an arrow. - return parts.join(" ↔ "); - } - body.to_string() -} - -// ── search / recall ────────────────────────────────────────────────────── - -/// `memory_tree_search` — keyword `LIKE '%q%'` over chunk bodies. Cheap, -/// deterministic, and useful as a fast fallback when the embedder is -/// offline or the query is short. Returns hits ordered by recency. -pub async fn search_rpc( - config: &Config, - query: String, - k: u32, -) -> Result>, String> { - let limit = k.clamp(1, MAX_LIST_LIMIT); - let filter = ChunkFilter { - query: Some(query.clone()), - limit: Some(limit), - ..ChunkFilter::default() - }; - let cfg = config.clone(); - let chunks = tokio::task::spawn_blocking(move || -> Result> { - Ok(list_chunks_blocking(&cfg, &filter)?.chunks) - }) - .await - .map_err(|e| format!("search join error: {e}"))? 
- .map_err(|e| format!("search: {e:#}"))?; - - let n = chunks.len(); - Ok(RpcOutcome::single_log( - chunks, - format!("memory_tree::read: search query_len={} n={n}", query.len()), - )) -} - -/// Response shape for [`recall_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct RecallResponse { - pub chunks: Vec, - pub scores: Vec, -} - -/// `memory_tree_recall` — semantic recall via the existing Phase 4 rerank -/// path. Calls into `retrieval::query_source(query=Some(q))` and converts -/// the top-K summary hits into chunk rows by walking the summary -/// `child_ids`. UIs use this for "find me chunks like X". -/// -/// Note: returns chunks (not summaries) because the Memory tab's design -/// is leaf-centric — users browse chunks, not summary nodes. -pub async fn recall_rpc( - config: &Config, - query: String, - k: u32, -) -> Result, String> { - let limit = k.clamp(1, MAX_LIST_LIMIT) as usize; - log::debug!( - "[memory_tree::read::recall] query_len={} k={}", - query.len(), - limit - ); - - // Reuse the source-tree retrieval path which already does cosine - // rerank against query embeddings. We pull more summaries than `k` - // because each summary expands into multiple leaves. - let resp = crate::openhuman::memory_tree::retrieval::query_source( - config, - None, - None, - None, - Some(query.as_str()), - limit, - ) - .await - .map_err(|e| format!("recall query_source: {e:#}"))?; - - // Walk each hit's child_ids → leaves. Summary level=1 children are - // chunks; for level>1 we'd need to recurse — keep it shallow for now - // so a Memory tab call doesn't fan out unboundedly. Retrieval already - // surfaces L1 first, so the shallow walk covers the common case. 
- let mut chunk_rows: Vec = Vec::new(); - let mut scores: Vec = Vec::new(); - let cfg = config.clone(); - let leaves: Vec<(String, f32)> = resp - .hits - .into_iter() - .filter(|h| matches!(h.node_kind, NodeKind::Summary) && h.level == 1) - .flat_map(|h| { - h.child_ids - .into_iter() - .map(move |id| (id, h.score)) - .collect::>() - }) - .collect(); - if !leaves.is_empty() { - let collected = tokio::task::spawn_blocking(move || -> Result> { - with_connection(&cfg, |conn| { - let mut out = Vec::with_capacity(leaves.len()); - for (chunk_id, score) in leaves { - let row = conn - .query_row( - "SELECT id, source_kind, source_id, source_ref, owner, - timestamp_ms, token_count, lifecycle_status, - content_path, content, tags_json, - CASE WHEN embedding IS NULL THEN 0 ELSE 1 END - FROM mem_tree_chunks WHERE id = ?1", - params![chunk_id], - |r| { - let id: String = r.get(0)?; - let source_kind: String = r.get(1)?; - let source_id: String = r.get(2)?; - let source_ref: Option = r.get(3)?; - let owner: String = r.get(4)?; - let timestamp_ms: i64 = r.get(5)?; - let token_count: i64 = r.get(6)?; - let lifecycle_status: String = r.get(7)?; - let content_path: Option = r.get(8)?; - let content: String = r.get(9)?; - let tags_json: String = r.get(10)?; - let has_emb: i64 = r.get(11)?; - let preview: String = - content.chars().take(PREVIEW_MAX_CHARS).collect(); - let tags: Vec = - serde_json::from_str(&tags_json).unwrap_or_default(); - Ok(ChunkRow { - id, - source_kind, - source_id, - source_ref, - owner, - timestamp_ms, - token_count: token_count.max(0) as u32, - lifecycle_status, - content_path, - content_preview: if preview.is_empty() { - None - } else { - Some(preview) - }, - has_embedding: has_emb != 0, - tags, - }) - }, - ) - .ok(); - if let Some(r) = row { - out.push((r, score)); - } - } - Ok(out) - }) - }) - .await - .map_err(|e| format!("recall join error: {e}"))? 
- .map_err(|e| format!("recall hydrate: {e:#}"))?; - for (row, sc) in collected { - chunk_rows.push(row); - scores.push(sc); - } - } - chunk_rows.truncate(limit); - scores.truncate(limit); - - let n = chunk_rows.len(); - Ok(RpcOutcome::single_log( - RecallResponse { - chunks: chunk_rows, - scores, - }, - format!("memory_tree::read: recall n={n}"), - )) -} - -// ── entity index lookups ──────────────────────────────────────────────── - -/// `memory_tree_entity_index_for` — return all canonical entities indexed -/// against a single chunk (or summary) node id. -pub async fn entity_index_for_rpc( - config: &Config, - chunk_id: String, -) -> Result>, String> { - let cfg = config.clone(); - let id = chunk_id.clone(); - let refs = tokio::task::spawn_blocking(move || -> Result> { - with_connection(&cfg, |conn| { - let mut stmt = conn.prepare( - "SELECT entity_id, entity_kind, surface, COUNT(*) AS n - FROM mem_tree_entity_index - WHERE node_id = ?1 - GROUP BY entity_id, entity_kind, surface - ORDER BY n DESC, entity_id ASC", - )?; - let rows = stmt - .query_map(params![id], |row| { - let entity_id: String = row.get(0)?; - let kind: String = row.get(1)?; - let surface: String = row.get(2)?; - let n: i64 = row.get(3)?; - Ok(EntityRef { - entity_id, - kind, - surface, - count: n.max(0) as u32, - }) - })? - .collect::>>() - .context("collect entity_index_for rows")?; - Ok(rows) - }) - }) - .await - .map_err(|e| format!("entity_index_for join error: {e}"))? - .map_err(|e| format!("entity_index_for: {e:#}"))?; - - let n = refs.len(); - Ok(RpcOutcome::single_log( - refs, - format!("memory_tree::read: entity_index_for chunk_id={chunk_id} n={n}"), - )) -} - -/// `memory_tree_chunks_for_entity` — return chunk IDs that reference an -/// entity_id. Inverse of `entity_index_for`. Used by the Memory tab's -/// People/Topics lenses to filter the chunk list to those mentioning a -/// selected entity. 
-pub async fn chunks_for_entity_rpc( - config: &Config, - entity_id: String, -) -> Result>, String> { - let cfg = config.clone(); - let eid = entity_id.clone(); - let chunk_ids = tokio::task::spawn_blocking(move || -> Result> { - with_connection(&cfg, |conn| { - let mut stmt = conn.prepare( - // node_kind values are `leaf` (= chunk node, the actual - // chunk_id) and `summary` (= sealed bucket summary). - // Memory tab filtering wants the chunk-level rows only. - "SELECT DISTINCT node_id - FROM mem_tree_entity_index - WHERE entity_id = ?1 AND node_kind = 'leaf' - ORDER BY timestamp_ms DESC", - )?; - let rows = stmt - .query_map(params![eid], |row| { - let node_id: String = row.get(0)?; - Ok(node_id) - })? - .collect::>>() - .context("collect chunks_for_entity rows")?; - Ok(rows) - }) - }) - .await - .map_err(|e| format!("chunks_for_entity join error: {e}"))? - .map_err(|e| format!("chunks_for_entity: {e:#}"))?; - - let n = chunk_ids.len(); - Ok(RpcOutcome::single_log( - chunk_ids, - format!("memory_tree::read: chunks_for_entity entity_id={entity_id} n={n}"), - )) -} - -/// `memory_tree_top_entities` — most-frequent canonical entities, -/// optionally narrowed to one [`EntityKind`]. 
-pub async fn top_entities_rpc( - config: &Config, - kind: Option, - limit: u32, -) -> Result>, String> { - let limit = limit.clamp(1, MAX_LIST_LIMIT); - let cfg = config.clone(); - let refs = tokio::task::spawn_blocking(move || -> Result> { - with_connection(&cfg, |conn| { - let mut sql = String::from( - "SELECT entity_id, entity_kind, MAX(surface) AS surface_sample, COUNT(*) AS n - FROM mem_tree_entity_index", - ); - let mut params_owned: Vec> = Vec::new(); - if let Some(k) = kind { - sql.push_str(" WHERE entity_kind = ?"); - params_owned.push(Box::new(k)); - } - sql.push_str( - " GROUP BY entity_id, entity_kind - ORDER BY n DESC, MAX(timestamp_ms) DESC - LIMIT ?", - ); - params_owned.push(Box::new(limit as i64)); - let mut stmt = conn.prepare(&sql)?; - let param_refs: Vec<&dyn rusqlite::ToSql> = params_owned - .iter() - .map(|b| b.as_ref() as &dyn rusqlite::ToSql) - .collect(); - let rows = stmt - .query_map(param_refs.as_slice(), |row| { - let entity_id: String = row.get(0)?; - let kind: String = row.get(1)?; - let surface: String = row.get(2)?; - let n: i64 = row.get(3)?; - Ok(EntityRef { - entity_id, - kind, - surface, - count: n.max(0) as u32, - }) - })? - .collect::>>() - .context("collect top_entities rows")?; - Ok(rows) - }) - }) - .await - .map_err(|e| format!("top_entities join error: {e}"))? - .map_err(|e| format!("top_entities: {e:#}"))?; - - let n = refs.len(); - Ok(RpcOutcome::single_log( - refs, - format!("memory_tree::read: top_entities n={n}"), - )) -} - -// ── chunk_score ───────────────────────────────────────────────────────── - -/// `memory_tree_chunk_score` — return the score breakdown stored in -/// `mem_tree_score` for one chunk. UI uses this to render the "why was -/// this kept / dropped" panel. 
-pub async fn chunk_score_rpc( - config: &Config, - chunk_id: String, -) -> Result>, String> { - let cfg = config.clone(); - let id = chunk_id.clone(); - let result = tokio::task::spawn_blocking(move || -> Result> { - let row = score_store::get_score(&cfg, &id)?; - Ok(row.map(|r| { - // Hard-code the cheap-signal weights from `SignalWeights::default()` - // / `with_llm_enabled()`. The score row doesn't persist the weights - // it was scored with, so we read them from the same defaults the - // scoring path uses. This is acceptable because the weights are - // derived constants — see `score::signals::types`. - let llm_consulted = r.signals.llm_importance > 0.0; - let signals = vec![ - ScoreSignal { - name: "token_count".into(), - weight: 1.0, - value: r.signals.token_count, - }, - ScoreSignal { - name: "unique_words".into(), - weight: 1.0, - value: r.signals.unique_words, - }, - ScoreSignal { - name: "metadata_weight".into(), - weight: 1.5, - value: r.signals.metadata_weight, - }, - ScoreSignal { - name: "source_weight".into(), - weight: 1.5, - value: r.signals.source_weight, - }, - ScoreSignal { - name: "interaction".into(), - weight: 3.0, - value: r.signals.interaction, - }, - ScoreSignal { - name: "entity_density".into(), - weight: 1.0, - value: r.signals.entity_density, - }, - ScoreSignal { - name: "llm_importance".into(), - weight: if llm_consulted { 2.0 } else { 0.0 }, - value: r.signals.llm_importance, - }, - ]; - ScoreBreakdown { - signals, - total: r.total, - threshold: crate::openhuman::memory_tree::score::DEFAULT_DROP_THRESHOLD, - kept: !r.dropped, - llm_consulted, - } - })) - }) - .await - .map_err(|e| format!("chunk_score join error: {e}"))? 
- .map_err(|e| format!("chunk_score: {e:#}"))?; - Ok(RpcOutcome::single_log( - result, - format!("memory_tree::read: chunk_score id={chunk_id}"), - )) -} - -// ── delete_chunk ──────────────────────────────────────────────────────── - -/// `memory_tree_delete_chunk` — purge one chunk plus its score row and -/// entity-index rows. Idempotent — missing chunk returns success with -/// `deleted=false`. -/// -/// Does NOT cascade through summary nodes — sealed summaries are -/// immutable; deletion of leaves attached to a sealed summary leaves the -/// summary referencing a now-missing child id. UIs warn the user and -/// callers wanting full cascade should rebuild the affected tree by -/// re-ingesting upstream. -pub async fn delete_chunk_rpc( - config: &Config, - chunk_id: String, -) -> Result, String> { - let cfg = config.clone(); - let id = chunk_id.clone(); - let resp = tokio::task::spawn_blocking(move || -> Result { - with_connection(&cfg, |conn| { - let tx = conn.unchecked_transaction()?; - // Find the chunk's content_path so we can also remove the .md file. - let content_path: Option = tx - .query_row( - "SELECT content_path FROM mem_tree_chunks WHERE id = ?1", - params![id], - |r| r.get::<_, Option>(0), - ) - .ok() - .flatten(); - let removed_score = - tx.execute("DELETE FROM mem_tree_score WHERE chunk_id = ?1", params![id])?; - let removed_index = tx.execute( - "DELETE FROM mem_tree_entity_index WHERE node_id = ?1", - params![id], - )?; - let removed_chunk = - tx.execute("DELETE FROM mem_tree_chunks WHERE id = ?1", params![id])?; - tx.commit()?; - // Best-effort filesystem cleanup outside the SQL tx. 
- if let Some(rel) = content_path { - let mut path = cfg.memory_tree_content_root(); - for component in rel.split('/') { - path.push(component); - } - if let Err(e) = std::fs::remove_file(&path) { - if e.kind() != std::io::ErrorKind::NotFound { - log::warn!( - "[memory_tree::read::delete] failed to remove chunk file path_hash={}: {e}", - crate::openhuman::memory::util::redact::redact(&rel), - ); - } - } - } - Ok(DeleteChunkResponse { - deleted: removed_chunk > 0, - score_rows_removed: removed_score as u32, - entity_index_rows_removed: removed_index as u32, - }) - }) - }) - .await - .map_err(|e| format!("delete_chunk join error: {e}"))? - .map_err(|e| format!("delete_chunk: {e:#}"))?; - Ok(RpcOutcome::single_log( - resp.clone(), - format!( - "memory_tree::read: delete_chunk id={chunk_id} deleted={} score_rows={} entity_rows={}", - resp.deleted, resp.score_rows_removed, resp.entity_index_rows_removed - ), - )) -} - -/// Response shape for [`delete_chunk_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct DeleteChunkResponse { - pub deleted: bool, - pub score_rows_removed: u32, - pub entity_index_rows_removed: u32, -} - -// ── graph_export ──────────────────────────────────────────────────────── - -/// Which graph the UI is asking for. -/// -/// `Tree` returns the summary tree (summary nodes connected by -/// parent_id) plus the leaf chunks hanging off it, bounded to ~1000 -/// nodes with summaries prioritized. `Contacts` returns raw chunks -/// connected to the person entities they mention via the inverted -/// `mem_tree_entity_index` — i.e. the document↔contact graph. -/// -/// Wire shape uses lowercase strings so the UI can pass `"tree"` / -/// `"contacts"` directly. -#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] -#[serde(rename_all = "lowercase")] -pub enum GraphMode { - #[default] - Tree, - Contacts, -} - -/// One node in the graph export. 
-/// -/// `kind` discriminates between the three node shapes the wire returns: -/// - `"summary"` — sealed summary node (Tree mode) -/// - `"chunk"` — raw memory chunk (Contacts mode) -/// - `"contact"` — canonical person entity (Contacts mode) -/// -/// Optional fields are only populated when relevant to the node kind so -/// the UI can branch on `kind` and ignore the rest. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct GraphNode { - /// `"summary" | "chunk" | "contact"`. - pub kind: String, - pub id: String, - /// Display-friendly label (summary uses scope, chunk uses preview - /// snippet, contact uses entity surface form). - pub label: String, - /// Summary-only: source/topic/global. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub tree_kind: Option, - /// Summary-only: human-readable scope. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub tree_scope: Option, - /// Summary-only: tree id. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub tree_id: Option, - /// Summary-only: level in the tree (0 = leaves, 1+ = summaries). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub level: Option, - /// Summary-only: parent summary id (None for roots). Present so - /// the UI draws parent→child edges directly without an explicit - /// edges array. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub parent_id: Option, - /// Summary-only: number of children rolled up under this node. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub child_count: Option, - /// Summary/chunk: time-range start (ms since epoch). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub time_range_start_ms: Option, - /// Summary/chunk: time-range end (ms since epoch). 
- #[serde(default, skip_serializing_if = "Option::is_none")] - pub time_range_end_ms: Option, - /// Summary-only: filesystem-safe basename of the summary's `.md` - /// file (used to build the Obsidian deep link). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub file_basename: Option, - /// Contact-only: entity kind (`person`, `organization`, …). - #[serde(default, skip_serializing_if = "Option::is_none")] - pub entity_kind: Option, -} - -/// One edge in the graph export. Used in Contacts mode to express -/// chunk↔contact mentions, since those don't fit the parent/child -/// shape encoded in `GraphNode.parent_id`. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct GraphEdge { - pub from: String, - pub to: String, -} - -/// Response shape for [`graph_export_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct GraphExportResponse { - pub nodes: Vec, - /// Explicit edges. In `Tree` mode this is empty (each summary - /// node's `parent_id` carries the edge); in `Contacts` mode each - /// edge connects a `chunk` node to a `contact` node. - #[serde(default)] - pub edges: Vec, - /// Absolute path to the on-disk `/memory_tree/content/` root. - /// UIs use this both to point an `obsidian://open?path=...` deep link at - /// the vault and as the folder the user adds via "Open folder as vault". - /// That deep link only resolves once this folder (or an ancestor) is a - /// *registered* Obsidian vault — the scheme cannot register a new vault on - /// its own, so the UI first calls [`obsidian_vault_status_rpc`] and guides - /// the user to add it when it isn't. - pub content_root_abs: String, -} - -/// `memory_tree_graph_export` — return either the summary tree or the -/// document↔contact graph, depending on `mode`. 
-pub async fn graph_export_rpc( - config: &Config, - mode: GraphMode, -) -> Result, String> { - let cfg = config.clone(); - let resp = tokio::task::spawn_blocking(move || -> Result { - let content_root = cfg.memory_tree_content_root(); - let resp = match mode { - GraphMode::Tree => collect_tree_graph(&cfg)?, - GraphMode::Contacts => collect_contacts_graph(&cfg)?, - }; - Ok(GraphExportResponse { - nodes: resp.0, - edges: resp.1, - content_root_abs: content_root.to_string_lossy().to_string(), - }) - }) - .await - .map_err(|e| format!("graph_export join error: {e}"))? - .map_err(|e| format!("graph_export: {e:#}"))?; - // Hash the content root rather than logging the absolute path — - // it embeds the user's home / username, which we don't want in - // tail-sampled debug streams or bug reports. - let log = format!( - "memory_tree::read: graph_export mode={:?} nodes={} edges={} root_hash={}", - mode, - resp.nodes.len(), - resp.edges.len(), - crate::openhuman::memory::util::redact::redact(&resp.content_root_abs), - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -/// Response shape for [`obsidian_vault_status_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ObsidianVaultStatusResponse { - /// `true` when the content root (or an ancestor) is already a registered - /// Obsidian vault, so `obsidian://open?path=` will actually resolve. - pub registered: bool, - /// `true` when an `obsidian.json` was found and parsed (Obsidian is set - /// up). Lets the UI offer "Open folder as vault" vs. "Install Obsidian". - pub config_found: bool, - /// Absolute path to `/memory_tree/content/` — the folder the - /// user adds to Obsidian, and the target of the deep link. - pub content_root_abs: String, -} - -/// `memory_tree_obsidian_vault_status` — best-effort check of whether the -/// memory-tree content root is a registered Obsidian vault. 
-/// -/// The Memory tab calls this before firing the `obsidian://open?path=` deep -/// link: that scheme only resolves vaults already present in Obsidian's -/// `obsidian.json`, so opening an unregistered folder lands on *"Unable to -/// find a vault for the URL"*. `obsidian_config_dir` optionally overrides -/// where we look for `obsidian.json` (non-standard installs: Flatpak / Snap / -/// portable). Never errors and never hits the network — a probe miss simply -/// reports `registered = false` and the UI degrades to "open anyway" + reveal. -pub async fn obsidian_vault_status_rpc( - config: &Config, - obsidian_config_dir: Option, -) -> Result, String> { - let cfg = config.clone(); - let resp = tokio::task::spawn_blocking(move || -> ObsidianVaultStatusResponse { - let content_root = cfg.memory_tree_content_root(); - // Treat a blank/whitespace override as "no override" — otherwise - // `Path::new("")` resolves to `.` and would probe a stray local - // `./obsidian.json`. The UI omits the field when empty, but the RPC - // is a public controller so normalize defensively here. - let extra = obsidian_config_dir - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(std::path::Path::new); - let reg = obsidian_registry::vault_registration_status(&content_root, extra); - ObsidianVaultStatusResponse { - registered: reg.registered, - config_found: reg.config_found, - content_root_abs: content_root.to_string_lossy().to_string(), - } - }) - .await - .map_err(|e| format!("obsidian_vault_status join error: {e}"))?; - - // Redact the absolute path (embeds the user's home / username) — log only - // the booleans and a stable hash, matching `graph_export_rpc`. 
- let log = format!( - "memory_tree::read: obsidian_vault_status registered={} config_found={} root_hash={}", - resp.registered, - resp.config_found, - crate::openhuman::memory::util::redact::redact(&resp.content_root_abs), - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -/// Response shape for [`vault_health_check_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct VaultHealthCheckResponse { - /// Absolute path to `/memory_tree/content/`. - pub content_root_abs: String, - /// `true` when the content-root directory exists on disk. - pub exists: bool, - /// `true` when the content-root directory is readable. - pub readable: bool, - /// `true` when a temp file can be created + removed under content-root. - pub writable: bool, - /// `true` when the content root (or an ancestor) is a registered Obsidian - /// vault in Obsidian's `obsidian.json`. - pub obsidian_registered: bool, - /// `true` when the Memory Tree pipeline is neither paused nor in an error - /// state. - pub pipeline_healthy: bool, - /// Epoch ms of the most-recent chunk timestamp. Zero when empty. - pub last_sync_ms: i64, -} - -/// `memory_tree_vault_health_check` — consolidated onboarding/settings health -/// snapshot for the workspace vault. -/// -/// Combines: -/// - filesystem reachability checks over `/memory_tree/content/` -/// - Obsidian registration check (same logic as `obsidian_vault_status_rpc`) -/// - pipeline health signals from `memory_tree_pipeline_status` -/// -/// `obsidian_config_dir` is optional and mirrors -/// [`obsidian_vault_status_rpc`]: it overrides where we probe for -/// `obsidian.json` for non-standard installs. 
-pub async fn vault_health_check_rpc( - config: &Config, - obsidian_config_dir: Option, -) -> Result, String> { - let cfg = config.clone(); - let fs_probe = tokio::task::spawn_blocking(move || { - let content_root = cfg.memory_tree_content_root(); - let content_root_abs = content_root.to_string_lossy().to_string(); - let exists = content_root.is_dir(); - let readable = exists && std::fs::read_dir(&content_root).is_ok(); - let writable = exists && probe_directory_writable(&content_root); - - let extra = obsidian_config_dir - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(std::path::Path::new); - let obsidian_registered = - obsidian_registry::vault_registration_status(&content_root, extra).registered; - - ( - content_root_abs, - exists, - readable, - writable, - obsidian_registered, - ) - }) - .await - .map_err(|e| format!("vault_health_check fs probe join error: {e}"))?; - - let pipeline = crate::openhuman::memory_tree::tree::rpc::pipeline_status_rpc(config) - .await - .map_err(|e| format!("vault_health_check pipeline_status: {e}"))?; - - let (content_root_abs, exists, readable, writable, obsidian_registered) = fs_probe; - let pipeline_healthy = pipeline.value.status != "error" && !pipeline.value.is_paused; - let last_sync_ms = pipeline.value.last_sync_ms.max(0); - - let resp = VaultHealthCheckResponse { - content_root_abs, - exists, - readable, - writable, - obsidian_registered, - pipeline_healthy, - last_sync_ms, - }; - - let log = format!( - "memory_tree::read: vault_health_check exists={} readable={} writable={} obsidian_registered={} pipeline_healthy={} last_sync_ms={} root_hash={}", - resp.exists, - resp.readable, - resp.writable, - resp.obsidian_registered, - resp.pipeline_healthy, - resp.last_sync_ms, - crate::openhuman::memory::util::redact::redact(&resp.content_root_abs), - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -fn probe_directory_writable(dir: &std::path::Path) -> bool { - let ts = std::time::SystemTime::now() - 
.duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_nanos()) - .unwrap_or(0); - let probe = dir.join(format!( - ".openhuman-vault-writecheck-{}-{ts}.tmp", - std::process::id() - )); - match std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(&probe) - { - Ok(mut file) => { - let write_ok = file.write_all(b"ok").is_ok(); - if let Err(e) = std::fs::remove_file(&probe) { - log::debug!("[memory] vault write-probe cleanup failed: {e}"); - } - write_ok - } - Err(_) => false, - } -} - -/// Tree mode: summary nodes joined to their owning tree for the -/// human-readable scope, plus the leaf chunks that hang off them. Edges -/// are encoded implicitly via `GraphNode.parent_id` (a chunk's -/// `parent_id` is its `parent_summary_id`, which matches a summary node's -/// `id`). -/// -/// Budget: summary (tree) nodes are **always kept in full** — they are -/// the skeleton of the graph — then leaf chunks fill the remaining budget -/// up to [`MAX_TREE_NODES`], most-recent first. Without the leaves the UI -/// graph showed only the handful of sealed summaries (e.g. ~20) while -/// Obsidian, which renders every `.md` on disk, showed hundreds; the -/// chunks are the bulk of the tree. Unsealed chunks have a null -/// `parent_summary_id` and render as orphan nodes — matching Obsidian's -/// `showOrphans` view. -fn collect_tree_graph(cfg: &Config) -> Result<(Vec, Vec)> { - const MAX_TREE_NODES: usize = 10_000; - - // 1. Collect summary nodes + their child_ids for document expansion. 
- struct SummaryRow { - node: GraphNode, - tree_scope: String, - child_ids: Vec, - } - - let summary_rows = with_connection(cfg, |conn| { - let mut stmt = conn.prepare( - "SELECT s.id, s.tree_id, s.tree_kind, t.scope, s.level, s.parent_id, - s.child_ids_json, s.time_range_start_ms, s.time_range_end_ms - FROM mem_tree_summaries s - JOIN mem_tree_trees t ON t.id = s.tree_id - WHERE s.deleted = 0 - ORDER BY s.tree_id, s.level, s.sealed_at_ms", - )?; - let rows = stmt - .query_map([], |row| { - let id: String = row.get(0)?; - let tree_id: String = row.get(1)?; - let tree_kind: String = row.get(2)?; - let tree_scope: String = row.get(3)?; - let level: i64 = row.get(4)?; - let parent_id: Option = row.get(5)?; - let child_ids_json: String = row.get(6)?; - let time_range_start_ms: i64 = row.get(7)?; - let time_range_end_ms: i64 = row.get(8)?; - let child_ids: Vec = - serde_json::from_str(&child_ids_json).unwrap_or_default(); - let child_count = child_ids.len() as u32; - let file_basename = sanitize_basename(&id); - let label = format!("L{} · {}", level.max(0), tree_scope); - Ok(SummaryRow { - node: GraphNode { - kind: "summary".into(), - id, - label, - tree_kind: Some(tree_kind), - tree_scope: Some(tree_scope.clone()), - tree_id: Some(tree_id), - level: Some(level.max(0) as u32), - parent_id, - child_count: Some(child_count), - time_range_start_ms: Some(time_range_start_ms), - time_range_end_ms: Some(time_range_end_ms), - file_basename: Some(file_basename), - entity_kind: None, - }, - tree_scope, - child_ids, - }) - })? - .collect::>>() - .context("collect tree-mode summary rows")?; - Ok(rows) - })?; - - // 2. Build synthetic source-root nodes (one per tree scope). 
- let mut scopes: std::collections::BTreeSet = std::collections::BTreeSet::new(); - for sr in &summary_rows { - scopes.insert(sr.tree_scope.clone()); - } - - let mut nodes: Vec = Vec::new(); - let mut source_root_ids: std::collections::HashMap = - std::collections::HashMap::new(); - - for scope in &scopes { - let root_id = format!("source:{scope}"); - let label = scope_display_label(scope); - source_root_ids.insert(scope.clone(), root_id.clone()); - nodes.push(GraphNode { - kind: "source".into(), - id: root_id, - label, - tree_kind: None, - tree_scope: Some(scope.clone()), - tree_id: None, - level: None, - parent_id: None, - child_count: None, - time_range_start_ms: None, - time_range_end_ms: None, - file_basename: None, - entity_kind: None, - }); - } - - // 3. Add summary nodes — orphans (no parent_id) link to their source root. - let mut summary_ids: std::collections::HashSet = std::collections::HashSet::new(); - for sr in &summary_rows { - summary_ids.insert(sr.node.id.clone()); - } - - for sr in &summary_rows { - let mut node = sr.node.clone(); - let has_valid_parent = node - .parent_id - .as_ref() - .map(|pid| summary_ids.contains(pid)) - .unwrap_or(false); - if !has_valid_parent { - node.parent_id = source_root_ids.get(&sr.tree_scope).cloned(); - } - nodes.push(node); - } - - // 4. For L1 summaries, emit document nodes from child_ids (commits/issues/PRs). - // These are the raw items that were summarised. Only for summaries whose - // children are NOT other summaries (i.e. L1 nodes whose children are - // raw item IDs, not summary IDs). - let doc_budget = MAX_TREE_NODES.saturating_sub(nodes.len()); - let mut doc_count = 0usize; - for sr in &summary_rows { - if doc_count >= doc_budget { - break; - } - if sr.node.level != Some(1) { - continue; - } - // Skip if children look like summary IDs (L2+ children). 
- if sr - .child_ids - .first() - .map(|c| c.starts_with("summary:")) - .unwrap_or(false) - { - continue; - } - for child_id in &sr.child_ids { - if doc_count >= doc_budget { - break; - } - let label = document_label(child_id); - nodes.push(GraphNode { - kind: "chunk".into(), - id: format!("doc:{}:{}", sr.tree_scope, child_id), - label, - tree_kind: None, - tree_scope: Some(sr.tree_scope.clone()), - tree_id: None, - level: None, - parent_id: Some(sr.node.id.clone()), - child_count: None, - time_range_start_ms: sr.node.time_range_start_ms, - time_range_end_ms: sr.node.time_range_end_ms, - file_basename: None, - entity_kind: None, - }); - doc_count += 1; - } - } - - // 5. Fill remaining budget with DB-backed leaf chunks (gmail etc). - let chunk_budget = MAX_TREE_NODES.saturating_sub(nodes.len()); - if chunk_budget > 0 { - let chunk_nodes = with_connection(cfg, |conn| { - let mut stmt = conn.prepare( - "SELECT c.id, c.parent_summary_id, c.content, - c.time_range_start_ms, c.time_range_end_ms, c.source_id - FROM mem_tree_chunks c - ORDER BY c.timestamp_ms DESC - LIMIT ?1", - )?; - let rows = stmt - .query_map(params![chunk_budget as i64], |row| { - let id: String = row.get(0)?; - let parent_id: Option = row.get(1)?; - let content: String = row.get(2)?; - let time_range_start_ms: i64 = row.get(3)?; - let time_range_end_ms: i64 = row.get(4)?; - let source_id: String = row.get(5)?; - let label = content - .lines() - .next() - .unwrap_or("") - .chars() - .take(72) - .collect::(); - Ok(( - GraphNode { - kind: "chunk".into(), - id, - label, - tree_kind: None, - tree_scope: None, - tree_id: None, - level: None, - parent_id: parent_id.filter(|s| !s.is_empty()), - child_count: None, - time_range_start_ms: Some(time_range_start_ms), - time_range_end_ms: Some(time_range_end_ms), - file_basename: None, - entity_kind: None, - }, - source_id, - )) - })? 
- .collect::>>() - .context("collect tree-mode leaf chunk rows")?; - Ok(rows) - })?; - - for (chunk, _source_id) in chunk_nodes { - nodes.push(chunk); - } - } - - Ok((nodes, Vec::new())) -} - -fn scope_display_label(scope: &str) -> String { - if scope.starts_with("github:") { - let repo = scope.strip_prefix("github:").unwrap_or(scope); - format!("GitHub · {repo}") - } else if scope.starts_with("gmail:") { - let account = scope - .strip_prefix("gmail:") - .unwrap_or(scope) - .replace("-at-", "@") - .replace("-dot-", "."); - format!("Gmail · {account}") - } else if scope.starts_with("slack:") { - let channel = scope.strip_prefix("slack:").unwrap_or(scope); - format!("Slack · {channel}") - } else { - scope.to_string() - } -} - -fn document_label(child_id: &str) -> String { - if let Some(sha) = child_id.strip_prefix("commit:") { - format!("commit {}", &sha[..sha.len().min(8)]) - } else if let Some(n) = child_id.strip_prefix("issue:") { - format!("issue #{n}") - } else if let Some(n) = child_id.strip_prefix("pr:") { - format!("PR #{n}") - } else { - child_id.chars().take(40).collect() - } -} - -fn source_id_to_scope(source_id: &str) -> String { - // Chunk source_ids like "gmail:stevent95-at-gmail-dot-com:thread:abc" - // → scope "gmail:stevent95-at-gmail-dot-com" - let parts: Vec<&str> = source_id.splitn(3, ':').collect(); - if parts.len() >= 2 { - format!("{}:{}", parts[0], parts[1]) - } else { - source_id.to_string() - } -} - -/// Contacts mode: every chunk that mentions a person entity, plus the -/// distinct person entities themselves, with one edge per mention. -/// -/// Caps applied to keep the wire payload bounded for large workspaces: -/// at most `MAX_CHUNK_NODES` chunks (most-recent first) and at most -/// `MAX_EDGES` mention edges. Older chunks beyond the cap are dropped -/// — the graph is for orientation, not exhaustive inspection. 
-fn collect_contacts_graph(cfg: &Config) -> Result<(Vec, Vec)> { - const MAX_CHUNK_NODES: usize = 1500; - const MAX_EDGES: usize = 4000; - - with_connection(cfg, |conn| { - // Pull the chunks that have at least one person mention. The - // `INNER JOIN` keeps orphan chunks (no person entities) out of - // the contacts view — they'd be isolated nodes that add no - // signal. - let mut chunk_stmt = conn.prepare( - "SELECT c.id, c.timestamp_ms, c.content - FROM mem_tree_chunks c - WHERE c.id IN ( - SELECT DISTINCT node_id - FROM mem_tree_entity_index - WHERE entity_kind = 'person' - ) - ORDER BY c.timestamp_ms DESC - LIMIT ?1", - )?; - let chunks: Vec<(String, i64, String)> = chunk_stmt - .query_map(params![MAX_CHUNK_NODES as i64], |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, i64>(1)?, - row.get::<_, String>(2)?, - )) - })? - .collect::>() - .context("collect contacts-mode chunk rows")?; - - let chunk_ids: Vec = chunks.iter().map(|(id, _, _)| id.clone()).collect(); - - // Pull mention edges + distinct contacts, scoped to the - // chunks we already kept and to leaf rows only. Filtering in - // SQL (rather than after a global `LIMIT`) is essential: in a - // busy workspace, unrelated `mem_tree_entity_index` rows - // would otherwise consume the entire `MAX_EDGES` window and - // leave kept chunks with zero contact edges. We build the - // `IN (?, ?, …)` placeholder list dynamically so SQLite can - // index-narrow the search to just the kept chunks before - // applying the cap. - let edges: Vec<(String, String, String)> = if chunk_ids.is_empty() { - Vec::new() - } else { - let placeholders = std::iter::repeat("?") - .take(chunk_ids.len()) - .collect::>() - .join(","); - let sql = format!( - "SELECT entity_id, node_id, surface - FROM mem_tree_entity_index - WHERE entity_kind = 'person' - AND node_kind = 'leaf' - AND node_id IN ({placeholders}) - ORDER BY timestamp_ms DESC - LIMIT ?" - ); - // Bind chunk ids first, then MAX_EDGES last. 
- let mut bind: Vec = chunk_ids - .iter() - .map(|s| rusqlite::types::Value::Text(s.clone())) - .collect(); - bind.push(rusqlite::types::Value::Integer(MAX_EDGES as i64)); - let mut mention_stmt = conn.prepare(&sql)?; - let rows = mention_stmt - .query_map(rusqlite::params_from_iter(bind), |row| { - Ok(( - row.get::<_, String>(0)?, - row.get::<_, String>(1)?, - row.get::<_, String>(2)?, - )) - })? - .collect::>>() - .context("collect contacts-mode mentions")?; - rows - }; - - let mut edges_out: Vec = Vec::with_capacity(edges.len()); - let mut contacts: std::collections::HashMap = - std::collections::HashMap::new(); - for (entity_id, node_id, surface) in edges { - // First-seen surface wins as the display label — surface - // forms can vary across mentions (e.g. "Alice", "Alice S."). - contacts.entry(entity_id.clone()).or_insert(surface); - edges_out.push(GraphEdge { - from: node_id, - to: entity_id, - }); - } - - let mut nodes: Vec = Vec::with_capacity(chunks.len() + contacts.len()); - for (id, ts, preview) in chunks { - // Trim preview to one line for graph hover legibility. 
- let label = preview - .lines() - .next() - .unwrap_or("") - .chars() - .take(72) - .collect::(); - nodes.push(GraphNode { - kind: "chunk".into(), - id, - label, - tree_kind: None, - tree_scope: None, - tree_id: None, - level: None, - parent_id: None, - child_count: None, - time_range_start_ms: Some(ts), - time_range_end_ms: Some(ts), - file_basename: None, - entity_kind: None, - }); - } - for (entity_id, surface) in contacts { - nodes.push(GraphNode { - kind: "contact".into(), - id: entity_id, - label: surface, - tree_kind: None, - tree_scope: None, - tree_id: None, - level: None, - parent_id: None, - child_count: None, - time_range_start_ms: None, - time_range_end_ms: None, - file_basename: None, - entity_kind: Some("person".into()), - }); - } - Ok((nodes, edges_out)) - }) -} - -/// Replicate `content_store::paths::sanitize_filename` — colons and other -/// Windows-illegal characters become `-` so the basename matches the -/// on-disk `.md` filename Obsidian needs to open via deep link. -fn sanitize_basename(id: &str) -> String { - id.chars() - .map(|c| match c { - '\\' | '/' | ':' | '*' | '?' | '"' | '<' | '>' | '|' => '-', - other => other, - }) - .collect() -} - -// ── wipe_all (destructive "reset memory" trigger) ─────────────────────── - -/// Response shape for [`wipe_all_rpc`]. Counts everything we touched -/// so the UI can confirm something actually happened. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct WipeAllResponse { - /// Number of mem_tree_* SQLite rows deleted across all tables. - pub rows_deleted: u64, - /// Top-level on-disk directories under `/` that we - /// removed (e.g. `["raw", "wiki", "email", "chat", "document", - /// "summaries"]`). - pub dirs_removed: Vec, - /// Composio sync-state KV rows deleted from the unified memory - /// store. Clearing these is what lets the next sync re-fetch - /// every upstream item instead of skipping ones the dedup set - /// already saw. 
- pub sync_state_cleared: u64, -} - -/// `memory_tree_wipe_all` — destructive reset of every memory-tree -/// artefact owned by this workspace. -/// -/// Three things get wiped, in this order: -/// 1. Every `mem_tree_*` SQLite table (chunks, summaries, trees, -/// buffers, score, entity_index, entity_hotness, jobs). -/// 2. The on-disk content folders under `/` -/// (`raw`, `wiki`, plus the legacy `email` / `chat` / `document` -/// / `summaries` paths). -/// 3. The Composio sync-state KV rows under the -/// `composio-sync-state` namespace in the unified memory store. -/// These hold each provider's per-connection cursor + -/// `synced_ids` dedup set — clearing them is what lets the next -/// sync re-fetch every upstream item instead of skipping the -/// ones it's already seen. -/// -/// Used by the "Reset memory" button in the Memory tab so the user -/// can re-sync from scratch without leaving the app. -pub async fn wipe_all_rpc(config: &Config) -> Result, String> { - let cfg = config.clone(); - let (rows_deleted, sync_state_cleared) = tokio::task::spawn_blocking(move || -> Result<(u64, u64)> { - // Tables to truncate. Order matters: `mem_tree_summaries` and - // `mem_tree_buffers` both have `FOREIGN KEY (tree_id) REFERENCES - // mem_tree_trees(id)` with `PRAGMA foreign_keys = ON`, so trees - // must come AFTER its dependents. Every other table's order is - // free. - const TABLES: &[&str] = &[ - "mem_tree_score", - "mem_tree_entity_index", - "mem_tree_entity_hotness", - "mem_tree_jobs", - "mem_tree_buffers", - "mem_tree_summaries", - "mem_tree_trees", - "mem_tree_chunks", - // Source-level ingest gate. MUST be cleared on wipe: otherwise the - // chunks are gone but `(source_kind, source_id[@version])` stays - // claimed, so the next sync sees `already_ingested` and writes 0 - // chunks / enqueues 0 seal jobs — a wiped source can never - // rebuild. 
(Previously masked for documents by the old - // delete-first re-ingest path, which has been removed in favour of - // non-destructive versioned ingest.) - "mem_tree_ingested_sources", - ]; - let rows_deleted: u64 = with_connection(&cfg, |conn| { - let tx = conn.unchecked_transaction()?; - let mut total: u64 = 0; - for table in TABLES { - let n = tx - .execute(&format!("DELETE FROM {table}"), []) - .with_context(|| format!("delete from {table}"))?; - total += n as u64; - } - tx.commit()?; - Ok(total) - })?; - - // Composio sync-state lives in the unified memory store - // (`/memory/memory.db`). Open it directly and - // delete every key in the `composio-sync-state` namespace — - // this clears each provider's `cursor` + `synced_ids` set so - // the next sync re-fetches from the beginning. - let sync_state_cleared: u64 = { - let unified_db = cfg.workspace_dir.join("memory").join("memory.db"); - if !unified_db.exists() { - log::debug!( - "[memory_tree::read::wipe] unified memory DB not present — skipping sync-state clear" - ); - 0 - } else { - clear_composio_sync_state(&unified_db) - .context("clear composio-sync-state during wipe_all")? - } - }; - - Ok((rows_deleted, sync_state_cleared)) - }) - .await - .map_err(|e| format!("wipe_all join error: {e}"))? - .map_err(|e| format!("wipe_all: {e:#}"))?; - - // Filesystem cleanup. Each directory is best-effort: if one - // fails (permission denied, path doesn't exist) we keep going - // and report what we managed to remove. `email/` and the - // legacy bare `summaries/` are listed for back-compat — - // workspaces ingested before the raw-archive + wiki/ moves - // still have files there. Fresh installs only ever populate - // `raw/`, `wiki/`, `chat/`, and `document/`. - // - // Use async retry to avoid blocking the executor during Windows sharing violations. 
- const DIRS: &[&str] = &["raw", "wiki", "chat", "document", "email", "summaries"]; - let content_root = config.memory_tree_content_root(); - let mut dirs_removed: Vec = Vec::new(); - for dir in DIRS { - let path = content_root.join(dir); - let remove_result = crate::openhuman::util::retry_with_backoff_async( - &format!("remove dir {}", dir), - 6, - 200, - || async { - tokio::fs::remove_dir_all(&path) - .await - .context("remove_dir_all") - }, - ) - .await; - - match remove_result { - Ok(()) => dirs_removed.push((*dir).to_string()), - Err(e) => { - let is_not_found = e - .chain() - .find_map(|e| e.downcast_ref::()) - .map_or(false, |ioe| ioe.kind() == std::io::ErrorKind::NotFound); - if !is_not_found { - // Logical name (raw / wiki / chat / ...) is enough - // signal — the absolute path embeds the user's - // home directory. - log::warn!( - "[memory_tree::read::wipe] failed to remove dir={} err={:#}", - dir, - e - ); - } - } - } - } - - let resp = WipeAllResponse { - rows_deleted, - dirs_removed, - sync_state_cleared, - }; - - let log = format!( - "memory_tree::read: wipe_all rows={} dirs={:?} sync_state={}", - resp.rows_deleted, resp.dirs_removed, resp.sync_state_cleared - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -/// Drop every row in the unified memory store's `kv_namespace` table -/// keyed under [`crate::openhuman::composio::providers::sync_state::KV_NAMESPACE`]. -/// -/// We open the SQLite file directly rather than going through -/// [`crate::openhuman::memory_store::client::MemoryClientRef`] so -/// `wipe_all` stays a pure synchronous operation runnable from -/// `spawn_blocking` without dragging in the full memory-store init -/// path. The `kv_namespace` table is created up-front by -/// `UnifiedMemory::new`, so the DELETE is a no-op on a fresh DB -/// rather than an error. 
-fn clear_composio_sync_state(db_path: &std::path::Path) -> Result { - use crate::openhuman::composio::providers::sync_state::KV_NAMESPACE; - let conn = rusqlite::Connection::open(db_path) - .with_context(|| format!("open unified memory db {}", db_path.display()))?; - let n = conn - .execute( - "DELETE FROM kv_namespace WHERE namespace = ?1", - params![KV_NAMESPACE], - ) - .context("delete composio-sync-state rows")?; - Ok(n as u64) -} - -// ── reset_tree (rebuild summary tree from existing chunks) ────────────── - -/// Response shape for [`reset_tree_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct ResetTreeResponse { - /// Tree-state SQLite rows deleted (summaries + trees + buffers + jobs). - pub tree_rows_deleted: u64, - /// Number of `mem_tree_chunks` whose lifecycle_status was reset to - /// `pending_extraction` (i.e. the chunks that will re-enter the - /// extract → score → embed → buffer → seal pipeline). - pub chunks_requeued: u64, - /// Number of `extract_chunk` jobs enqueued (one per chunk in - /// `chunks_requeued`). The job worker picks these up and drives - /// each chunk back through the pipeline; downstream seals - /// happen automatically as L0 buffers fill. - pub jobs_enqueued: u64, -} - -/// `memory_tree_reset_tree` — wipe summary-tree state but keep chunks -/// + raw archive + sync state, then re-enqueue every chunk through -/// the extraction pipeline so the tree rebuilds from scratch. -/// -/// Useful when you've changed the LLM summariser (e.g. flipped from -/// inert fallback to a real Ollama model) and want to re-summarise -/// existing data without paying the upstream sync cost again. -/// -/// Three steps, executed in this order: -/// 1. Truncate `mem_tree_summaries`, `mem_tree_trees`, -/// `mem_tree_buffers`, `mem_tree_jobs`. The tree schema is -/// derived state — chunks are the source of truth. -/// 2. 
Reset every chunk's `lifecycle_status` to -/// `'pending_extraction'` and enqueue an `extract_chunk` job -/// keyed on the chunk id. The async worker picks each up and -/// re-runs entity extract → score → embed → append-to-buffer. -/// Seals happen automatically as L0 buffers cross the gate. -/// 3. Remove `/wiki/summaries/` on disk so stale -/// `.md` files don't drift from the SQL truth. Done last (and -/// outside `spawn_blocking`) so the on-disk removal can use -/// async retry without blocking the worker thread. -pub async fn reset_tree_rpc(config: &Config) -> Result, String> { - use crate::openhuman::memory_queue::store as jobs_store; - use crate::openhuman::memory_queue::types::{ExtractChunkPayload, NewJob}; - - let cfg = config.clone(); - let (tree_rows_deleted, chunks_requeued, jobs_enqueued) = - tokio::task::spawn_blocking(move || -> Result<(u64, u64, u64)> { - // Step 1 — truncate tree state in one transaction. - const TREE_TABLES: &[&str] = &[ - "mem_tree_summaries", - "mem_tree_buffers", - "mem_tree_jobs", - "mem_tree_entity_index", - "mem_tree_trees", - ]; - let tree_rows_deleted: u64 = with_connection(&cfg, |conn| { - let tx = conn.unchecked_transaction()?; - let mut total: u64 = 0; - for table in TREE_TABLES { - let n = tx - .execute(&format!("DELETE FROM {table}"), []) - .with_context(|| format!("delete from {table}"))?; - total += n as u64; - } - tx.commit()?; - Ok(total) - })?; - - // Step 2 — flip every chunk back to `pending_extraction` and - // enqueue an `extract_chunk` job per id. - let (chunks_requeued, jobs_enqueued) = - with_connection(&cfg, |conn| -> anyhow::Result<(u64, u64)> { - let tx = conn.unchecked_transaction()?; - let chunks_requeued = tx.execute( - "UPDATE mem_tree_chunks SET lifecycle_status = 'pending_extraction'", - [], - )? as u64; - let chunk_ids: Vec = { - let mut stmt = tx.prepare("SELECT id FROM mem_tree_chunks")?; - let rows = stmt - .query_map([], |r| r.get::<_, String>(0))? 
- .collect::>>() - .context("collect chunk ids")?; - rows - }; - let mut jobs_enqueued: u64 = 0; - for id in &chunk_ids { - let payload = ExtractChunkPayload { - chunk_id: id.clone(), - }; - let job = NewJob::extract_chunk(&payload) - .context("build extract_chunk NewJob")?; - if jobs_store::enqueue_tx(&tx, &job) - .context("enqueue extract_chunk")? - .is_some() - { - jobs_enqueued += 1; - } - } - tx.commit()?; - Ok((chunks_requeued, jobs_enqueued)) - })?; - - Ok((tree_rows_deleted, chunks_requeued, jobs_enqueued)) - }) - .await - .map_err(|e| format!("reset_tree join error: {e}"))? - .map_err(|e| format!("reset_tree: {e:#}"))?; - - // Step 3 — wipe the on-disk wiki/summaries tree. - // Use async retry to avoid blocking the executor during Windows sharing violations. - let summaries_dir = config - .memory_tree_content_root() - .join("wiki") - .join("summaries"); - let remove_result = crate::openhuman::util::retry_with_backoff_async( - "remove wiki/summaries", - 6, - 200, - || async { - tokio::fs::remove_dir_all(&summaries_dir) - .await - .context("remove_dir_all") - }, - ) - .await; - - match remove_result { - Ok(()) => log::debug!("[memory_tree::read::reset_tree] removed wiki/summaries"), - Err(e) => { - let is_not_found = e - .chain() - .find_map(|e| e.downcast_ref::()) - .map_or(false, |ioe| ioe.kind() == std::io::ErrorKind::NotFound); - if !is_not_found { - log::warn!( - "[memory_tree::read::reset_tree] failed to remove wiki/summaries: {:#}", - e - ) - } - } - } - - // Wake the worker pool. Done after the on-disk cleanup so jobs don't - // start racing against an in-progress directory removal; the small - // delay (at most the retry window on Windows) is acceptable. 
- crate::openhuman::memory_queue::wake_workers(); - - let resp = ResetTreeResponse { - tree_rows_deleted, - chunks_requeued, - jobs_enqueued, - }; - - let log = format!( - "memory_tree::read: reset_tree tree_rows={} chunks={} jobs={}", - resp.tree_rows_deleted, resp.chunks_requeued, resp.jobs_enqueued - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -// ── flush_source_tree (per-source immediate seal) ─────────────────────── - -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct FlushSourceTreeResponse { - pub tree_scope: String, - pub seals_fired: u32, -} - -/// `memory_tree_flush_source` — seal one source tree's L0 buffer immediately, -/// bypassing the job queue. Mutex per tree_scope so concurrent clicks are -/// serialised. -pub async fn flush_source_tree_rpc( - config: &Config, - source_scope: &str, -) -> Result, String> { - use crate::openhuman::memory::tree_source::get_or_create_source_tree; - use crate::openhuman::memory_tree::tree::bucket_seal::LabelStrategy; - use crate::openhuman::memory_tree::tree::flush::force_flush_tree; - use crate::openhuman::memory_tree::tree::TreeFactory; - use std::collections::HashSet; - use std::sync::Mutex; - - static ACTIVE: std::sync::LazyLock>> = - std::sync::LazyLock::new(|| Mutex::new(HashSet::new())); - - let scope = source_scope.to_string(); - - { - let mut active = ACTIVE.lock().unwrap_or_else(|e| e.into_inner()); - if !active.insert(scope.clone()) { - return Ok(RpcOutcome::single_log( - FlushSourceTreeResponse { - tree_scope: scope, - seals_fired: 0, - }, - "memory_tree::read: flush_source_tree already running for this scope".to_string(), - )); - } - } - - let cfg = config.clone(); - let scope_for_task = scope.clone(); - let result = tokio::task::spawn_blocking(move || -> Result { - let tree = get_or_create_source_tree(&cfg, &scope_for_task) - .context("get_or_create_source_tree")?; - let strategy = TreeFactory::from_tree(&tree).label_strategy(&cfg); - Ok(FlushSourceTreeResponse { - tree_scope: 
scope_for_task, - seals_fired: 0, - }) - }) - .await - .map_err(|e| format!("flush_source_tree join error: {e}"))?; - - let tree_info = result.map_err(|e| format!("flush_source_tree: {e:#}"))?; - - let cfg2 = config.clone(); - let scope2 = scope.clone(); - let resp = tokio::spawn(async move { - let tree = get_or_create_source_tree(&cfg2, &scope2)?; - let strategy = TreeFactory::from_tree(&tree).label_strategy(&cfg2); - let sealed = force_flush_tree(&cfg2, &tree.id, Some(chrono::Utc::now()), &strategy).await?; - Ok::<_, anyhow::Error>(FlushSourceTreeResponse { - tree_scope: scope2, - seals_fired: sealed.len() as u32, - }) - }) - .await - .map_err(|e| format!("flush_source_tree join error: {e}"))? - .map_err(|e| format!("flush_source_tree: {e:#}"))?; - - { - let mut active = ACTIVE.lock().unwrap_or_else(|e| e.into_inner()); - active.remove(&scope); - } - - let log = format!( - "memory_tree::read: flush_source_tree scope={} seals={}", - resp.tree_scope, resp.seals_fired - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -// ── flush_now (manual "Build summary trees" trigger) ──────────────────── - -/// Response shape for [`flush_now_rpc`]. -#[derive(Clone, Debug, Serialize, Deserialize)] -pub struct FlushNowResponse { - /// `true` when a fresh job row was inserted; `false` when the - /// dedupe key already had an active flush job for today (the - /// existing job will pick up the same buffers). - pub enqueued: bool, - /// Number of L0 buffers that currently qualify for force-seal under - /// `max_age_secs = 0` — i.e. every non-empty L0 buffer in the - /// workspace. Echoed back so the UI can show "Sealing N buffers…" - /// without waiting for the worker to drain. - pub stale_buffers: u32, -} - -/// `memory_tree_flush_now` — UI-facing "Build summary trees" trigger. -/// -/// Enqueues a `flush_stale` job with `max_age_secs = 0` so every L0 -/// buffer (raw-leaf frontier of every source tree) gets force-sealed -/// regardless of its age. 
The seal worker picks up the new summary -/// nodes, runs them through the configured summariser (cloud or local -/// depending on `memory_tree.llm_backend`), and persists the new L1+ -/// summaries — i.e. the tree gets built using the user's chosen AI. -/// -/// Idempotent: the dedupe key is `flush_stale:-h` -/// where `` is the current 3-hour UTC block (0..=7), so -/// spamming the button within the same window doesn't queue duplicates. -pub async fn flush_now_rpc(config: &Config) -> Result, String> { - use crate::openhuman::memory_queue::store as jobs_store; - use crate::openhuman::memory_queue::types::{FlushStalePayload, NewJob}; - use crate::openhuman::memory_tree::tree::store as tree_store; - - let cfg = config.clone(); - let resp = tokio::task::spawn_blocking(move || -> Result { - // Probe how many L0 buffers currently qualify (cutoff "now" = - // every buffer with at least one item) for the response payload. - let stale = tree_store::list_stale_buffers(&cfg, chrono::Utc::now()) - .context("list stale buffers")?; - let stale_buffers = stale.len() as u32; - - let payload = FlushStalePayload { - max_age_secs: Some(0), - }; - let now = chrono::Utc::now(); - let date_iso = now.format("%Y-%m-%d").to_string(); - let hour_block = chrono::Timelike::hour(&now) / 3; - let job = NewJob::flush_stale(&payload, &date_iso, hour_block) - .context("build flush_stale NewJob")?; - let enqueued = jobs_store::enqueue(&cfg, &job) - .context("enqueue flush_stale job")? - .is_some(); - Ok(FlushNowResponse { - enqueued, - stale_buffers, - }) - }) - .await - .map_err(|e| format!("flush_now join error: {e}"))? 
- .map_err(|e| format!("flush_now: {e:#}"))?; - - let log = format!( - "memory_tree::read: flush_now enqueued={} stale_buffers={}", - resp.enqueued, resp.stale_buffers - ); - Ok(RpcOutcome::single_log(resp, log)) -} - -// ── small helpers ─────────────────────────────────────────────────────── - -/// Fetch the raw `mem_tree_chunks` row plus a content preview, suitable -/// for building a [`ChunkRow`]. Used by [`chunk_store::get_chunk`] callers -/// who don't want to walk all the way back through the existing read -/// path. Currently unused publicly — kept for the JSON-RPC layer to call -/// when wiring per-id reads. -#[allow(dead_code)] -pub(crate) fn read_chunk_row(config: &Config, chunk_id: &str) -> Result> { - let chunk = match chunk_store::get_chunk(config, chunk_id)? { - Some(c) => c, - None => return Ok(None), - }; - // Try to load the full body for the preview, falling back to whatever - // SQLite has if the on-disk file is missing. - let body = - content_read::read_chunk_body(config, chunk_id).unwrap_or_else(|_| chunk.content.clone()); - let preview: String = body.chars().take(PREVIEW_MAX_CHARS).collect(); - let has_embedding = chunk_store::get_chunk_embedding(config, chunk_id)?.is_some(); - Ok(Some(ChunkRow { - id: chunk.id, - source_kind: chunk.metadata.source_kind.as_str().to_string(), - source_id: chunk.metadata.source_id, - source_ref: chunk.metadata.source_ref.map(|r| r.value), - owner: chunk.metadata.owner, - timestamp_ms: chunk.metadata.timestamp.timestamp_millis(), - token_count: chunk.token_count, - lifecycle_status: chunk_store::get_chunk_lifecycle_status(config, chunk_id)? 
- .unwrap_or_else(|| "unknown".to_string()), - content_path: chunk_store::get_chunk_content_path(config, chunk_id)?, - content_preview: if preview.is_empty() { - None - } else { - Some(preview) - }, - has_embedding, - tags: chunk.metadata.tags, - })) -} - -#[allow(dead_code)] -fn parse_source_kind_str(s: &str) -> Option { - SourceKind::parse(s).ok() -} - -// ── Tests ──────────────────────────────────────────────────────────────── - -#[cfg(test)] -#[path = "read_rpc_tests.rs"] -mod tests; diff --git a/src/openhuman/memory/read_rpc/admin.rs b/src/openhuman/memory/read_rpc/admin.rs new file mode 100644 index 0000000000..175b8f3cb6 --- /dev/null +++ b/src/openhuman/memory/read_rpc/admin.rs @@ -0,0 +1,347 @@ +use anyhow::{Context, Result}; +use rusqlite::params; + +use crate::openhuman::config::Config; +use crate::openhuman::memory_store::chunks::store::with_connection; +use crate::rpc::RpcOutcome; + +use super::types::{FlushNowResponse, FlushSourceTreeResponse, ResetTreeResponse, WipeAllResponse}; + +// ── wipe_all ───────────────────────────────────────────────────────────── + +pub async fn wipe_all_rpc(config: &Config) -> Result, String> { + let cfg = config.clone(); + let (rows_deleted, sync_state_cleared) = + tokio::task::spawn_blocking(move || -> Result<(u64, u64)> { + const TABLES: &[&str] = &[ + "mem_tree_score", + "mem_tree_entity_index", + "mem_tree_entity_hotness", + "mem_tree_jobs", + "mem_tree_buffers", + "mem_tree_summaries", + "mem_tree_trees", + "mem_tree_chunks", + "mem_tree_ingested_sources", + ]; + let rows_deleted: u64 = with_connection(&cfg, |conn| { + let tx = conn.unchecked_transaction()?; + let mut total: u64 = 0; + for table in TABLES { + let n = tx + .execute(&format!("DELETE FROM {table}"), []) + .with_context(|| format!("delete from {table}"))?; + total += n as u64; + } + tx.commit()?; + Ok(total) + })?; + + let sync_state_cleared: u64 = { + let unified_db = cfg.workspace_dir.join("memory").join("memory.db"); + if !unified_db.exists() { + 
log::debug!( + "[memory_tree::read::wipe] unified memory DB not present — skipping sync-state clear" + ); + 0 + } else { + clear_composio_sync_state(&unified_db) + .context("clear composio-sync-state during wipe_all")? + } + }; + + Ok((rows_deleted, sync_state_cleared)) + }) + .await + .map_err(|e| format!("wipe_all join error: {e}"))? + .map_err(|e| format!("wipe_all: {e:#}"))?; + + const DIRS: &[&str] = &["raw", "wiki", "chat", "document", "email", "summaries"]; + let content_root = config.memory_tree_content_root(); + let mut dirs_removed: Vec = Vec::new(); + for dir in DIRS { + let path = content_root.join(dir); + let remove_result = crate::openhuman::util::retry_with_backoff_async( + &format!("remove dir {}", dir), + 6, + 200, + || async { + tokio::fs::remove_dir_all(&path) + .await + .context("remove_dir_all") + }, + ) + .await; + + match remove_result { + Ok(()) => dirs_removed.push((*dir).to_string()), + Err(e) => { + let is_not_found = e + .chain() + .find_map(|e| e.downcast_ref::()) + .map_or(false, |ioe| ioe.kind() == std::io::ErrorKind::NotFound); + if !is_not_found { + log::warn!( + "[memory_tree::read::wipe] failed to remove dir={} err={:#}", + dir, + e + ); + } + } + } + } + + let resp = WipeAllResponse { + rows_deleted, + dirs_removed, + sync_state_cleared, + }; + + let log = format!( + "memory_tree::read: wipe_all rows={} dirs={:?} sync_state={}", + resp.rows_deleted, resp.dirs_removed, resp.sync_state_cleared + ); + Ok(RpcOutcome::single_log(resp, log)) +} + +pub(crate) fn clear_composio_sync_state(db_path: &std::path::Path) -> Result { + use crate::openhuman::composio::providers::sync_state::KV_NAMESPACE; + let conn = rusqlite::Connection::open(db_path) + .with_context(|| format!("open unified memory db {}", db_path.display()))?; + let n = conn + .execute( + "DELETE FROM kv_namespace WHERE namespace = ?1", + params![KV_NAMESPACE], + ) + .context("delete composio-sync-state rows")?; + Ok(n as u64) +} + +// ── reset_tree 
─────────────────────────────────────────────────────────── + +pub async fn reset_tree_rpc(config: &Config) -> Result, String> { + use crate::openhuman::memory_queue::store as jobs_store; + use crate::openhuman::memory_queue::types::{ExtractChunkPayload, NewJob}; + + let cfg = config.clone(); + let (tree_rows_deleted, chunks_requeued, jobs_enqueued) = + tokio::task::spawn_blocking(move || -> Result<(u64, u64, u64)> { + const TREE_TABLES: &[&str] = &[ + "mem_tree_summaries", + "mem_tree_buffers", + "mem_tree_jobs", + "mem_tree_entity_index", + "mem_tree_trees", + ]; + let tree_rows_deleted: u64 = with_connection(&cfg, |conn| { + let tx = conn.unchecked_transaction()?; + let mut total: u64 = 0; + for table in TREE_TABLES { + let n = tx + .execute(&format!("DELETE FROM {table}"), []) + .with_context(|| format!("delete from {table}"))?; + total += n as u64; + } + tx.commit()?; + Ok(total) + })?; + + let (chunks_requeued, jobs_enqueued) = + with_connection(&cfg, |conn| -> anyhow::Result<(u64, u64)> { + let tx = conn.unchecked_transaction()?; + let chunks_requeued = tx.execute( + "UPDATE mem_tree_chunks SET lifecycle_status = 'pending_extraction'", + [], + )? as u64; + let chunk_ids: Vec = { + let mut stmt = tx.prepare("SELECT id FROM mem_tree_chunks")?; + let rows = stmt + .query_map([], |r| r.get::<_, String>(0))? + .collect::>>() + .context("collect chunk ids")?; + rows + }; + let mut jobs_enqueued: u64 = 0; + for id in &chunk_ids { + let payload = ExtractChunkPayload { + chunk_id: id.clone(), + }; + let job = NewJob::extract_chunk(&payload) + .context("build extract_chunk NewJob")?; + if jobs_store::enqueue_tx(&tx, &job) + .context("enqueue extract_chunk")? + .is_some() + { + jobs_enqueued += 1; + } + } + tx.commit()?; + Ok((chunks_requeued, jobs_enqueued)) + })?; + + Ok((tree_rows_deleted, chunks_requeued, jobs_enqueued)) + }) + .await + .map_err(|e| format!("reset_tree join error: {e}"))? 
+ .map_err(|e| format!("reset_tree: {e:#}"))?; + + let summaries_dir = config + .memory_tree_content_root() + .join("wiki") + .join("summaries"); + let remove_result = crate::openhuman::util::retry_with_backoff_async( + "remove wiki/summaries", + 6, + 200, + || async { + tokio::fs::remove_dir_all(&summaries_dir) + .await + .context("remove_dir_all") + }, + ) + .await; + + match remove_result { + Ok(()) => log::debug!("[memory_tree::read::reset_tree] removed wiki/summaries"), + Err(e) => { + let is_not_found = e + .chain() + .find_map(|e| e.downcast_ref::()) + .map_or(false, |ioe| ioe.kind() == std::io::ErrorKind::NotFound); + if !is_not_found { + log::warn!( + "[memory_tree::read::reset_tree] failed to remove wiki/summaries: {:#}", + e + ) + } + } + } + + crate::openhuman::memory_queue::wake_workers(); + + let resp = ResetTreeResponse { + tree_rows_deleted, + chunks_requeued, + jobs_enqueued, + }; + + let log = format!( + "memory_tree::read: reset_tree tree_rows={} chunks={} jobs={}", + resp.tree_rows_deleted, resp.chunks_requeued, resp.jobs_enqueued + ); + Ok(RpcOutcome::single_log(resp, log)) +} + +// ── flush_source_tree ──────────────────────────────────────────────────── + +pub async fn flush_source_tree_rpc( + config: &Config, + source_scope: &str, +) -> Result, String> { + use crate::openhuman::memory::tree_source::get_or_create_source_tree; + use crate::openhuman::memory_tree::tree::bucket_seal::LabelStrategy; + use crate::openhuman::memory_tree::tree::flush::force_flush_tree; + use crate::openhuman::memory_tree::tree::TreeFactory; + use std::collections::HashSet; + use std::sync::Mutex; + + static ACTIVE: std::sync::LazyLock>> = + std::sync::LazyLock::new(|| Mutex::new(HashSet::new())); + + let scope = source_scope.to_string(); + + { + let mut active = ACTIVE.lock().unwrap_or_else(|e| e.into_inner()); + if !active.insert(scope.clone()) { + return Ok(RpcOutcome::single_log( + FlushSourceTreeResponse { + tree_scope: scope, + seals_fired: 0, + }, + 
"memory_tree::read: flush_source_tree already running for this scope".to_string(), + )); + } + } + + let cfg = config.clone(); + let scope_for_task = scope.clone(); + let result = tokio::task::spawn_blocking(move || -> Result { + let tree = get_or_create_source_tree(&cfg, &scope_for_task) + .context("get_or_create_source_tree")?; + let _strategy = TreeFactory::from_tree(&tree).label_strategy(&cfg); + Ok(FlushSourceTreeResponse { + tree_scope: scope_for_task, + seals_fired: 0, + }) + }) + .await + .map_err(|e| format!("flush_source_tree join error: {e}"))?; + + let _tree_info = result.map_err(|e| format!("flush_source_tree: {e:#}"))?; + + let cfg2 = config.clone(); + let scope2 = scope.clone(); + let resp = tokio::spawn(async move { + let tree = get_or_create_source_tree(&cfg2, &scope2)?; + let strategy = TreeFactory::from_tree(&tree).label_strategy(&cfg2); + let sealed = force_flush_tree(&cfg2, &tree.id, Some(chrono::Utc::now()), &strategy).await?; + Ok::<_, anyhow::Error>(FlushSourceTreeResponse { + tree_scope: scope2, + seals_fired: sealed.len() as u32, + }) + }) + .await + .map_err(|e| format!("flush_source_tree join error: {e}"))? 
+ .map_err(|e| format!("flush_source_tree: {e:#}"))?; + + { + let mut active = ACTIVE.lock().unwrap_or_else(|e| e.into_inner()); + active.remove(&scope); + } + + let log = format!( + "memory_tree::read: flush_source_tree scope={} seals={}", + resp.tree_scope, resp.seals_fired + ); + Ok(RpcOutcome::single_log(resp, log)) +} + +// ── flush_now ───────────────────────────────────────────────────────────── + +pub async fn flush_now_rpc(config: &Config) -> Result, String> { + use crate::openhuman::memory_queue::store as jobs_store; + use crate::openhuman::memory_queue::types::{FlushStalePayload, NewJob}; + use crate::openhuman::memory_tree::tree::store as tree_store; + + let cfg = config.clone(); + let resp = tokio::task::spawn_blocking(move || -> Result { + let stale = tree_store::list_stale_buffers(&cfg, chrono::Utc::now()) + .context("list stale buffers")?; + let stale_buffers = stale.len() as u32; + + let payload = FlushStalePayload { + max_age_secs: Some(0), + }; + let now = chrono::Utc::now(); + let date_iso = now.format("%Y-%m-%d").to_string(); + let hour_block = chrono::Timelike::hour(&now) / 3; + let job = NewJob::flush_stale(&payload, &date_iso, hour_block) + .context("build flush_stale NewJob")?; + let enqueued = jobs_store::enqueue(&cfg, &job) + .context("enqueue flush_stale job")? + .is_some(); + Ok(FlushNowResponse { + enqueued, + stale_buffers, + }) + }) + .await + .map_err(|e| format!("flush_now join error: {e}"))? 
+ .map_err(|e| format!("flush_now: {e:#}"))?; + + let log = format!( + "memory_tree::read: flush_now enqueued={} stale_buffers={}", + resp.enqueued, resp.stale_buffers + ); + Ok(RpcOutcome::single_log(resp, log)) +} diff --git a/src/openhuman/memory/read_rpc/chunks.rs b/src/openhuman/memory/read_rpc/chunks.rs new file mode 100644 index 0000000000..b4e8ebf1a6 --- /dev/null +++ b/src/openhuman/memory/read_rpc/chunks.rs @@ -0,0 +1,434 @@ +use anyhow::{Context, Result}; + +use crate::openhuman::config::Config; +use crate::openhuman::memory_store::chunks::store::{self as chunk_store, with_connection}; +use crate::openhuman::memory_store::content::read as content_read; +use crate::openhuman::memory_tree::retrieval::types::NodeKind; +use crate::rpc::RpcOutcome; + +use super::types::{ + ChunkFilter, ChunkRow, ListChunksResponse, RecallResponse, Source, DEFAULT_LIST_LIMIT, + MAX_LIST_LIMIT, PREVIEW_MAX_CHARS, +}; + +// ── list_chunks ────────────────────────────────────────────────────────── + +pub async fn list_chunks_rpc( + config: &Config, + filter: ChunkFilter, +) -> Result, String> { + let cfg = config.clone(); + let resp = tokio::task::spawn_blocking(move || -> Result { + list_chunks_blocking(&cfg, &filter) + }) + .await + .map_err(|e| format!("list_chunks join error: {e}"))? 
+ .map_err(|e| format!("list_chunks: {e:#}"))?; + + let n = resp.chunks.len(); + let total = resp.total; + Ok(RpcOutcome::single_log( + resp, + format!("memory_tree::read: list_chunks n={n} total={total}"), + )) +} + +pub(super) fn list_chunks_blocking( + config: &Config, + filter: &ChunkFilter, +) -> Result { + let limit = filter + .limit + .unwrap_or(DEFAULT_LIST_LIMIT) + .clamp(1, MAX_LIST_LIMIT); + let offset = filter.offset.unwrap_or(0); + + with_connection(config, |conn| { + let mut sql = String::from( + "SELECT DISTINCT + c.id, c.source_kind, c.source_id, c.source_ref, c.owner, + c.timestamp_ms, c.token_count, c.lifecycle_status, + c.content_path, c.content, c.tags_json, + CASE WHEN c.embedding IS NULL THEN 0 ELSE 1 END AS has_embedding + FROM mem_tree_chunks c", + ); + let mut where_clauses: Vec = vec![]; + let mut params_owned: Vec> = Vec::new(); + + if let Some(eids) = &filter.entity_ids { + if !eids.is_empty() { + sql.push_str(" INNER JOIN mem_tree_entity_index ei ON ei.node_id = c.id"); + let placeholders: Vec = (0..eids.len()).map(|_| "?".to_string()).collect(); + where_clauses.push(format!("ei.entity_id IN ({})", placeholders.join(", "))); + for eid in eids { + params_owned.push(Box::new(eid.clone())); + } + } + } + if let Some(kinds) = &filter.source_kinds { + if !kinds.is_empty() { + let placeholders: Vec = (0..kinds.len()).map(|_| "?".to_string()).collect(); + where_clauses.push(format!("c.source_kind IN ({})", placeholders.join(", "))); + for k in kinds { + params_owned.push(Box::new(k.clone())); + } + } + } + if let Some(sids) = &filter.source_ids { + if !sids.is_empty() { + let placeholders: Vec = (0..sids.len()).map(|_| "?".to_string()).collect(); + where_clauses.push(format!("c.source_id IN ({})", placeholders.join(", "))); + for s in sids { + params_owned.push(Box::new(s.clone())); + } + } + } + if let Some(since) = filter.since_ms { + where_clauses.push("c.timestamp_ms >= ?".into()); + params_owned.push(Box::new(since)); + } + if let 
Some(until) = filter.until_ms { + where_clauses.push("c.timestamp_ms <= ?".into()); + params_owned.push(Box::new(until)); + } + if let Some(query) = &filter.query { + let q = query.trim(); + if !q.is_empty() { + where_clauses.push("c.content LIKE ?".into()); + params_owned.push(Box::new(format!("%{}%", q))); + } + } + + if !where_clauses.is_empty() { + sql.push_str(" WHERE "); + sql.push_str(&where_clauses.join(" AND ")); + } + let count_sql = format!( + "SELECT COUNT(*) FROM ({}) AS sub", + sql.replacen( + "SELECT DISTINCT\n c.id, c.source_kind, c.source_id, c.source_ref, c.owner,\n c.timestamp_ms, c.token_count, c.lifecycle_status,\n c.content_path, c.content, c.tags_json,\n CASE WHEN c.embedding IS NULL THEN 0 ELSE 1 END AS has_embedding", + "SELECT DISTINCT c.id", + 1 + ) + ); + + sql.push_str(" ORDER BY c.timestamp_ms DESC, c.seq_in_source ASC LIMIT ? OFFSET ?"); + params_owned.push(Box::new(limit as i64)); + params_owned.push(Box::new(offset as i64)); + + let count_params: Vec<&dyn rusqlite::ToSql> = params_owned + .iter() + .take(params_owned.len() - 2) + .map(|b| b.as_ref() as &dyn rusqlite::ToSql) + .collect(); + let total: i64 = conn + .query_row(&count_sql, count_params.as_slice(), |r| r.get(0)) + .context("count chunks")?; + + let mut stmt = conn.prepare(&sql).context("prepare list_chunks")?; + let param_refs: Vec<&dyn rusqlite::ToSql> = params_owned + .iter() + .map(|b| b.as_ref() as &dyn rusqlite::ToSql) + .collect(); + let rows = stmt + .query_map(param_refs.as_slice(), |row| { + let id: String = row.get(0)?; + let source_kind: String = row.get(1)?; + let source_id: String = row.get(2)?; + let source_ref: Option = row.get(3)?; + let owner: String = row.get(4)?; + let timestamp_ms: i64 = row.get(5)?; + let token_count: i64 = row.get(6)?; + let lifecycle_status: String = row.get(7)?; + let content_path: Option = row.get(8)?; + let content: String = row.get(9)?; + let tags_json: String = row.get(10)?; + let has_embedding: i64 = row.get(11)?; + let 
preview: String = content.chars().take(PREVIEW_MAX_CHARS).collect(); + let tags: Vec = serde_json::from_str(&tags_json).unwrap_or_default(); + Ok(ChunkRow { + id, + source_kind, + source_id, + source_ref, + owner, + timestamp_ms, + token_count: token_count.max(0) as u32, + lifecycle_status, + content_path, + content_preview: if preview.is_empty() { + None + } else { + Some(preview) + }, + has_embedding: has_embedding != 0, + tags, + }) + })? + .collect::>>() + .context("collect list_chunks rows")?; + + Ok(ListChunksResponse { + chunks: rows, + total: total.max(0) as u64, + }) + }) +} + +// ── list_sources ───────────────────────────────────────────────────────── + +pub async fn list_sources_rpc( + config: &Config, + user_email_hint: Option, +) -> Result>, String> { + let cfg = config.clone(); + let sources = tokio::task::spawn_blocking(move || -> Result> { + list_sources_blocking(&cfg, user_email_hint.as_deref()) + }) + .await + .map_err(|e| format!("list_sources join error: {e}"))? + .map_err(|e| format!("list_sources: {e:#}"))?; + + let n = sources.len(); + Ok(RpcOutcome::single_log( + sources, + format!("memory_tree::read: list_sources n={n}"), + )) +} + +fn list_sources_blocking(config: &Config, user_email_hint: Option<&str>) -> Result> { + with_connection(config, |conn| { + let mut stmt = conn.prepare( + "SELECT source_kind, source_id, COUNT(*) AS n, MAX(timestamp_ms) AS most_recent + FROM mem_tree_chunks + GROUP BY source_kind, source_id + ORDER BY most_recent DESC", + )?; + let rows = stmt + .query_map([], |row| { + let source_kind: String = row.get(0)?; + let source_id: String = row.get(1)?; + let n: i64 = row.get(2)?; + let most_recent: i64 = row.get(3)?; + let display_name = display_name_for_source(&source_id, user_email_hint); + Ok(Source { + source_id, + display_name, + source_kind, + chunk_count: n.max(0) as u32, + most_recent_ms: most_recent, + }) + })? 
+ .collect::>>() + .context("collect list_sources rows")?; + Ok(rows) + }) +} + +/// Compute the display name for a source. +/// +/// Examples: +/// - `slack:#engineering` → `#engineering` +/// - `gmail:alice@example.com|bob@example.com` (user is alice) → `bob@example.com` +/// - `gmail:alice@example.com|bob@example.com` (user unknown) → +/// `alice@example.com ↔ bob@example.com` +pub fn display_name_for_source(source_id: &str, user_email_hint: Option<&str>) -> String { + let body = match source_id.split_once(':') { + Some((_platform, rest)) => rest, + None => source_id, + }; + if body.contains('|') { + let parts: Vec<&str> = body.split('|').collect(); + if let Some(user) = user_email_hint { + let user_lc = user.trim().to_ascii_lowercase(); + let others: Vec<&str> = parts + .iter() + .copied() + .filter(|p| p.trim().to_ascii_lowercase() != user_lc) + .collect(); + if !others.is_empty() && others.len() < parts.len() { + return others.join(", "); + } + } + return parts.join(" ↔ "); + } + body.to_string() +} + +// ── search / recall ────────────────────────────────────────────────────── + +pub async fn search_rpc( + config: &Config, + query: String, + k: u32, +) -> Result>, String> { + let limit = k.clamp(1, MAX_LIST_LIMIT); + let filter = ChunkFilter { + query: Some(query.clone()), + limit: Some(limit), + ..ChunkFilter::default() + }; + let cfg = config.clone(); + let chunks = tokio::task::spawn_blocking(move || -> Result> { + Ok(list_chunks_blocking(&cfg, &filter)?.chunks) + }) + .await + .map_err(|e| format!("search join error: {e}"))? 
+ .map_err(|e| format!("search: {e:#}"))?; + + let n = chunks.len(); + Ok(RpcOutcome::single_log( + chunks, + format!("memory_tree::read: search query_len={} n={n}", query.len()), + )) +} + +pub async fn recall_rpc( + config: &Config, + query: String, + k: u32, +) -> Result, String> { + use rusqlite::params; + + let limit = k.clamp(1, MAX_LIST_LIMIT) as usize; + log::debug!( + "[memory_tree::read::recall] query_len={} k={}", + query.len(), + limit + ); + + let resp = crate::openhuman::memory_tree::retrieval::query_source( + config, + None, + None, + None, + Some(query.as_str()), + limit, + ) + .await + .map_err(|e| format!("recall query_source: {e:#}"))?; + + let mut chunk_rows: Vec = Vec::new(); + let mut scores: Vec = Vec::new(); + let cfg = config.clone(); + let leaves: Vec<(String, f32)> = resp + .hits + .into_iter() + .filter(|h| matches!(h.node_kind, NodeKind::Summary) && h.level == 1) + .flat_map(|h| { + h.child_ids + .into_iter() + .map(move |id| (id, h.score)) + .collect::>() + }) + .collect(); + if !leaves.is_empty() { + let collected = tokio::task::spawn_blocking(move || -> Result> { + with_connection(&cfg, |conn| { + let mut out = Vec::with_capacity(leaves.len()); + for (chunk_id, score) in leaves { + let row = conn + .query_row( + "SELECT id, source_kind, source_id, source_ref, owner, + timestamp_ms, token_count, lifecycle_status, + content_path, content, tags_json, + CASE WHEN embedding IS NULL THEN 0 ELSE 1 END + FROM mem_tree_chunks WHERE id = ?1", + params![chunk_id], + |r| { + let id: String = r.get(0)?; + let source_kind: String = r.get(1)?; + let source_id: String = r.get(2)?; + let source_ref: Option = r.get(3)?; + let owner: String = r.get(4)?; + let timestamp_ms: i64 = r.get(5)?; + let token_count: i64 = r.get(6)?; + let lifecycle_status: String = r.get(7)?; + let content_path: Option = r.get(8)?; + let content: String = r.get(9)?; + let tags_json: String = r.get(10)?; + let has_emb: i64 = r.get(11)?; + let preview: String = + 
content.chars().take(PREVIEW_MAX_CHARS).collect(); + let tags: Vec = + serde_json::from_str(&tags_json).unwrap_or_default(); + Ok(ChunkRow { + id, + source_kind, + source_id, + source_ref, + owner, + timestamp_ms, + token_count: token_count.max(0) as u32, + lifecycle_status, + content_path, + content_preview: if preview.is_empty() { + None + } else { + Some(preview) + }, + has_embedding: has_emb != 0, + tags, + }) + }, + ) + .ok(); + if let Some(r) = row { + out.push((r, score)); + } + } + Ok(out) + }) + }) + .await + .map_err(|e| format!("recall join error: {e}"))? + .map_err(|e| format!("recall hydrate: {e:#}"))?; + for (row, sc) in collected { + chunk_rows.push(row); + scores.push(sc); + } + } + chunk_rows.truncate(limit); + scores.truncate(limit); + + let n = chunk_rows.len(); + Ok(RpcOutcome::single_log( + RecallResponse { + chunks: chunk_rows, + scores, + }, + format!("memory_tree::read: recall n={n}"), + )) +} + +// ── small helpers ─────────────────────────────────────────────────────── + +pub fn read_chunk_row(config: &Config, chunk_id: &str) -> Result> { + let chunk = match chunk_store::get_chunk(config, chunk_id)? { + Some(c) => c, + None => return Ok(None), + }; + let body = + content_read::read_chunk_body(config, chunk_id).unwrap_or_else(|_| chunk.content.clone()); + let preview: String = body.chars().take(PREVIEW_MAX_CHARS).collect(); + let has_embedding = chunk_store::get_chunk_embedding(config, chunk_id)?.is_some(); + Ok(Some(ChunkRow { + id: chunk.id, + source_kind: chunk.metadata.source_kind.as_str().to_string(), + source_id: chunk.metadata.source_id, + source_ref: chunk.metadata.source_ref.map(|r| r.value), + owner: chunk.metadata.owner, + timestamp_ms: chunk.metadata.timestamp.timestamp_millis(), + token_count: chunk.token_count, + lifecycle_status: chunk_store::get_chunk_lifecycle_status(config, chunk_id)? 
+ .unwrap_or_else(|| "unknown".to_string()), + content_path: chunk_store::get_chunk_content_path(config, chunk_id)?, + content_preview: if preview.is_empty() { + None + } else { + Some(preview) + }, + has_embedding, + tags: chunk.metadata.tags, + })) +} diff --git a/src/openhuman/memory/read_rpc/entities.rs b/src/openhuman/memory/read_rpc/entities.rs new file mode 100644 index 0000000000..fa3b5133fc --- /dev/null +++ b/src/openhuman/memory/read_rpc/entities.rs @@ -0,0 +1,276 @@ +use anyhow::{Context, Result}; +use rusqlite::params; + +use crate::openhuman::config::Config; +use crate::openhuman::memory_store::chunks::store::with_connection; +use crate::openhuman::memory_tree::score::store as score_store; +use crate::rpc::RpcOutcome; + +use super::types::{DeleteChunkResponse, EntityRef, ScoreBreakdown, ScoreSignal, MAX_LIST_LIMIT}; + +// ── entity index lookups ──────────────────────────────────────────────── + +pub async fn entity_index_for_rpc( + config: &Config, + chunk_id: String, +) -> Result>, String> { + let cfg = config.clone(); + let id = chunk_id.clone(); + let refs = tokio::task::spawn_blocking(move || -> Result> { + with_connection(&cfg, |conn| { + let mut stmt = conn.prepare( + "SELECT entity_id, entity_kind, surface, COUNT(*) AS n + FROM mem_tree_entity_index + WHERE node_id = ?1 + GROUP BY entity_id, entity_kind, surface + ORDER BY n DESC, entity_id ASC", + )?; + let rows = stmt + .query_map(params![id], |row| { + let entity_id: String = row.get(0)?; + let kind: String = row.get(1)?; + let surface: String = row.get(2)?; + let n: i64 = row.get(3)?; + Ok(EntityRef { + entity_id, + kind, + surface, + count: n.max(0) as u32, + }) + })? + .collect::>>() + .context("collect entity_index_for rows")?; + Ok(rows) + }) + }) + .await + .map_err(|e| format!("entity_index_for join error: {e}"))? 
+ .map_err(|e| format!("entity_index_for: {e:#}"))?; + + let n = refs.len(); + Ok(RpcOutcome::single_log( + refs, + format!("memory_tree::read: entity_index_for chunk_id={chunk_id} n={n}"), + )) +} + +pub async fn chunks_for_entity_rpc( + config: &Config, + entity_id: String, +) -> Result>, String> { + let cfg = config.clone(); + let eid = entity_id.clone(); + let chunk_ids = tokio::task::spawn_blocking(move || -> Result> { + with_connection(&cfg, |conn| { + let mut stmt = conn.prepare( + "SELECT DISTINCT node_id + FROM mem_tree_entity_index + WHERE entity_id = ?1 AND node_kind = 'leaf' + ORDER BY timestamp_ms DESC", + )?; + let rows = stmt + .query_map(params![eid], |row| { + let node_id: String = row.get(0)?; + Ok(node_id) + })? + .collect::>>() + .context("collect chunks_for_entity rows")?; + Ok(rows) + }) + }) + .await + .map_err(|e| format!("chunks_for_entity join error: {e}"))? + .map_err(|e| format!("chunks_for_entity: {e:#}"))?; + + let n = chunk_ids.len(); + Ok(RpcOutcome::single_log( + chunk_ids, + format!("memory_tree::read: chunks_for_entity entity_id={entity_id} n={n}"), + )) +} + +pub async fn top_entities_rpc( + config: &Config, + kind: Option, + limit: u32, +) -> Result>, String> { + let limit = limit.clamp(1, MAX_LIST_LIMIT); + let cfg = config.clone(); + let refs = tokio::task::spawn_blocking(move || -> Result> { + with_connection(&cfg, |conn| { + let mut sql = String::from( + "SELECT entity_id, entity_kind, MAX(surface) AS surface_sample, COUNT(*) AS n + FROM mem_tree_entity_index", + ); + let mut params_owned: Vec> = Vec::new(); + if let Some(k) = kind { + sql.push_str(" WHERE entity_kind = ?"); + params_owned.push(Box::new(k)); + } + sql.push_str( + " GROUP BY entity_id, entity_kind + ORDER BY n DESC, MAX(timestamp_ms) DESC + LIMIT ?", + ); + params_owned.push(Box::new(limit as i64)); + let mut stmt = conn.prepare(&sql)?; + let param_refs: Vec<&dyn rusqlite::ToSql> = params_owned + .iter() + .map(|b| b.as_ref() as &dyn rusqlite::ToSql) + 
.collect(); + let rows = stmt + .query_map(param_refs.as_slice(), |row| { + let entity_id: String = row.get(0)?; + let kind: String = row.get(1)?; + let surface: String = row.get(2)?; + let n: i64 = row.get(3)?; + Ok(EntityRef { + entity_id, + kind, + surface, + count: n.max(0) as u32, + }) + })? + .collect::>>() + .context("collect top_entities rows")?; + Ok(rows) + }) + }) + .await + .map_err(|e| format!("top_entities join error: {e}"))? + .map_err(|e| format!("top_entities: {e:#}"))?; + + let n = refs.len(); + Ok(RpcOutcome::single_log( + refs, + format!("memory_tree::read: top_entities n={n}"), + )) +} + +// ── chunk_score ───────────────────────────────────────────────────────── + +pub async fn chunk_score_rpc( + config: &Config, + chunk_id: String, +) -> Result>, String> { + let cfg = config.clone(); + let id = chunk_id.clone(); + let result = tokio::task::spawn_blocking(move || -> Result> { + let row = score_store::get_score(&cfg, &id)?; + Ok(row.map(|r| { + let llm_consulted = r.signals.llm_importance > 0.0; + let signals = vec![ + ScoreSignal { + name: "token_count".into(), + weight: 1.0, + value: r.signals.token_count, + }, + ScoreSignal { + name: "unique_words".into(), + weight: 1.0, + value: r.signals.unique_words, + }, + ScoreSignal { + name: "metadata_weight".into(), + weight: 1.5, + value: r.signals.metadata_weight, + }, + ScoreSignal { + name: "source_weight".into(), + weight: 1.5, + value: r.signals.source_weight, + }, + ScoreSignal { + name: "interaction".into(), + weight: 3.0, + value: r.signals.interaction, + }, + ScoreSignal { + name: "entity_density".into(), + weight: 1.0, + value: r.signals.entity_density, + }, + ScoreSignal { + name: "llm_importance".into(), + weight: if llm_consulted { 2.0 } else { 0.0 }, + value: r.signals.llm_importance, + }, + ]; + ScoreBreakdown { + signals, + total: r.total, + threshold: crate::openhuman::memory_tree::score::DEFAULT_DROP_THRESHOLD, + kept: !r.dropped, + llm_consulted, + } + })) + }) + .await + 
.map_err(|e| format!("chunk_score join error: {e}"))? + .map_err(|e| format!("chunk_score: {e:#}"))?; + Ok(RpcOutcome::single_log( + result, + format!("memory_tree::read: chunk_score id={chunk_id}"), + )) +} + +// ── delete_chunk ──────────────────────────────────────────────────────── + +pub async fn delete_chunk_rpc( + config: &Config, + chunk_id: String, +) -> Result, String> { + let cfg = config.clone(); + let id = chunk_id.clone(); + let resp = tokio::task::spawn_blocking(move || -> Result { + with_connection(&cfg, |conn| { + let tx = conn.unchecked_transaction()?; + let content_path: Option = tx + .query_row( + "SELECT content_path FROM mem_tree_chunks WHERE id = ?1", + params![id], + |r| r.get::<_, Option>(0), + ) + .ok() + .flatten(); + let removed_score = + tx.execute("DELETE FROM mem_tree_score WHERE chunk_id = ?1", params![id])?; + let removed_index = tx.execute( + "DELETE FROM mem_tree_entity_index WHERE node_id = ?1", + params![id], + )?; + let removed_chunk = + tx.execute("DELETE FROM mem_tree_chunks WHERE id = ?1", params![id])?; + tx.commit()?; + if let Some(rel) = content_path { + let mut path = cfg.memory_tree_content_root(); + for component in rel.split('/') { + path.push(component); + } + if let Err(e) = std::fs::remove_file(&path) { + if e.kind() != std::io::ErrorKind::NotFound { + log::warn!( + "[memory_tree::read::delete] failed to remove chunk file path_hash={}: {e}", + crate::openhuman::memory::util::redact::redact(&rel), + ); + } + } + } + Ok(DeleteChunkResponse { + deleted: removed_chunk > 0, + score_rows_removed: removed_score as u32, + entity_index_rows_removed: removed_index as u32, + }) + }) + }) + .await + .map_err(|e| format!("delete_chunk join error: {e}"))? 
+ .map_err(|e| format!("delete_chunk: {e:#}"))?; + Ok(RpcOutcome::single_log( + resp.clone(), + format!( + "memory_tree::read: delete_chunk id={chunk_id} deleted={} score_rows={} entity_rows={}", + resp.deleted, resp.score_rows_removed, resp.entity_index_rows_removed + ), + )) +} diff --git a/src/openhuman/memory/read_rpc/graph.rs b/src/openhuman/memory/read_rpc/graph.rs new file mode 100644 index 0000000000..50beee24d8 --- /dev/null +++ b/src/openhuman/memory/read_rpc/graph.rs @@ -0,0 +1,470 @@ +use anyhow::{Context, Result}; +use rusqlite::params; +use serde::{Deserialize, Serialize}; + +use crate::openhuman::config::Config; +use crate::openhuman::memory_store::chunks::store::with_connection; +use crate::rpc::RpcOutcome; + +// ── wire types ──────────────────────────────────────────────────────────── + +#[derive(Clone, Copy, Debug, Default, Deserialize, Serialize, PartialEq, Eq)] +#[serde(rename_all = "lowercase")] +pub enum GraphMode { + #[default] + Tree, + Contacts, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct GraphNode { + pub kind: String, + pub id: String, + pub label: String, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tree_kind: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tree_scope: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tree_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub level: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub parent_id: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub child_count: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub time_range_start_ms: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub time_range_end_ms: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub file_basename: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub entity_kind: 
Option, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct GraphEdge { + pub from: String, + pub to: String, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct GraphExportResponse { + pub nodes: Vec, + #[serde(default)] + pub edges: Vec, + pub content_root_abs: String, +} + +// ── graph_export ──────────────────────────────────────────────────────── + +pub async fn graph_export_rpc( + config: &Config, + mode: GraphMode, +) -> Result, String> { + let cfg = config.clone(); + let resp = tokio::task::spawn_blocking(move || -> Result { + let content_root = cfg.memory_tree_content_root(); + let resp = match mode { + GraphMode::Tree => collect_tree_graph(&cfg)?, + GraphMode::Contacts => collect_contacts_graph(&cfg)?, + }; + Ok(GraphExportResponse { + nodes: resp.0, + edges: resp.1, + content_root_abs: content_root.to_string_lossy().to_string(), + }) + }) + .await + .map_err(|e| format!("graph_export join error: {e}"))? + .map_err(|e| format!("graph_export: {e:#}"))?; + let log = format!( + "memory_tree::read: graph_export mode={:?} nodes={} edges={} root_hash={}", + mode, + resp.nodes.len(), + resp.edges.len(), + crate::openhuman::memory::util::redact::redact(&resp.content_root_abs), + ); + Ok(RpcOutcome::single_log(resp, log)) +} + +// ── collect_tree_graph ─────────────────────────────────────────────────── + +fn collect_tree_graph(cfg: &Config) -> Result<(Vec, Vec)> { + const MAX_TREE_NODES: usize = 10_000; + + struct SummaryRow { + node: GraphNode, + tree_scope: String, + child_ids: Vec, + } + + let summary_rows = with_connection(cfg, |conn| { + let mut stmt = conn.prepare( + "SELECT s.id, s.tree_id, s.tree_kind, t.scope, s.level, s.parent_id, + s.child_ids_json, s.time_range_start_ms, s.time_range_end_ms + FROM mem_tree_summaries s + JOIN mem_tree_trees t ON t.id = s.tree_id + WHERE s.deleted = 0 + ORDER BY s.tree_id, s.level, s.sealed_at_ms", + )?; + let rows = stmt + .query_map([], |row| { + let id: String = row.get(0)?; + let tree_id: 
String = row.get(1)?; + let tree_kind: String = row.get(2)?; + let tree_scope: String = row.get(3)?; + let level: i64 = row.get(4)?; + let parent_id: Option = row.get(5)?; + let child_ids_json: String = row.get(6)?; + let time_range_start_ms: i64 = row.get(7)?; + let time_range_end_ms: i64 = row.get(8)?; + let child_ids: Vec = + serde_json::from_str(&child_ids_json).unwrap_or_default(); + let child_count = child_ids.len() as u32; + let file_basename = sanitize_basename(&id); + let label = format!("L{} · {}", level.max(0), tree_scope); + Ok(SummaryRow { + node: GraphNode { + kind: "summary".into(), + id, + label, + tree_kind: Some(tree_kind), + tree_scope: Some(tree_scope.clone()), + tree_id: Some(tree_id), + level: Some(level.max(0) as u32), + parent_id, + child_count: Some(child_count), + time_range_start_ms: Some(time_range_start_ms), + time_range_end_ms: Some(time_range_end_ms), + file_basename: Some(file_basename), + entity_kind: None, + }, + tree_scope, + child_ids, + }) + })? + .collect::>>() + .context("collect tree-mode summary rows")?; + Ok(rows) + })?; + + let mut scopes: std::collections::BTreeSet = std::collections::BTreeSet::new(); + for sr in &summary_rows { + scopes.insert(sr.tree_scope.clone()); + } + + let mut nodes: Vec = Vec::new(); + let mut source_root_ids: std::collections::HashMap = + std::collections::HashMap::new(); + + for scope in &scopes { + let root_id = format!("source:{scope}"); + let label = scope_display_label(scope); + source_root_ids.insert(scope.clone(), root_id.clone()); + nodes.push(GraphNode { + kind: "source".into(), + id: root_id, + label, + tree_kind: None, + tree_scope: Some(scope.clone()), + tree_id: None, + level: None, + parent_id: None, + child_count: None, + time_range_start_ms: None, + time_range_end_ms: None, + file_basename: None, + entity_kind: None, + }); + } + + let mut summary_ids: std::collections::HashSet = std::collections::HashSet::new(); + for sr in &summary_rows { + summary_ids.insert(sr.node.id.clone()); 
+ } + + for sr in &summary_rows { + let mut node = sr.node.clone(); + let has_valid_parent = node + .parent_id + .as_ref() + .map(|pid| summary_ids.contains(pid)) + .unwrap_or(false); + if !has_valid_parent { + node.parent_id = source_root_ids.get(&sr.tree_scope).cloned(); + } + nodes.push(node); + } + + let doc_budget = MAX_TREE_NODES.saturating_sub(nodes.len()); + let mut doc_count = 0usize; + for sr in &summary_rows { + if doc_count >= doc_budget { + break; + } + if sr.node.level != Some(1) { + continue; + } + if sr + .child_ids + .first() + .map(|c| c.starts_with("summary:")) + .unwrap_or(false) + { + continue; + } + for child_id in &sr.child_ids { + if doc_count >= doc_budget { + break; + } + let label = document_label(child_id); + nodes.push(GraphNode { + kind: "chunk".into(), + id: format!("doc:{}:{}", sr.tree_scope, child_id), + label, + tree_kind: None, + tree_scope: Some(sr.tree_scope.clone()), + tree_id: None, + level: None, + parent_id: Some(sr.node.id.clone()), + child_count: None, + time_range_start_ms: sr.node.time_range_start_ms, + time_range_end_ms: sr.node.time_range_end_ms, + file_basename: None, + entity_kind: None, + }); + doc_count += 1; + } + } + + let chunk_budget = MAX_TREE_NODES.saturating_sub(nodes.len()); + if chunk_budget > 0 { + let chunk_nodes = with_connection(cfg, |conn| { + let mut stmt = conn.prepare( + "SELECT c.id, c.parent_summary_id, c.content, + c.time_range_start_ms, c.time_range_end_ms, c.source_id + FROM mem_tree_chunks c + ORDER BY c.timestamp_ms DESC + LIMIT ?1", + )?; + let rows = stmt + .query_map(params![chunk_budget as i64], |row| { + let id: String = row.get(0)?; + let parent_id: Option = row.get(1)?; + let content: String = row.get(2)?; + let time_range_start_ms: i64 = row.get(3)?; + let time_range_end_ms: i64 = row.get(4)?; + let source_id: String = row.get(5)?; + let label = content + .lines() + .next() + .unwrap_or("") + .chars() + .take(72) + .collect::(); + Ok(( + GraphNode { + kind: "chunk".into(), + id, + 
/// Builds the display label for a source-root node from its tree scope.
///
/// Recognised prefixes are `github:`, `gmail:` and `slack:`. For Gmail scopes
/// the `-at-` and `-dot-` slugs are turned back into `@` and `.`. Any other
/// scope is returned unchanged.
fn scope_display_label(scope: &str) -> String {
    if let Some(repo) = scope.strip_prefix("github:") {
        format!("GitHub · {repo}")
    } else if let Some(slug) = scope.strip_prefix("gmail:") {
        let account = slug.replace("-at-", "@").replace("-dot-", ".");
        format!("Gmail · {account}")
    } else if let Some(channel) = scope.strip_prefix("slack:") {
        format!("Slack · {channel}")
    } else {
        scope.to_string()
    }
}

/// Builds a short display label for a document-level child id.
///
/// `commit:<sha>` becomes `commit <first 8 chars>`, `issue:<n>` becomes
/// `issue #<n>`, and `pr:<n>` becomes `PR #<n>`. Any other id is truncated to
/// its first 40 characters.
fn document_label(child_id: &str) -> String {
    if let Some(sha) = child_id.strip_prefix("commit:") {
        // Truncate by character, not by byte offset: slicing at byte 8 panics
        // when that offset falls inside a multi-byte UTF-8 sequence.
        let short: String = sha.chars().take(8).collect();
        format!("commit {short}")
    } else if let Some(n) = child_id.strip_prefix("issue:") {
        format!("issue #{n}")
    } else if let Some(n) = child_id.strip_prefix("pr:") {
        format!("PR #{n}")
    } else {
        child_id.chars().take(40).collect()
    }
}
c.content + FROM mem_tree_chunks c + WHERE c.id IN ( + SELECT DISTINCT node_id + FROM mem_tree_entity_index + WHERE entity_kind = 'person' + ) + ORDER BY c.timestamp_ms DESC + LIMIT ?1", + )?; + let chunks: Vec<(String, i64, String)> = chunk_stmt + .query_map(params![MAX_CHUNK_NODES as i64], |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, i64>(1)?, + row.get::<_, String>(2)?, + )) + })? + .collect::>() + .context("collect contacts-mode chunk rows")?; + + let chunk_ids: Vec = chunks.iter().map(|(id, _, _)| id.clone()).collect(); + + let edges: Vec<(String, String, String)> = if chunk_ids.is_empty() { + Vec::new() + } else { + let placeholders = std::iter::repeat("?") + .take(chunk_ids.len()) + .collect::>() + .join(","); + let sql = format!( + "SELECT entity_id, node_id, surface + FROM mem_tree_entity_index + WHERE entity_kind = 'person' + AND node_kind = 'leaf' + AND node_id IN ({placeholders}) + ORDER BY timestamp_ms DESC + LIMIT ?" + ); + let mut bind: Vec = chunk_ids + .iter() + .map(|s| rusqlite::types::Value::Text(s.clone())) + .collect(); + bind.push(rusqlite::types::Value::Integer(MAX_EDGES as i64)); + let mut mention_stmt = conn.prepare(&sql)?; + let rows = mention_stmt + .query_map(rusqlite::params_from_iter(bind), |row| { + Ok(( + row.get::<_, String>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + )) + })? 
/// Turns an id into a string usable as a file basename by replacing each of
/// the characters `\ / : * ? " < > |` with `-`.
///
/// All other characters are copied through unchanged, so the result has the
/// same number of characters as the input.
pub fn sanitize_basename(id: &str) -> String {
    const RESERVED: [char; 9] = ['\\', '/', ':', '*', '?', '"', '<', '>', '|'];

    let mut cleaned = String::with_capacity(id.len());
    for ch in id.chars() {
        cleaned.push(if RESERVED.contains(&ch) { '-' } else { ch });
    }
    cleaned
}
for a human-facing dashboard — not for an LLM tool loop. +//! +//! All methods are scoped under the existing `memory_tree` JSON-RPC +//! namespace so they share authentication, telemetry, and discovery with +//! the other memory-tree RPCs. + +pub mod admin; +pub mod chunks; +pub mod entities; +pub mod graph; +pub mod types; +pub mod vault; + +// Re-export everything so consumers and the test file keep working with `use super::*;` +pub use admin::{flush_now_rpc, flush_source_tree_rpc, reset_tree_rpc, wipe_all_rpc}; +pub use chunks::{ + display_name_for_source, list_chunks_rpc, list_sources_rpc, read_chunk_row, recall_rpc, + search_rpc, +}; +pub use entities::{ + chunk_score_rpc, chunks_for_entity_rpc, delete_chunk_rpc, entity_index_for_rpc, + top_entities_rpc, +}; +pub use graph::{ + graph_export_rpc, sanitize_basename, GraphEdge, GraphExportResponse, GraphMode, GraphNode, +}; +pub use types::{ + ChunkFilter, ChunkRow, DeleteChunkResponse, EntityRef, FlushNowResponse, + FlushSourceTreeResponse, ListChunksResponse, ObsidianVaultStatusResponse, RecallResponse, + ResetTreeResponse, ScoreBreakdown, ScoreSignal, Source, VaultHealthCheckResponse, + WipeAllResponse, +}; +pub use vault::{obsidian_vault_status_rpc, vault_health_check_rpc}; + +#[allow(dead_code)] +pub(crate) fn parse_source_kind_str( + s: &str, +) -> Option { + crate::openhuman::memory_store::chunks::types::SourceKind::parse(s).ok() +} + +#[cfg(test)] +pub(crate) use crate::openhuman::config::Config; +#[cfg(test)] +pub(crate) use crate::openhuman::memory_store::chunks::store::with_connection; +#[cfg(test)] +pub(crate) use crate::openhuman::memory_store::chunks::types::SourceKind; +#[cfg(test)] +pub(crate) use admin::clear_composio_sync_state; + +#[cfg(test)] +#[path = "../read_rpc_tests.rs"] +mod tests; diff --git a/src/openhuman/memory/read_rpc/types.rs b/src/openhuman/memory/read_rpc/types.rs new file mode 100644 index 0000000000..31aae285e3 --- /dev/null +++ b/src/openhuman/memory/read_rpc/types.rs @@ -0,0 
+1,163 @@ +use serde::{Deserialize, Serialize}; + +pub const PREVIEW_MAX_CHARS: usize = 500; +pub const DEFAULT_LIST_LIMIT: u32 = 50; +pub const MAX_LIST_LIMIT: u32 = 1_000; + +/// Wire-shape chunk returned by the read RPCs. +/// +/// Distinct from [`crate::openhuman::memory_store::chunks::types::Chunk`] in two +/// ways: serialised timestamps are ms-since-epoch (matches the rest of the +/// JSON-RPC surface) and the body is replaced with a `≤500-char preview` +/// + a flag indicating whether the row has an embedding. UIs needing the +/// full body call back via `memory_tree_get_chunk`. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ChunkRow { + pub id: String, + pub source_kind: String, + pub source_id: String, + #[serde(default)] + pub source_ref: Option, + pub owner: String, + pub timestamp_ms: i64, + pub token_count: u32, + pub lifecycle_status: String, + #[serde(default)] + pub content_path: Option, + #[serde(default)] + pub content_preview: Option, + pub has_embedding: bool, + #[serde(default)] + pub tags: Vec, +} + +/// Filter shape for [`list_chunks`]. All fields are optional. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct ChunkFilter { + #[serde(default)] + pub source_kinds: Option>, + #[serde(default)] + pub source_ids: Option>, + #[serde(default)] + pub entity_ids: Option>, + #[serde(default)] + pub since_ms: Option, + #[serde(default)] + pub until_ms: Option, + #[serde(default)] + pub query: Option, + #[serde(default)] + pub limit: Option, + #[serde(default)] + pub offset: Option, +} + +/// Response shape for [`list_chunks`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ListChunksResponse { + pub chunks: Vec, + pub total: u64, +} + +/// Distinct ingest source plus chunk counts. Returned by [`list_sources`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct Source { + pub source_id: String, + /// Computed display name (un-slug + strip user email when known). 
+ pub display_name: String, + pub source_kind: String, + pub chunk_count: u32, + pub most_recent_ms: i64, +} + +/// Lightweight reference to a canonical entity. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct EntityRef { + /// Canonical id (e.g. `email:alice@example.com`, `topic:phoenix`). + pub entity_id: String, + pub kind: String, + pub surface: String, + pub count: u32, +} + +/// Per-signal weight + raw value pair. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ScoreSignal { + pub name: String, + pub weight: f32, + pub value: f32, +} + +/// Score rationale returned by [`chunk_score`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ScoreBreakdown { + pub signals: Vec, + pub total: f32, + pub threshold: f32, + pub kept: bool, + pub llm_consulted: bool, +} + +/// Response shape for [`recall_rpc`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RecallResponse { + pub chunks: Vec, + pub scores: Vec, +} + +/// Response shape for [`delete_chunk_rpc`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct DeleteChunkResponse { + pub deleted: bool, + pub score_rows_removed: u32, + pub entity_index_rows_removed: u32, +} + +/// Response shape for [`wipe_all_rpc`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct WipeAllResponse { + pub rows_deleted: u64, + pub dirs_removed: Vec, + pub sync_state_cleared: u64, +} + +/// Response shape for [`reset_tree_rpc`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ResetTreeResponse { + pub tree_rows_deleted: u64, + pub chunks_requeued: u64, + pub jobs_enqueued: u64, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FlushSourceTreeResponse { + pub tree_scope: String, + pub seals_fired: u32, +} + +/// Response shape for [`flush_now_rpc`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct FlushNowResponse { + pub enqueued: bool, + pub stale_buffers: u32, +} + +/// Response shape for [`obsidian_vault_status_rpc`]. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct ObsidianVaultStatusResponse { + pub registered: bool, + pub config_found: bool, + pub content_root_abs: String, +} + +/// Response shape for [`vault_health_check_rpc`]. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct VaultHealthCheckResponse { + pub content_root_abs: String, + pub exists: bool, + pub readable: bool, + pub writable: bool, + pub obsidian_registered: bool, + pub pipeline_healthy: bool, + pub last_sync_ms: i64, +} diff --git a/src/openhuman/memory/read_rpc/vault.rs b/src/openhuman/memory/read_rpc/vault.rs new file mode 100644 index 0000000000..0934a32c7d --- /dev/null +++ b/src/openhuman/memory/read_rpc/vault.rs @@ -0,0 +1,126 @@ +use anyhow::Result; + +use crate::openhuman::config::Config; +use crate::openhuman::memory_store::content::obsidian_registry; +use crate::rpc::RpcOutcome; + +use super::types::{ObsidianVaultStatusResponse, VaultHealthCheckResponse}; + +pub async fn obsidian_vault_status_rpc( + config: &Config, + obsidian_config_dir: Option, +) -> Result, String> { + let cfg = config.clone(); + let resp = tokio::task::spawn_blocking(move || -> ObsidianVaultStatusResponse { + let content_root = cfg.memory_tree_content_root(); + let extra = obsidian_config_dir + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(std::path::Path::new); + let reg = obsidian_registry::vault_registration_status(&content_root, extra); + ObsidianVaultStatusResponse { + registered: reg.registered, + config_found: reg.config_found, + content_root_abs: content_root.to_string_lossy().to_string(), + } + }) + .await + .map_err(|e| format!("obsidian_vault_status join error: {e}"))?; + + let log = format!( + "memory_tree::read: obsidian_vault_status registered={} config_found={} root_hash={}", + resp.registered, + resp.config_found, + crate::openhuman::memory::util::redact::redact(&resp.content_root_abs), + ); + Ok(RpcOutcome::single_log(resp, log)) +} + +pub async fn 
vault_health_check_rpc( + config: &Config, + obsidian_config_dir: Option, +) -> Result, String> { + let cfg = config.clone(); + let fs_probe = tokio::task::spawn_blocking(move || { + let content_root = cfg.memory_tree_content_root(); + let content_root_abs = content_root.to_string_lossy().to_string(); + let exists = content_root.is_dir(); + let readable = exists && std::fs::read_dir(&content_root).is_ok(); + let writable = exists && probe_directory_writable(&content_root); + + let extra = obsidian_config_dir + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(std::path::Path::new); + let obsidian_registered = + obsidian_registry::vault_registration_status(&content_root, extra).registered; + + ( + content_root_abs, + exists, + readable, + writable, + obsidian_registered, + ) + }) + .await + .map_err(|e| format!("vault_health_check fs probe join error: {e}"))?; + + let pipeline = crate::openhuman::memory_tree::tree::rpc::pipeline_status_rpc(config) + .await + .map_err(|e| format!("vault_health_check pipeline_status: {e}"))?; + + let (content_root_abs, exists, readable, writable, obsidian_registered) = fs_probe; + let pipeline_healthy = pipeline.value.status != "error" && !pipeline.value.is_paused; + let last_sync_ms = pipeline.value.last_sync_ms.max(0); + + let resp = VaultHealthCheckResponse { + content_root_abs, + exists, + readable, + writable, + obsidian_registered, + pipeline_healthy, + last_sync_ms, + }; + + let log = format!( + "memory_tree::read: vault_health_check exists={} readable={} writable={} obsidian_registered={} pipeline_healthy={} last_sync_ms={} root_hash={}", + resp.exists, + resp.readable, + resp.writable, + resp.obsidian_registered, + resp.pipeline_healthy, + resp.last_sync_ms, + crate::openhuman::memory::util::redact::redact(&resp.content_root_abs), + ); + Ok(RpcOutcome::single_log(resp, log)) +} + +fn probe_directory_writable(dir: &std::path::Path) -> bool { + use std::io::Write; + let ts = std::time::SystemTime::now() + 
.duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_nanos()) + .unwrap_or(0); + let probe = dir.join(format!( + ".openhuman-vault-writecheck-{}-{ts}.tmp", + std::process::id() + )); + match std::fs::OpenOptions::new() + .create_new(true) + .write(true) + .open(&probe) + { + Ok(mut file) => { + let write_ok = file.write_all(b"ok").is_ok(); + if let Err(e) = std::fs::remove_file(&probe) { + log::debug!("[memory] vault write-probe cleanup failed: {e}"); + } + write_ok + } + Err(_) => false, + } +} diff --git a/src/openhuman/memory/schema.rs b/src/openhuman/memory/schema/definitions.rs similarity index 70% rename from src/openhuman/memory/schema.rs rename to src/openhuman/memory/schema/definitions.rs index 710dc83bef..27b2df808b 100644 --- a/src/openhuman/memory/schema.rs +++ b/src/openhuman/memory/schema/definitions.rs @@ -1,160 +1,12 @@ -//! Controller schemas for the memory tree. +//! Schema definitions for every `memory_tree` JSON-RPC method. //! -//! Registered JSON-RPC methods include the original Phase 1 surface -//! (`ingest`, `list_chunks`, `get_chunk`) plus the new -//! Memory-tab read RPCs added by the cloud-default backend refactor: -//! `list_sources`, `search`, `recall`, `entity_index_for`, -//! `top_entities`, `chunk_score`, `delete_chunk`, and destructive -//! maintenance helpers for local iteration. -//! -//! Handlers delegate to [`super::rpc`] (write side) or -//! [`super::read_rpc`] (UI read side). - -use serde::de::DeserializeOwned; -use serde_json::{Map, Value}; +//! The [`schemas`] function is the single source of truth for each +//! controller's input/output field descriptions. Handlers delegate to +//! [`super::handlers`]; the registry lists are in [`super::registry`]. 
-use crate::core::all::{ControllerFuture, RegisteredController}; use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::openhuman::config::rpc as config_rpc; -use crate::openhuman::memory::read_rpc; -use crate::openhuman::memory_tree::tree::rpc; -use crate::rpc::RpcOutcome; - -const NAMESPACE: &str = "memory_tree"; -/// All `memory_tree` controller schemas, used by the registry to advertise -/// inputs/outputs to CLI + JSON-RPC consumers. -pub fn all_controller_schemas() -> Vec { - vec![ - schemas("ingest"), - schemas("list_chunks"), - schemas("get_chunk"), - schemas("memory_backfill_status"), - schemas("list_sources"), - schemas("search"), - schemas("recall"), - schemas("entity_index_for"), - schemas("chunks_for_entity"), - schemas("top_entities"), - schemas("chunk_score"), - schemas("delete_chunk"), - schemas("graph_export"), - schemas("obsidian_vault_status"), - schemas("vault_health_check"), - schemas("flush_now"), - schemas("flush_source"), - schemas("wipe_all"), - schemas("reset_tree"), - schemas("pipeline_status"), - schemas("set_enabled"), - schemas("smart_walk"), - schemas("doctor"), - schemas("retry_failed"), - ] -} - -/// Registered `memory_tree` controllers (schema + handler pairs) wired into -/// `core::all`. 
-pub fn all_registered_controllers() -> Vec { - vec![ - RegisteredController { - schema: schemas("ingest"), - handler: handle_ingest, - }, - RegisteredController { - schema: schemas("list_chunks"), - handler: handle_list_chunks, - }, - RegisteredController { - schema: schemas("get_chunk"), - handler: handle_get_chunk, - }, - RegisteredController { - schema: schemas("memory_backfill_status"), - handler: handle_memory_backfill_status, - }, - RegisteredController { - schema: schemas("list_sources"), - handler: handle_list_sources, - }, - RegisteredController { - schema: schemas("search"), - handler: handle_search, - }, - RegisteredController { - schema: schemas("recall"), - handler: handle_recall, - }, - RegisteredController { - schema: schemas("entity_index_for"), - handler: handle_entity_index_for, - }, - RegisteredController { - schema: schemas("chunks_for_entity"), - handler: handle_chunks_for_entity, - }, - RegisteredController { - schema: schemas("top_entities"), - handler: handle_top_entities, - }, - RegisteredController { - schema: schemas("chunk_score"), - handler: handle_chunk_score, - }, - RegisteredController { - schema: schemas("delete_chunk"), - handler: handle_delete_chunk, - }, - RegisteredController { - schema: schemas("graph_export"), - handler: handle_graph_export, - }, - RegisteredController { - schema: schemas("obsidian_vault_status"), - handler: handle_obsidian_vault_status, - }, - RegisteredController { - schema: schemas("vault_health_check"), - handler: handle_vault_health_check, - }, - RegisteredController { - schema: schemas("flush_now"), - handler: handle_flush_now, - }, - RegisteredController { - schema: schemas("flush_source"), - handler: handle_flush_source, - }, - RegisteredController { - schema: schemas("wipe_all"), - handler: handle_wipe_all, - }, - RegisteredController { - schema: schemas("reset_tree"), - handler: handle_reset_tree, - }, - RegisteredController { - schema: schemas("pipeline_status"), - handler: handle_pipeline_status, 
- }, - RegisteredController { - schema: schemas("set_enabled"), - handler: handle_set_enabled, - }, - RegisteredController { - schema: schemas("smart_walk"), - handler: handle_smart_walk, - }, - RegisteredController { - schema: schemas("doctor"), - handler: handle_doctor, - }, - RegisteredController { - schema: schemas("retry_failed"), - handler: handle_retry_failed, - }, - ] -} +pub(crate) const NAMESPACE: &str = "memory_tree"; /// Lookup the [`ControllerSchema`] for a single `memory_tree` function name. pub fn schemas(function: &str) -> ControllerSchema { @@ -1096,363 +948,3 @@ pub fn schemas(function: &str) -> ControllerSchema { }, } } - -fn handle_ingest(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(rpc::ingest_rpc(&config, req).await?) - }) -} - -fn handle_get_chunk(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(rpc::get_chunk_rpc(&config, req).await?) - }) -} - -fn handle_memory_backfill_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(rpc::backfill_status_rpc(&config).await?) - }) -} - -// ── New read RPCs (Memory-tab UI) ──────────────────────────────────────── - -fn handle_list_chunks(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let filter = parse_value::(Value::Object(params))?; - to_json(read_rpc::list_chunks_rpc(&config, filter).await?) 
- }) -} - -fn handle_list_sources(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize, Default)] - struct Req { - #[serde(default)] - user_email_hint: Option, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params)).unwrap_or_default(); - to_json(read_rpc::list_sources_rpc(&config, req.user_email_hint).await?) - }) -} - -fn handle_search(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - query: String, - k: u32, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::search_rpc(&config, req.query, req.k).await?) - }) -} - -fn handle_recall(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - query: String, - k: u32, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::recall_rpc(&config, req.query, req.k).await?) - }) -} - -fn handle_entity_index_for(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - chunk_id: String, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::entity_index_for_rpc(&config, req.chunk_id).await?) - }) -} - -fn handle_chunks_for_entity(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - entity_id: String, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::chunks_for_entity_rpc(&config, req.entity_id).await?) 
- }) -} - -fn handle_top_entities(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - #[serde(default)] - kind: Option, - limit: u32, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::top_entities_rpc(&config, req.kind, req.limit).await?) - }) -} - -fn handle_chunk_score(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - chunk_id: String, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::chunk_score_rpc(&config, req.chunk_id).await?) - }) -} - -fn handle_delete_chunk(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - chunk_id: String, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::delete_chunk_rpc(&config, req.chunk_id).await?) - }) -} - -fn handle_graph_export(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize, Default)] - struct Req { - #[serde(default)] - mode: Option, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params)).unwrap_or_default(); - to_json(read_rpc::graph_export_rpc(&config, req.mode.unwrap_or_default()).await?) - }) -} - -fn handle_obsidian_vault_status(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize, Default)] - struct Req { - #[serde(default)] - obsidian_config_dir: Option, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params)).unwrap_or_default(); - to_json(read_rpc::obsidian_vault_status_rpc(&config, req.obsidian_config_dir).await?) 
- }) -} - -fn handle_vault_health_check(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize, Default)] - struct Req { - #[serde(default)] - obsidian_config_dir: Option, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params)).unwrap_or_default(); - to_json(read_rpc::vault_health_check_rpc(&config, req.obsidian_config_dir).await?) - }) -} - -fn handle_flush_source(params: Map) -> ControllerFuture { - Box::pin(async move { - #[derive(serde::Deserialize)] - struct Req { - source_scope: String, - } - let config = config_rpc::load_config_with_timeout().await?; - let req = parse_value::(Value::Object(params))?; - to_json(read_rpc::flush_source_tree_rpc(&config, &req.source_scope).await?) - }) -} - -fn handle_flush_now(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(read_rpc::flush_now_rpc(&config).await?) - }) -} - -fn handle_wipe_all(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(read_rpc::wipe_all_rpc(&config).await?) - }) -} - -fn handle_reset_tree(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(read_rpc::reset_tree_rpc(&config).await?) - }) -} - -fn handle_pipeline_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(rpc::pipeline_status_rpc(&config).await?) - }) -} - -fn handle_set_enabled(params: Map) -> ControllerFuture { - Box::pin(async move { - let req = parse_value::(Value::Object(params))?; - let mut config = config_rpc::load_config_with_timeout().await?; - to_json(rpc::set_enabled_rpc(&mut config, req).await?) 
- }) -} - -fn handle_smart_walk(params: Map) -> ControllerFuture { - Box::pin(async move { - use crate::openhuman::memory::chat::build_chat_provider; - use crate::openhuman::memory::query::smart_walk::{ - run_smart_walk, SmartWalkOptions, SmartWalkStopReason, - }; - - #[derive(serde::Deserialize)] - struct Req { - query: String, - #[serde(default = "default_namespace")] - namespace: String, - #[serde(default)] - max_turns: Option, - #[serde(default)] - model: Option, - } - fn default_namespace() -> String { - "default".into() - } - - let req = parse_value::(Value::Object(params))?; - let config = config_rpc::load_config_with_timeout().await?; - - let chat_provider = build_chat_provider(&config) - .map_err(|e| format!("smart_walk: build chat provider failed: {e}"))?; - - struct Adapter { - inner: std::sync::Arc, - } - - #[async_trait::async_trait] - impl crate::openhuman::inference::provider::traits::Provider for Adapter { - async fn chat_with_system( - &self, - system: Option<&str>, - message: &str, - _model: &str, - temperature: f64, - ) -> anyhow::Result { - let prompt = crate::openhuman::memory::chat::ChatPrompt { - system: system.unwrap_or("").to_string(), - user: message.to_string(), - temperature, - kind: "memory_smart_walk_rpc", - }; - self.inner.chat_for_text(&prompt).await - } - - async fn chat_with_history( - &self, - messages: &[crate::openhuman::inference::provider::traits::ChatMessage], - model: &str, - temperature: f64, - ) -> anyhow::Result { - let system = messages - .iter() - .find(|m| m.role == "system") - .map(|m| m.content.as_str()); - let user: String = messages - .iter() - .filter(|m| m.role != "system") - .map(|m| m.content.as_str()) - .collect::>() - .join("\n"); - self.chat_with_system(system, &user, model, temperature) - .await - } - } - - let adapter = Adapter { - inner: chat_provider, - }; - - let opts = SmartWalkOptions { - max_turns: req.max_turns.map(|n| n as usize).unwrap_or(12), - namespace: req.namespace, - model: req.model, - 
content_root: None, - }; - - let outcome = run_smart_walk(&config, &adapter, &req.query, opts) - .await - .map_err(|e| format!("smart_walk error: {e}"))?; - - let stopped = match outcome.stopped_reason { - SmartWalkStopReason::Answered => "answered", - SmartWalkStopReason::MaxTurnsReached => "max_turns", - SmartWalkStopReason::LlmGaveUp => "llm_gave_up", - SmartWalkStopReason::Error(_) => "error", - }; - - let result = serde_json::json!({ - "answer": outcome.answer, - "turns_used": outcome.turns_used, - "evidence_count": outcome.evidence.len(), - "stopped_reason": stopped, - "evidence": outcome.evidence.iter().map(|e| serde_json::json!({ - "source_path": e.source_path, - "snippet": e.snippet, - "relevance": e.relevance, - })).collect::>(), - "trace": outcome.trace.iter().map(|s| serde_json::json!({ - "turn": s.turn, - "action": s.action, - "args_summary": s.args_summary, - "result_preview": s.result_preview, - })).collect::>(), - }); - to_json(RpcOutcome::new(result, vec![])) - }) -} - -fn handle_doctor(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(rpc::doctor_rpc(&config).await?) - }) -} - -fn handle_retry_failed(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(rpc::retry_failed_rpc(&config).await?) - }) -} - -fn parse_value(v: Value) -> Result { - serde_json::from_value(v).map_err(|e| format!("invalid params: {e}")) -} - -fn to_json(outcome: RpcOutcome) -> Result { - outcome.into_cli_compatible_json() -} - -#[cfg(test)] -#[path = "schema_tests.rs"] -mod tests; diff --git a/src/openhuman/memory/schema/handlers.rs b/src/openhuman/memory/schema/handlers.rs new file mode 100644 index 0000000000..bc7d8f96b0 --- /dev/null +++ b/src/openhuman/memory/schema/handlers.rs @@ -0,0 +1,376 @@ +//! Handler functions for every `memory_tree` JSON-RPC method. +//! +//! 
Each `handle_*` function is a thin bridge from raw JSON params to the +//! typed RPC calls in [`crate::openhuman::memory_tree::tree::rpc`] (write +//! side) or [`crate::openhuman::memory::read_rpc`] (UI read side). + +use serde::de::DeserializeOwned; +use serde_json::{Map, Value}; + +use crate::core::all::ControllerFuture; +use crate::openhuman::config::rpc as config_rpc; +use crate::openhuman::memory::read_rpc; +use crate::openhuman::memory_tree::tree::rpc; +use crate::rpc::RpcOutcome; + +// ── Write-side handlers (rpc::*) ───────────────────────────────────────── + +pub(super) fn handle_ingest(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(rpc::ingest_rpc(&config, req).await?) + }) +} + +pub(super) fn handle_get_chunk(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(rpc::get_chunk_rpc(&config, req).await?) + }) +} + +pub(super) fn handle_memory_backfill_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::backfill_status_rpc(&config).await?) + }) +} + +// ── Read-side handlers (read_rpc::*) ───────────────────────────────────── + +pub(super) fn handle_list_chunks(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let filter = parse_value::(Value::Object(params))?; + to_json(read_rpc::list_chunks_rpc(&config, filter).await?) 
+ }) +} + +pub(super) fn handle_list_sources(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize, Default)] + struct Req { + #[serde(default)] + user_email_hint: Option, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params)).unwrap_or_default(); + to_json(read_rpc::list_sources_rpc(&config, req.user_email_hint).await?) + }) +} + +pub(super) fn handle_search(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + query: String, + k: u32, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::search_rpc(&config, req.query, req.k).await?) + }) +} + +pub(super) fn handle_recall(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + query: String, + k: u32, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::recall_rpc(&config, req.query, req.k).await?) + }) +} + +pub(super) fn handle_entity_index_for(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + chunk_id: String, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::entity_index_for_rpc(&config, req.chunk_id).await?) + }) +} + +pub(super) fn handle_chunks_for_entity(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + entity_id: String, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::chunks_for_entity_rpc(&config, req.entity_id).await?) 
+ }) +} + +pub(super) fn handle_top_entities(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + #[serde(default)] + kind: Option, + limit: u32, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::top_entities_rpc(&config, req.kind, req.limit).await?) + }) +} + +pub(super) fn handle_chunk_score(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + chunk_id: String, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::chunk_score_rpc(&config, req.chunk_id).await?) + }) +} + +pub(super) fn handle_delete_chunk(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + chunk_id: String, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::delete_chunk_rpc(&config, req.chunk_id).await?) + }) +} + +pub(super) fn handle_graph_export(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize, Default)] + struct Req { + #[serde(default)] + mode: Option, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params)).unwrap_or_default(); + to_json(read_rpc::graph_export_rpc(&config, req.mode.unwrap_or_default()).await?) + }) +} + +pub(super) fn handle_obsidian_vault_status(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize, Default)] + struct Req { + #[serde(default)] + obsidian_config_dir: Option, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params)).unwrap_or_default(); + to_json(read_rpc::obsidian_vault_status_rpc(&config, req.obsidian_config_dir).await?) 
+ }) +} + +pub(super) fn handle_vault_health_check(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize, Default)] + struct Req { + #[serde(default)] + obsidian_config_dir: Option, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params)).unwrap_or_default(); + to_json(read_rpc::vault_health_check_rpc(&config, req.obsidian_config_dir).await?) + }) +} + +pub(super) fn handle_flush_source(params: Map) -> ControllerFuture { + Box::pin(async move { + #[derive(serde::Deserialize)] + struct Req { + source_scope: String, + } + let config = config_rpc::load_config_with_timeout().await?; + let req = parse_value::(Value::Object(params))?; + to_json(read_rpc::flush_source_tree_rpc(&config, &req.source_scope).await?) + }) +} + +pub(super) fn handle_flush_now(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(read_rpc::flush_now_rpc(&config).await?) + }) +} + +pub(super) fn handle_wipe_all(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(read_rpc::wipe_all_rpc(&config).await?) + }) +} + +pub(super) fn handle_reset_tree(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(read_rpc::reset_tree_rpc(&config).await?) + }) +} + +// ── Pipeline / control handlers ─────────────────────────────────────────── + +pub(super) fn handle_pipeline_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::pipeline_status_rpc(&config).await?) 
+ }) +} + +pub(super) fn handle_set_enabled(params: Map) -> ControllerFuture { + Box::pin(async move { + let req = parse_value::(Value::Object(params))?; + let mut config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::set_enabled_rpc(&mut config, req).await?) + }) +} + +pub(super) fn handle_smart_walk(params: Map) -> ControllerFuture { + Box::pin(async move { + use crate::openhuman::memory::chat::build_chat_provider; + use crate::openhuman::memory::query::smart_walk::{ + run_smart_walk, SmartWalkOptions, SmartWalkStopReason, + }; + + #[derive(serde::Deserialize)] + struct Req { + query: String, + #[serde(default = "default_namespace")] + namespace: String, + #[serde(default)] + max_turns: Option, + #[serde(default)] + model: Option, + } + fn default_namespace() -> String { + "default".into() + } + + let req = parse_value::(Value::Object(params))?; + let config = config_rpc::load_config_with_timeout().await?; + + let chat_provider = build_chat_provider(&config) + .map_err(|e| format!("smart_walk: build chat provider failed: {e}"))?; + + struct Adapter { + inner: std::sync::Arc, + } + + #[async_trait::async_trait] + impl crate::openhuman::inference::provider::traits::Provider for Adapter { + async fn chat_with_system( + &self, + system: Option<&str>, + message: &str, + _model: &str, + temperature: f64, + ) -> anyhow::Result { + let prompt = crate::openhuman::memory::chat::ChatPrompt { + system: system.unwrap_or("").to_string(), + user: message.to_string(), + temperature, + kind: "memory_smart_walk_rpc", + }; + self.inner.chat_for_text(&prompt).await + } + + async fn chat_with_history( + &self, + messages: &[crate::openhuman::inference::provider::traits::ChatMessage], + model: &str, + temperature: f64, + ) -> anyhow::Result { + let system = messages + .iter() + .find(|m| m.role == "system") + .map(|m| m.content.as_str()); + let user: String = messages + .iter() + .filter(|m| m.role != "system") + .map(|m| m.content.as_str()) + .collect::>() + 
.join("\n"); + self.chat_with_system(system, &user, model, temperature) + .await + } + } + + let adapter = Adapter { + inner: chat_provider, + }; + + let opts = SmartWalkOptions { + max_turns: req.max_turns.map(|n| n as usize).unwrap_or(12), + namespace: req.namespace, + model: req.model, + content_root: None, + }; + + let outcome = run_smart_walk(&config, &adapter, &req.query, opts) + .await + .map_err(|e| format!("smart_walk error: {e}"))?; + + let stopped = match outcome.stopped_reason { + SmartWalkStopReason::Answered => "answered", + SmartWalkStopReason::MaxTurnsReached => "max_turns", + SmartWalkStopReason::LlmGaveUp => "llm_gave_up", + SmartWalkStopReason::Error(_) => "error", + }; + + let result = serde_json::json!({ + "answer": outcome.answer, + "turns_used": outcome.turns_used, + "evidence_count": outcome.evidence.len(), + "stopped_reason": stopped, + "evidence": outcome.evidence.iter().map(|e| serde_json::json!({ + "source_path": e.source_path, + "snippet": e.snippet, + "relevance": e.relevance, + })).collect::>(), + "trace": outcome.trace.iter().map(|s| serde_json::json!({ + "turn": s.turn, + "action": s.action, + "args_summary": s.args_summary, + "result_preview": s.result_preview, + })).collect::>(), + }); + to_json(RpcOutcome::new(result, vec![])) + }) +} + +pub(super) fn handle_doctor(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::doctor_rpc(&config).await?) + }) +} + +pub(super) fn handle_retry_failed(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(rpc::retry_failed_rpc(&config).await?) 
+ }) +} + +// ── Shared helpers ──────────────────────────────────────────────────────── + +pub(super) fn parse_value(v: Value) -> Result { + serde_json::from_value(v).map_err(|e| format!("invalid params: {e}")) +} + +pub(super) fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} diff --git a/src/openhuman/memory/schema/mod.rs b/src/openhuman/memory/schema/mod.rs new file mode 100644 index 0000000000..156eeeffa2 --- /dev/null +++ b/src/openhuman/memory/schema/mod.rs @@ -0,0 +1,34 @@ +//! Controller schemas for the memory tree. +//! +//! Registered JSON-RPC methods include the original Phase 1 surface +//! (`ingest`, `list_chunks`, `get_chunk`) plus the new +//! Memory-tab read RPCs added by the cloud-default backend refactor: +//! `list_sources`, `search`, `recall`, `entity_index_for`, +//! `top_entities`, `chunk_score`, `delete_chunk`, and destructive +//! maintenance helpers for local iteration. +//! +//! Handlers delegate to [`super::rpc`] (write side) or +//! [`super::read_rpc`] (UI read side). +//! +//! # Sub-module layout +//! +//! | File | Contents | +//! |-------------------|------------------------------------------------------| +//! | `definitions.rs` | [`schemas`] match — one [`ControllerSchema`] per RPC | +//! | `handlers.rs` | `handle_*` functions bridging JSON → typed RPC calls | +//! | `registry.rs` | [`all_controller_schemas`] / [`all_registered_controllers`] lists | + +mod definitions; +mod handlers; +mod registry; + +pub use definitions::schemas; +pub use registry::{all_controller_schemas, all_registered_controllers}; + +// Re-export the NAMESPACE constant so schema_tests.rs can reference it via +// `super::NAMESPACE` the same way the original flat module did. 
+pub(crate) use definitions::NAMESPACE; + +#[cfg(test)] +#[path = "../schema_tests.rs"] +mod tests; diff --git a/src/openhuman/memory/schema/registry.rs b/src/openhuman/memory/schema/registry.rs new file mode 100644 index 0000000000..eafd736aca --- /dev/null +++ b/src/openhuman/memory/schema/registry.rs @@ -0,0 +1,142 @@ +//! Registry: lists of all `memory_tree` controller schemas and registered +//! controller pairs wired into `core::all`. + +use crate::core::all::RegisteredController; +use crate::core::ControllerSchema; + +use super::definitions::schemas; +use super::handlers::*; + +/// All `memory_tree` controller schemas, used by the registry to advertise +/// inputs/outputs to CLI + JSON-RPC consumers. +pub fn all_controller_schemas() -> Vec { + vec![ + schemas("ingest"), + schemas("list_chunks"), + schemas("get_chunk"), + schemas("memory_backfill_status"), + schemas("list_sources"), + schemas("search"), + schemas("recall"), + schemas("entity_index_for"), + schemas("chunks_for_entity"), + schemas("top_entities"), + schemas("chunk_score"), + schemas("delete_chunk"), + schemas("graph_export"), + schemas("obsidian_vault_status"), + schemas("vault_health_check"), + schemas("flush_now"), + schemas("flush_source"), + schemas("wipe_all"), + schemas("reset_tree"), + schemas("pipeline_status"), + schemas("set_enabled"), + schemas("smart_walk"), + schemas("doctor"), + schemas("retry_failed"), + ] +} + +/// Registered `memory_tree` controllers (schema + handler pairs) wired into +/// `core::all`. 
+pub fn all_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: schemas("ingest"), + handler: handle_ingest, + }, + RegisteredController { + schema: schemas("list_chunks"), + handler: handle_list_chunks, + }, + RegisteredController { + schema: schemas("get_chunk"), + handler: handle_get_chunk, + }, + RegisteredController { + schema: schemas("memory_backfill_status"), + handler: handle_memory_backfill_status, + }, + RegisteredController { + schema: schemas("list_sources"), + handler: handle_list_sources, + }, + RegisteredController { + schema: schemas("search"), + handler: handle_search, + }, + RegisteredController { + schema: schemas("recall"), + handler: handle_recall, + }, + RegisteredController { + schema: schemas("entity_index_for"), + handler: handle_entity_index_for, + }, + RegisteredController { + schema: schemas("chunks_for_entity"), + handler: handle_chunks_for_entity, + }, + RegisteredController { + schema: schemas("top_entities"), + handler: handle_top_entities, + }, + RegisteredController { + schema: schemas("chunk_score"), + handler: handle_chunk_score, + }, + RegisteredController { + schema: schemas("delete_chunk"), + handler: handle_delete_chunk, + }, + RegisteredController { + schema: schemas("graph_export"), + handler: handle_graph_export, + }, + RegisteredController { + schema: schemas("obsidian_vault_status"), + handler: handle_obsidian_vault_status, + }, + RegisteredController { + schema: schemas("vault_health_check"), + handler: handle_vault_health_check, + }, + RegisteredController { + schema: schemas("flush_now"), + handler: handle_flush_now, + }, + RegisteredController { + schema: schemas("flush_source"), + handler: handle_flush_source, + }, + RegisteredController { + schema: schemas("wipe_all"), + handler: handle_wipe_all, + }, + RegisteredController { + schema: schemas("reset_tree"), + handler: handle_reset_tree, + }, + RegisteredController { + schema: schemas("pipeline_status"), + handler: handle_pipeline_status, 
+ }, + RegisteredController { + schema: schemas("set_enabled"), + handler: handle_set_enabled, + }, + RegisteredController { + schema: schemas("smart_walk"), + handler: handle_smart_walk, + }, + RegisteredController { + schema: schemas("doctor"), + handler: handle_doctor, + }, + RegisteredController { + schema: schemas("retry_failed"), + handler: handle_retry_failed, + }, + ] +} diff --git a/src/openhuman/memory_store/chunks/migrations.rs b/src/openhuman/memory_store/chunks/migrations.rs new file mode 100644 index 0000000000..a26cb4d48e --- /dev/null +++ b/src/openhuman/memory_store/chunks/migrations.rs @@ -0,0 +1,201 @@ +//! One-shot SQLite migrations for the chunks DB. +//! +//! These functions are called from [`super::connection`] during DB initialisation. +//! Each migration is version-gated via `PRAGMA user_version` so it runs exactly +//! once per vault. + +use anyhow::{Context, Result}; +use rusqlite::Connection; + +use super::{ + has_uncovered_reembed_work, set_chunk_embedding_for_signature_tx, + GLOBAL_TOPIC_PURGE_MIGRATION_VERSION, TREE_EMBEDDING_MIGRATION_VERSION, +}; +use crate::openhuman::config::Config; + +/// One-shot migration: copy legacy per-chunk/summary `.embedding` blobs into the +/// normalised `mem_tree_chunk_embeddings` / `mem_tree_summary_embeddings` sidecar +/// tables introduced in #1574. +/// +/// Version-gated: `PRAGMA user_version < 1` triggers the copy; `>= 1` is a no-op. 
+pub(super) fn migrate_legacy_embeddings_to_sidecar( + conn: &Connection, + config: &Config, +) -> Result<()> { + let version: i64 = conn + .query_row("PRAGMA user_version", [], |r| r.get(0)) + .context("read PRAGMA user_version for #1574 migration")?; + if version >= TREE_EMBEDDING_MIGRATION_VERSION { + return Ok(()); + } + + let (provider, model, dims) = crate::openhuman::memory_store::effective_embedding_settings( + &config.memory, + config.workload_local_model("embeddings").as_deref(), + ); + let sig = crate::openhuman::embeddings::format_embedding_signature(&provider, &model, dims); + log::info!( + "[memory_tree::migrate] #1574 §7: copying legacy embeddings → sidecar at sig={sig} (dims={dims})" + ); + + let tx = conn.unchecked_transaction()?; + let mut copied_chunks = 0usize; + let mut copied_summaries = 0usize; + let mut skipped_dim_mismatch = 0usize; + + for (table, is_chunk) in [("mem_tree_chunks", true), ("mem_tree_summaries", false)] { + let mut stmt = tx.prepare(&format!( + "SELECT id, embedding FROM {table} WHERE embedding IS NOT NULL" + ))?; + let rows = stmt.query_map([], |r| { + Ok((r.get::<_, String>(0)?, r.get::<_, Vec>(1)?)) + })?; + for row in rows { + let (id, blob) = row?; + if !blob.len().is_multiple_of(4) { + log::warn!( + "[memory_tree::migrate] {table} id={id}: legacy blob len {} not /4, skipping", + blob.len() + ); + continue; + } + if blob.len() / 4 != dims { + // Different embedding space — unrecoverable from the blob. + // Leave for the §6 re-embed backfill. 
+ skipped_dim_mismatch += 1; + continue; + } + let vec: Vec = blob + .chunks_exact(4) + .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]])) + .collect(); + if is_chunk { + set_chunk_embedding_for_signature_tx(&tx, &id, &sig, &vec)?; + copied_chunks += 1; + } else { + crate::openhuman::memory_store::trees::store::set_summary_embedding_for_signature_tx( + &tx, &id, &sig, &vec, + )?; + copied_summaries += 1; + } + } + } + + // #1574 §6: enqueue the re-embed backfill ONLY if there is genuinely + // uncovered work at the active signature (the dim-mismatch slice, or + // content-bearing rows with no vector). Gating this avoids queuing a + // no-op job on every DB open — which would otherwise pollute the jobs + // table for unrelated callers/tests. Enqueued atomically with the + // migration; dedupe key = signature, so exactly one chain per space. + let has_uncovered = has_uncovered_reembed_work(&*tx, &sig)?; + if has_uncovered { + let backfill_job = crate::openhuman::memory_queue::types::NewJob::reembed_backfill( + &crate::openhuman::memory_queue::types::ReembedBackfillPayload { + signature: sig.clone(), + }, + )?; + crate::openhuman::memory_queue::enqueue_tx(&tx, &backfill_job)?; + } + + tx.commit()?; + conn.pragma_update(None, "user_version", TREE_EMBEDDING_MIGRATION_VERSION) + .context("set PRAGMA user_version after #1574 migration")?; + if has_uncovered { + crate::openhuman::memory_queue::set_backfill_in_progress(true); + } + log::info!( + "[memory_tree::migrate] #1574 §7 done: copied chunks={copied_chunks} summaries={copied_summaries} \ + skipped_dim_mismatch={skipped_dim_mismatch} (left for §6 re-embed); user_version={TREE_EMBEDDING_MIGRATION_VERSION}" + ); + Ok(()) +} + +/// One-shot purge of the removed global + topic trees. +/// +/// The global (time-axis) and topic (subject-axis) trees were deleted in +/// favour of the source trees (which hold all content). 
This migration +/// removes their now-orphaned DB rows and on-disk summary folders so old +/// vaults clean themselves up on next open. Version-gated via +/// `PRAGMA user_version` (see [`GLOBAL_TOPIC_PURGE_MIGRATION_VERSION`]); a +/// no-op on workspaces that never had those trees. +pub(super) fn purge_global_topic_trees(conn: &Connection, config: &Config) -> Result<()> { + let version: i64 = conn + .query_row("PRAGMA user_version", [], |r| r.get(0)) + .context("read PRAGMA user_version for global/topic purge")?; + if version >= GLOBAL_TOPIC_PURGE_MIGRATION_VERSION { + return Ok(()); + } + + let tx = conn.unchecked_transaction()?; + // Child rows first (summary sidecars / skip-lists are keyed by + // summary_id; entity-index + buffers carry an FK on tree_id). + let removed_summary_sidecars = tx.execute( + "DELETE FROM mem_tree_summary_embeddings WHERE summary_id IN \ + (SELECT id FROM mem_tree_summaries WHERE tree_kind IN ('global','topic'))", + [], + )?; + tx.execute( + "DELETE FROM mem_tree_summary_reembed_skipped WHERE summary_id IN \ + (SELECT id FROM mem_tree_summaries WHERE tree_kind IN ('global','topic'))", + [], + )?; + tx.execute( + "DELETE FROM mem_tree_entity_index WHERE tree_id IN \ + (SELECT id FROM mem_tree_trees WHERE kind IN ('global','topic'))", + [], + )?; + let removed_summaries = tx.execute( + "DELETE FROM mem_tree_summaries WHERE tree_kind IN ('global','topic')", + [], + )?; + tx.execute( + "DELETE FROM mem_tree_buffers WHERE tree_id IN \ + (SELECT id FROM mem_tree_trees WHERE kind IN ('global','topic'))", + [], + )?; + let removed_trees = tx.execute( + "DELETE FROM mem_tree_trees WHERE kind IN ('global','topic')", + [], + )?; + // Drain any queued jobs for the retired kinds so the worker loop never + // trips over a payload it can no longer parse. 
+ let removed_jobs = tx.execute( + "DELETE FROM mem_tree_jobs WHERE kind IN ('topic_route','digest_daily')", + [], + )?; + tx.commit()?; + + // On-disk: drop the `wiki/summaries/global*` (both the legacy per-day + // `global-/` folders and the singleton `global/`) and `topic-*` + // summary folders. Best-effort — a filesystem error must not abort the + // version bump, or the purge would retry forever. + let summaries_root = config + .memory_tree_content_root() + .join("wiki") + .join("summaries"); + let mut removed_dirs = 0usize; + if let Ok(entries) = std::fs::read_dir(&summaries_root) { + for entry in entries.flatten() { + let name = entry.file_name(); + let name = name.to_string_lossy(); + if name.starts_with("global") || name.starts_with("topic-") { + match std::fs::remove_dir_all(entry.path()) { + Ok(()) => removed_dirs += 1, + Err(e) => log::warn!( + "[memory_tree::migrate] purge: failed to remove {} : {e}", + entry.path().display() + ), + } + } + } + } + + conn.pragma_update(None, "user_version", GLOBAL_TOPIC_PURGE_MIGRATION_VERSION) + .context("set PRAGMA user_version after global/topic purge")?; + log::info!( + "[memory_tree::migrate] global/topic purge done: trees={removed_trees} \ + summaries={removed_summaries} sidecars={removed_summary_sidecars} jobs={removed_jobs} \ + dirs={removed_dirs}; user_version={GLOBAL_TOPIC_PURGE_MIGRATION_VERSION}" + ); + Ok(()) +} diff --git a/src/openhuman/memory_store/chunks/raw_refs.rs b/src/openhuman/memory_store/chunks/raw_refs.rs new file mode 100644 index 0000000000..5d581d6e17 --- /dev/null +++ b/src/openhuman/memory_store/chunks/raw_refs.rs @@ -0,0 +1,154 @@ +//! Raw-archive pointers and content-pointer accessors for chunk/summary rows. +//! +//! `RawRef` lets ingest pipelines mirror full message bodies to on-disk +//! archives under `/raw/` while storing only a ≤500-char +//! preview in the SQLite `content` column. Retrieval reads the archive +//! directly instead of going through the SQL preview path. 
+ +use anyhow::{Context, Result}; +use rusqlite::{params, OptionalExtension}; + +use super::with_connection; +use crate::openhuman::config::Config; + +/// One pointer into the raw archive. A chunk's body is reconstructed by +/// reading each [`RawRef`] in order and joining with `"\n\n"`. +/// +/// `start` / `end` are byte offsets into the raw `.md` file. `end = +/// None` means "read to end of file". Both default to "the whole +/// file" (`start = 0`, `end = None`) for the common one-message-one-chunk +/// path; oversize-message chunks get explicit ranges so each chunk +/// reconstructs its sub-slice. +#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] +pub struct RawRef { + /// Forward-slash relative path under `/`, + /// e.g. `"raw/gmail-stevent95-at-gmail-dot-com/1700000_msg-id.md"`. + pub path: String, + #[serde(default)] + pub start: usize, + #[serde(default)] + pub end: Option, +} + +/// Stash a list of [`RawRef`] entries on a chunk row. Replaces any +/// previous value. Used by ingest pipelines that mirror their bytes +/// into `/raw/...` so reads can skip the SQL preview +/// path and pull the full body straight from the archive. +pub fn set_chunk_raw_refs(config: &Config, chunk_id: &str, refs: &[RawRef]) -> Result<()> { + let json = serde_json::to_string(refs).context("serialize raw_refs")?; + with_connection(config, |conn| { + conn.execute( + "UPDATE mem_tree_chunks SET raw_refs_json = ?1 WHERE id = ?2", + params![json, chunk_id], + )?; + Ok(()) + }) +} + +/// Return the raw-archive pointers stored in SQLite for `chunk_id`, +/// or `None` if no `raw_refs_json` was recorded. +pub fn get_chunk_raw_refs(config: &Config, chunk_id: &str) -> Result>> { + with_connection(config, |conn| { + let row = conn + .query_row( + "SELECT raw_refs_json FROM mem_tree_chunks WHERE id = ?1", + params![chunk_id], + |r| r.get::<_, Option>(0), + ) + .optional()? 
+ .flatten(); + match row { + Some(json) if !json.is_empty() => { + let refs: Vec = + serde_json::from_str(&json).context("deserialize raw_refs_json")?; + Ok(Some(refs)) + } + _ => Ok(None), + } + }) +} + +/// Return both `content_path` and `content_sha256` stored in SQLite for `chunk_id`. +/// +/// Returns `Ok(None)` if the chunk does not exist or has no content_path recorded yet. +pub fn get_chunk_content_pointers( + config: &Config, + chunk_id: &str, +) -> Result> { + with_connection(config, |conn| { + let row = conn + .query_row( + "SELECT content_path, content_sha256 FROM mem_tree_chunks WHERE id = ?1", + params![chunk_id], + |r| { + let path: Option = r.get(0)?; + let sha: Option = r.get(1)?; + Ok((path, sha)) + }, + ) + .optional()?; + Ok(row.and_then(|(p, s)| p.zip(s))) + }) +} + +/// Return the `content_path` stored in SQLite for `chunk_id`, if any. +pub fn get_chunk_content_path(config: &Config, chunk_id: &str) -> Result> { + with_connection(config, |conn| { + let row = conn + .query_row( + "SELECT content_path FROM mem_tree_chunks WHERE id = ?1", + params![chunk_id], + |r| r.get::<_, Option>(0), + ) + .optional()? + .flatten(); + Ok(row) + }) +} + +/// Return both `content_path` and `content_sha256` stored in SQLite for `summary_id`. +/// +/// Returns `Ok(None)` if the summary does not exist or has no content_path recorded yet +/// (legacy rows pre-MD-content migration). +pub fn get_summary_content_pointers( + config: &Config, + summary_id: &str, +) -> Result> { + with_connection(config, |conn| { + let row = conn + .query_row( + "SELECT content_path, content_sha256 FROM mem_tree_summaries WHERE id = ?1", + params![summary_id], + |r| { + let path: Option = r.get(0)?; + let sha: Option = r.get(1)?; + Ok((path, sha)) + }, + ) + .optional()?; + Ok(row.and_then(|(p, s)| p.zip(s))) + }) +} + +/// List all summary rows that have a non-NULL `content_path`. Used by the +/// bin integrity checker. 
+pub fn list_summaries_with_content_path(config: &Config) -> Result> { + with_connection(config, |conn| { + let mut stmt = conn.prepare( + "SELECT id, content_path, content_sha256 + FROM mem_tree_summaries + WHERE content_path IS NOT NULL AND content_sha256 IS NOT NULL + AND deleted = 0", + )?; + let rows = stmt + .query_map([], |r| { + let id: String = r.get(0)?; + let path: String = r.get(1)?; + let sha: String = r.get(2)?; + Ok((id, path, sha)) + })? + .collect::>>() + .context("Failed to list summaries with content_path")?; + Ok(rows) + }) +} diff --git a/src/openhuman/memory_store/chunks/store.rs b/src/openhuman/memory_store/chunks/store.rs index e6431c7eaa..2fc3637fc6 100644 --- a/src/openhuman/memory_store/chunks/store.rs +++ b/src/openhuman/memory_store/chunks/store.rs @@ -1178,326 +1178,16 @@ pub(crate) use connection::{ #[cfg(test)] pub(crate) use connection::{is_transient_cold_start, try_cleanup_stale_files}; -fn migrate_legacy_embeddings_to_sidecar(conn: &Connection, config: &Config) -> Result<()> { - let version: i64 = conn - .query_row("PRAGMA user_version", [], |r| r.get(0)) - .context("read PRAGMA user_version for #1574 migration")?; - if version >= TREE_EMBEDDING_MIGRATION_VERSION { - return Ok(()); - } - - let (provider, model, dims) = crate::openhuman::memory_store::effective_embedding_settings( - &config.memory, - config.workload_local_model("embeddings").as_deref(), - ); - let sig = crate::openhuman::embeddings::format_embedding_signature(&provider, &model, dims); - log::info!( - "[memory_tree::migrate] #1574 §7: copying legacy embeddings → sidecar at sig={sig} (dims={dims})" - ); - - let tx = conn.unchecked_transaction()?; - let mut copied_chunks = 0usize; - let mut copied_summaries = 0usize; - let mut skipped_dim_mismatch = 0usize; - - for (table, is_chunk) in [("mem_tree_chunks", true), ("mem_tree_summaries", false)] { - let mut stmt = tx.prepare(&format!( - "SELECT id, embedding FROM {table} WHERE embedding IS NOT NULL" - ))?; - let rows = 
stmt.query_map([], |r| { - Ok((r.get::<_, String>(0)?, r.get::<_, Vec>(1)?)) - })?; - for row in rows { - let (id, blob) = row?; - if !blob.len().is_multiple_of(4) { - log::warn!( - "[memory_tree::migrate] {table} id={id}: legacy blob len {} not /4, skipping", - blob.len() - ); - continue; - } - if blob.len() / 4 != dims { - // Different embedding space — unrecoverable from the blob. - // Leave for the §6 re-embed backfill. - skipped_dim_mismatch += 1; - continue; - } - let vec: Vec = blob - .chunks_exact(4) - .map(|c| f32::from_le_bytes([c[0], c[1], c[2], c[3]])) - .collect(); - if is_chunk { - set_chunk_embedding_for_signature_tx(&tx, &id, &sig, &vec)?; - copied_chunks += 1; - } else { - crate::openhuman::memory_store::trees::store::set_summary_embedding_for_signature_tx( - &tx, &id, &sig, &vec, - )?; - copied_summaries += 1; - } - } - } - - // #1574 §6: enqueue the re-embed backfill ONLY if there is genuinely - // uncovered work at the active signature (the dim-mismatch slice, or - // content-bearing rows with no vector). Gating this avoids queuing a - // no-op job on every DB open — which would otherwise pollute the jobs - // table for unrelated callers/tests. Enqueued atomically with the - // migration; dedupe key = signature, so exactly one chain per space. 
- let has_uncovered = has_uncovered_reembed_work(&*tx, &sig)?; - if has_uncovered { - let backfill_job = crate::openhuman::memory_queue::types::NewJob::reembed_backfill( - &crate::openhuman::memory_queue::types::ReembedBackfillPayload { - signature: sig.clone(), - }, - )?; - crate::openhuman::memory_queue::enqueue_tx(&tx, &backfill_job)?; - } - - tx.commit()?; - conn.pragma_update(None, "user_version", TREE_EMBEDDING_MIGRATION_VERSION) - .context("set PRAGMA user_version after #1574 migration")?; - if has_uncovered { - crate::openhuman::memory_queue::set_backfill_in_progress(true); - } - log::info!( - "[memory_tree::migrate] #1574 §7 done: copied chunks={copied_chunks} summaries={copied_summaries} \ - skipped_dim_mismatch={skipped_dim_mismatch} (left for §6 re-embed); user_version={TREE_EMBEDDING_MIGRATION_VERSION}" - ); - Ok(()) -} - -/// One-shot purge of the removed global + topic trees. -/// -/// The global (time-axis) and topic (subject-axis) trees were deleted in -/// favour of the source trees (which hold all content). This migration -/// removes their now-orphaned DB rows and on-disk summary folders so old -/// vaults clean themselves up on next open. Version-gated via -/// `PRAGMA user_version` (see [`GLOBAL_TOPIC_PURGE_MIGRATION_VERSION`]); a -/// no-op on workspaces that never had those trees. -fn purge_global_topic_trees(conn: &Connection, config: &Config) -> Result<()> { - let version: i64 = conn - .query_row("PRAGMA user_version", [], |r| r.get(0)) - .context("read PRAGMA user_version for global/topic purge")?; - if version >= GLOBAL_TOPIC_PURGE_MIGRATION_VERSION { - return Ok(()); - } - - let tx = conn.unchecked_transaction()?; - // Child rows first (summary sidecars / skip-lists are keyed by - // summary_id; entity-index + buffers carry an FK on tree_id). 
- let removed_summary_sidecars = tx.execute( - "DELETE FROM mem_tree_summary_embeddings WHERE summary_id IN \ - (SELECT id FROM mem_tree_summaries WHERE tree_kind IN ('global','topic'))", - [], - )?; - tx.execute( - "DELETE FROM mem_tree_summary_reembed_skipped WHERE summary_id IN \ - (SELECT id FROM mem_tree_summaries WHERE tree_kind IN ('global','topic'))", - [], - )?; - tx.execute( - "DELETE FROM mem_tree_entity_index WHERE tree_id IN \ - (SELECT id FROM mem_tree_trees WHERE kind IN ('global','topic'))", - [], - )?; - let removed_summaries = tx.execute( - "DELETE FROM mem_tree_summaries WHERE tree_kind IN ('global','topic')", - [], - )?; - tx.execute( - "DELETE FROM mem_tree_buffers WHERE tree_id IN \ - (SELECT id FROM mem_tree_trees WHERE kind IN ('global','topic'))", - [], - )?; - let removed_trees = tx.execute( - "DELETE FROM mem_tree_trees WHERE kind IN ('global','topic')", - [], - )?; - // Drain any queued jobs for the retired kinds so the worker loop never - // trips over a payload it can no longer parse. - let removed_jobs = tx.execute( - "DELETE FROM mem_tree_jobs WHERE kind IN ('topic_route','digest_daily')", - [], - )?; - tx.commit()?; - - // On-disk: drop the `wiki/summaries/global*` (both the legacy per-day - // `global-/` folders and the singleton `global/`) and `topic-*` - // summary folders. Best-effort — a filesystem error must not abort the - // version bump, or the purge would retry forever. 
- let summaries_root = config - .memory_tree_content_root() - .join("wiki") - .join("summaries"); - let mut removed_dirs = 0usize; - if let Ok(entries) = std::fs::read_dir(&summaries_root) { - for entry in entries.flatten() { - let name = entry.file_name(); - let name = name.to_string_lossy(); - if name.starts_with("global") || name.starts_with("topic-") { - match std::fs::remove_dir_all(entry.path()) { - Ok(()) => removed_dirs += 1, - Err(e) => log::warn!( - "[memory_tree::migrate] purge: failed to remove {} : {e}", - entry.path().display() - ), - } - } - } - } - - conn.pragma_update(None, "user_version", GLOBAL_TOPIC_PURGE_MIGRATION_VERSION) - .context("set PRAGMA user_version after global/topic purge")?; - log::info!( - "[memory_tree::migrate] global/topic purge done: trees={removed_trees} \ - summaries={removed_summaries} sidecars={removed_summary_sidecars} jobs={removed_jobs} \ - dirs={removed_dirs}; user_version={GLOBAL_TOPIC_PURGE_MIGRATION_VERSION}" - ); - Ok(()) -} - -/// One pointer into the raw archive. A chunk's body is reconstructed by -/// reading each [`RawRef`] in order and joining with `"\n\n"`. -/// -/// `start` / `end` are byte offsets into the raw `.md` file. `end = -/// None` means "read to end of file". Both default to "the whole -/// file" (`start = 0`, `end = None`) for the common one-message-one-chunk -/// path; oversize-message chunks get explicit ranges so each chunk -/// reconstructs its sub-slice. -#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] -pub struct RawRef { - /// Forward-slash relative path under `/`, - /// e.g. `"raw/gmail-stevent95-at-gmail-dot-com/1700000_msg-id.md"`. - pub path: String, - #[serde(default)] - pub start: usize, - #[serde(default)] - pub end: Option, -} - -/// Stash a list of [`RawRef`] entries on a chunk row. Replaces any -/// previous value. 
Used by ingest pipelines that mirror their bytes -/// into `/raw/...` so reads can skip the SQL preview -/// path and pull the full body straight from the archive. -pub fn set_chunk_raw_refs(config: &Config, chunk_id: &str, refs: &[RawRef]) -> Result<()> { - let json = serde_json::to_string(refs).context("serialize raw_refs")?; - with_connection(config, |conn| { - conn.execute( - "UPDATE mem_tree_chunks SET raw_refs_json = ?1 WHERE id = ?2", - params![json, chunk_id], - )?; - Ok(()) - }) -} - -/// Return the raw-archive pointers stored in SQLite for `chunk_id`, -/// or `None` if no `raw_refs_json` was recorded. -pub fn get_chunk_raw_refs(config: &Config, chunk_id: &str) -> Result>> { - with_connection(config, |conn| { - let row = conn - .query_row( - "SELECT raw_refs_json FROM mem_tree_chunks WHERE id = ?1", - params![chunk_id], - |r| r.get::<_, Option>(0), - ) - .optional()? - .flatten(); - match row { - Some(json) if !json.is_empty() => { - let refs: Vec = - serde_json::from_str(&json).context("deserialize raw_refs_json")?; - Ok(Some(refs)) - } - _ => Ok(None), - } - }) -} - -/// Return both `content_path` and `content_sha256` stored in SQLite for `chunk_id`. -/// -/// Returns `Ok(None)` if the chunk does not exist or has no content_path recorded yet. -pub fn get_chunk_content_pointers( - config: &Config, - chunk_id: &str, -) -> Result> { - with_connection(config, |conn| { - let row = conn - .query_row( - "SELECT content_path, content_sha256 FROM mem_tree_chunks WHERE id = ?1", - params![chunk_id], - |r| { - let path: Option = r.get(0)?; - let sha: Option = r.get(1)?; - Ok((path, sha)) - }, - ) - .optional()?; - Ok(row.and_then(|(p, s)| p.zip(s))) - }) -} - -/// Return the `content_path` stored in SQLite for `chunk_id`, if any. 
-pub fn get_chunk_content_path(config: &Config, chunk_id: &str) -> Result> { - with_connection(config, |conn| { - let row = conn - .query_row( - "SELECT content_path FROM mem_tree_chunks WHERE id = ?1", - params![chunk_id], - |r| r.get::<_, Option>(0), - ) - .optional()? - .flatten(); - Ok(row) - }) -} - -/// Return both `content_path` and `content_sha256` stored in SQLite for `summary_id`. -/// -/// Returns `Ok(None)` if the summary does not exist or has no content_path recorded yet -/// (legacy rows pre-MD-content migration). -pub fn get_summary_content_pointers( - config: &Config, - summary_id: &str, -) -> Result> { - with_connection(config, |conn| { - let row = conn - .query_row( - "SELECT content_path, content_sha256 FROM mem_tree_summaries WHERE id = ?1", - params![summary_id], - |r| { - let path: Option = r.get(0)?; - let sha: Option = r.get(1)?; - Ok((path, sha)) - }, - ) - .optional()?; - Ok(row.and_then(|(p, s)| p.zip(s))) - }) -} - -/// List all summary rows that have a non-NULL `content_path`. Used by the -/// bin integrity checker. -pub fn list_summaries_with_content_path(config: &Config) -> Result> { - with_connection(config, |conn| { - let mut stmt = conn.prepare( - "SELECT id, content_path, content_sha256 - FROM mem_tree_summaries - WHERE content_path IS NOT NULL AND content_sha256 IS NOT NULL - AND deleted = 0", - )?; - let rows = stmt - .query_map([], |r| { - let id: String = r.get(0)?; - let path: String = r.get(1)?; - let sha: String = r.get(2)?; - Ok((id, path, sha)) - })? 
- .collect::>>() - .context("Failed to list summaries with content_path")?; - Ok(rows) - }) -} +#[path = "migrations.rs"] +mod migrations; +use migrations::{migrate_legacy_embeddings_to_sidecar, purge_global_topic_trees}; + +#[path = "raw_refs.rs"] +mod raw_refs; +pub use raw_refs::{ + get_chunk_content_path, get_chunk_content_pointers, get_chunk_raw_refs, + get_summary_content_pointers, list_summaries_with_content_path, set_chunk_raw_refs, RawRef, +}; fn normalized_limit(requested: Option) -> i64 { let clamped = requested diff --git a/src/openhuman/memory_store/content/README.md b/src/openhuman/memory_store/content/README.md index f33e81cbbd..74e36d79df 100644 --- a/src/openhuman/memory_store/content/README.md +++ b/src/openhuman/memory_store/content/README.md @@ -8,7 +8,7 @@ The body is **immutable** once written — only the YAML front-matter `tags:` bl - [`mod.rs`](mod.rs) — public surface: `StagedChunk`, `stage_chunks` (write all chunks atomically before SQLite upsert), `update_summary_tags` re-export. - [`atomic.rs`](atomic.rs) — `write_if_new` (tempfile + fsync + rename, parent dir fsync on Unix), `stage_summary` (idempotent re-stage with on-disk SHA check + auto-rewrite on mismatch), `sha256_hex`, `StagedSummary`. -- [`compose.rs`](compose.rs) — YAML front-matter + body composition. `compose_chunk_file` for chunks (with email-only `participants:` / `aliases:` fields parsed from `gmail:{addr1|addr2|…}` source ids), `compose_summary_md` for summary nodes. `rewrite_tags` / `rewrite_summary_tags` swap the `tags:` block in place. `split_front_matter` parses `---\n…\n---\n`. +- [`compose/`](compose/) — YAML front-matter + body composition. `compose_chunk_file` for chunks (with email-only `participants:` / `aliases:` fields parsed from `gmail:{addr1|addr2|…}` source ids), `compose_summary_md` for summary nodes. `rewrite_tags` / `rewrite_summary_tags` swap the `tags:` block in place. `split_front_matter` parses `---\n…\n---\n`. 
- [`paths.rs`](paths.rs) — path generators. `chunk_rel_path` (`email//.md`, `chat//.md`, `document//.md`); `summary_rel_path` (`summaries/{source,global,topic}/…`). `slugify_source_id` is the canonical filesystem-safe slug. - [`read.rs`](read.rs) — `read_chunk_file` / `read_summary_file` parse front-matter and return body+SHA. `verify_*` compares against an expected SHA. `read_chunk_body` / `read_summary_body` resolve the path via SQLite and verify the integrity hash; this is the authoritative entry-point for callers that need the **full** body (LLM extractor, summariser, embedder, retrieval API). - [`raw.rs`](raw.rs) — verbatim source-byte mirror under `/raw/`. Writes the unmodified upstream payload (eml, slack json, raw markdown) so downstream callers can re-canonicalise without re-fetching. diff --git a/src/openhuman/memory_store/content/compose.rs b/src/openhuman/memory_store/content/compose.rs deleted file mode 100644 index fd940e97a3..0000000000 --- a/src/openhuman/memory_store/content/compose.rs +++ /dev/null @@ -1,1246 +0,0 @@ -//! YAML front-matter + body composition for chunk `.md` files. -//! -//! Each file written to disk has the form: -//! ```text -//! --- -//! source_kind: chat -//! source_id: slack:#eng -//! seq: 0 -//! owner: alice@example.com -//! timestamp: 2026-04-28T10:00:00Z -//! time_range_start: 2026-04-28T10:00:00Z -//! time_range_end: 2026-04-28T10:05:00Z -//! source_ref: slack://permalink/… -//! tags: -//! - person/Alice-Smith -//! - project/Phoenix -//! --- -//! ## 2026-04-28T10:00:00Z — alice -//! Message body here. -//! ``` -//! -//! For email source_kind, additional fields are emitted: -//! ```text -//! participants: -//! - alice@example.com -//! - bob@example.com -//! aliases: -//! - "alice@example.com <-> bob@example.com: chunk 0" -//! ``` -//! These are parsed from the `source_id` field (format `gmail:{participants}` -//! where `participants` is `addr1|addr2|...` pipe-separated) at compose time. -//! 
`sender` and `thread_id` are no longer emitted — they are not meaningful -//! with participant-based bucketing. -//! -//! **SHA-256 is computed over the body bytes only** (everything after `---\n` -//! on the second delimiter line). This allows tags to be rewritten atomically -//! without invalidating the content hash. - -use chrono::{DateTime, Utc}; - -use crate::openhuman::memory_store::chunks::types::{Chunk, SourceKind}; -use crate::openhuman::memory_store::content::paths::{ - slugify_source_id, summary_filename, SummaryTreeKind, -}; - -pub const MEMORY_ARTIFACT_FORMAT: u32 = 2; -pub const OPENHUMAN_CORE_VERSION: &str = env!("CARGO_PKG_VERSION"); - -/// Build the canonical Obsidian `source/` tag for a given -/// source scope. Used to seed the `tags:` block on every chunk and -/// every source-tree summary so the Obsidian graph view can filter by -/// source. -/// -/// Slug rules match `slugify_source_id` (lowercase ASCII, `-` separators, -/// alphanumerics + `_` preserved) so the tag matches the on-disk -/// `raw//...` directory name byte-for-byte. -pub fn source_tag(scope: &str) -> String { - format!("source/{}", slugify_source_id(scope)) -} - -/// Prepend the source tag to `tags`, dedup, and return the new list. -/// Order is preserved otherwise — `source/...` always comes first so -/// it shows up at the top of the YAML block. -pub fn with_source_tag(scope: &str, tags: &[String]) -> Vec { - let st = source_tag(scope); - let mut out = Vec::with_capacity(tags.len() + 1); - out.push(st.clone()); - for t in tags { - if t != &st { - out.push(t.clone()); - } - } - out -} - -/// Parse the value of a top-level YAML scalar field (e.g. `source_id`, -/// `tree_scope`, `tree_kind`) from a frontmatter string. Strips -/// surrounding double-quotes if present so the returned slice matches -/// what the original composer passed in. Returns `None` if the key is -/// not present at the top level of the frontmatter. 
-pub fn scan_fm_field<'a>(fm: &'a str, key: &str) -> Option { - let prefix = format!("{key}: "); - for raw in fm.lines() { - // Skip indented lines (those are list items / nested mappings). - if raw.starts_with(' ') || raw.starts_with('\t') { - continue; - } - if let Some(rest) = raw.strip_prefix(&prefix) { - let trimmed = rest.trim(); - if let Some(inner) = trimmed.strip_prefix('"').and_then(|s| s.strip_suffix('"')) { - return Some(inner.replace("\\\"", "\"").replace("\\\\", "\\")); - } - return Some(trimmed.to_string()); - } - } - None -} - -/// Compose the full file content (front-matter + body) for `chunk`. -/// -/// Returns `(full_file_bytes, body_bytes)`. The caller writes `full_file_bytes` -/// to disk; `body_bytes` is what the SHA-256 is computed over. -pub fn compose_chunk_file(chunk: &Chunk) -> (Vec, Vec) { - let front_matter = build_front_matter(chunk); - let body = chunk.content.as_bytes().to_vec(); - - let mut full = Vec::with_capacity(front_matter.len() + body.len()); - full.extend_from_slice(&front_matter); - full.extend_from_slice(&body); - - (full, body) -} - -/// Build the YAML front-matter block (including delimiters) as UTF-8 bytes. -fn build_front_matter(chunk: &Chunk) -> Vec { - let meta = &chunk.metadata; - let ts = meta.timestamp.to_rfc3339(); - let ts_start = meta.time_range.0.to_rfc3339(); - let ts_end = meta.time_range.1.to_rfc3339(); - - let mut fm = String::new(); - fm.push_str("---\n"); - fm.push_str(&format!("source_kind: {}\n", meta.source_kind.as_str())); - // Escape backslashes and quotes in source_id for safety. 
- fm.push_str(&format!("source_id: {}\n", yaml_scalar(&meta.source_id))); - if let Some(path_scope) = meta.path_scope.as_deref() { - fm.push_str(&format!("path_scope: {}\n", yaml_scalar(path_scope))); - } - fm.push_str(&format!("seq: {}\n", chunk.seq_in_source)); - fm.push_str(&format!("owner: {}\n", yaml_scalar(&meta.owner))); - fm.push_str(&format!("timestamp: {ts}\n")); - fm.push_str(&format!("time_range_start: {ts_start}\n")); - fm.push_str(&format!("time_range_end: {ts_end}\n")); - - if let Some(ref sr) = meta.source_ref { - fm.push_str(&format!("source_ref: {}\n", yaml_scalar(&sr.value))); - } - - // Always seed the source tag so the Obsidian graph filter can pick - // up `source/` for every chunk regardless of what the - // ingest-side tag list contained. - let source_scope = meta.path_scope.as_deref().unwrap_or(&meta.source_id); - log::debug!( - "[content_store::compose] seeding source tag source_id={} source_scope={} path_scope={}", - crate::openhuman::memory::util::redact::redact(&meta.source_id), - crate::openhuman::memory::util::redact::redact(source_scope), - meta.path_scope.is_some() - ); - let seeded_tags = with_source_tag(source_scope, &meta.tags); - fm.push_str("tags:\n"); - for tag in &seeded_tags { - fm.push_str(&format!(" - {}\n", yaml_scalar(tag))); - } - - // Email-specific fields: participants list + Obsidian alias. - // Parsed from source_id which is `gmail:{participants}` for Gmail-ingested - // chunks, where participants is `addr1|addr2|...` (sorted, deduped). - // If the format doesn't match, these fields are omitted. 
- if meta.source_kind == SourceKind::Email { - if let Some(addrs) = parse_gmail_participants_source_id(&meta.source_id) { - // participants: YAML list - fm.push_str("participants:\n"); - for addr in &addrs { - fm.push_str(&format!(" - {}\n", yaml_scalar(addr))); - } - // aliases: human-readable conversation label for Obsidian - let alias = build_participants_alias(&addrs, chunk.seq_in_source); - fm.push_str("aliases:\n"); - fm.push_str(&format!(" - {}\n", yaml_scalar(&alias))); - } - } - - fm.push_str("---\n"); - fm.into_bytes() -} - -/// Parse a `gmail:{participants}` source_id into the list of participant addresses. -/// -/// `participants` is `addr1|addr2|...` (sorted, deduped, pipe-separated). -/// Returns `Some(Vec)` when the source_id has exactly two -/// colon-separated segments (`gmail` prefix + non-empty participants). Returns -/// `None` for legacy or malformed source_ids. -fn parse_gmail_participants_source_id(source_id: &str) -> Option> { - let (prefix, participants) = source_id.split_once(':')?; - if prefix != "gmail" || participants.is_empty() { - return None; - } - let addrs: Vec = participants - .split('|') - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()) - .collect(); - if addrs.is_empty() { - None - } else { - Some(addrs) - } -} - -/// Build a human-readable alias for an email chunk suitable for Obsidian's -/// `aliases:` field. -/// -/// For two participants: `"alice@x.com <-> bob@y.com: chunk 0"` -/// For more than two: `"alice@x.com <-> 2 others: chunk 0"` -/// (where `alice@x.com` is the first in sorted order) -/// -/// The alias is kept under ~80 characters to avoid YAML rendering issues. -fn build_participants_alias(addrs: &[String], seq: u32) -> String { - let label = match addrs { - [] => "unknown".to_string(), - [only] => only.clone(), - [first, second] => format!("{} <-> {}", first, second), - [first, rest @ ..] 
=> format!("{} <-> {} others", first, rest.len()), - }; - format!("{}: chunk {}", label, seq) -} - -/// Rewrite the `tags:` block in an existing file's front-matter, replacing it -/// with the new tag list while leaving the body unchanged. -/// -/// Returns the new full file bytes. Errors if the front-matter delimiters -/// cannot be found. -pub fn rewrite_tags(file_bytes: &[u8], new_tags: &[String]) -> Result, String> { - let content = - std::str::from_utf8(file_bytes).map_err(|e| format!("file is not valid UTF-8: {e}"))?; - - let (front_matter, body) = split_front_matter(content) - .ok_or_else(|| "cannot find front-matter delimiters".to_string())?; - - // Rewrite tags: block in the front-matter string. - let new_fm = replace_tags_in_front_matter(front_matter, new_tags)?; - - let mut out = Vec::with_capacity(new_fm.len() + body.len() + 4); - out.extend_from_slice(new_fm.as_bytes()); - out.extend_from_slice(body.as_bytes()); - Ok(out) -} - -/// Replace the `tags:` stanza in a front-matter string. Returns the new -/// front-matter string (delimiters preserved). -fn replace_tags_in_front_matter(fm: &str, new_tags: &[String]) -> Result { - // Build the replacement block. - let replacement = if new_tags.is_empty() { - "tags: []".to_string() - } else { - let mut s = "tags:".to_string(); - for tag in new_tags { - s.push('\n'); - s.push_str(&format!(" - {}", yaml_scalar(tag))); - } - s - }; - - // Locate the `tags:` key and consume through the block. - let lines: Vec<&str> = fm.lines().collect(); - let mut out_lines: Vec<&str> = Vec::new(); - let mut i = 0; - let mut found = false; - - while i < lines.len() { - let line = lines[i]; - if line == "tags: []" || line == "tags:" { - found = true; - // Skip all subsequent lines that are tag list items (start with ` - `). - // The replacement will be inserted wholesale. 
- i += 1; - if line == "tags:" { - while i < lines.len() && lines[i].starts_with(" - ") { - i += 1; - } - } - // We've consumed the old block; we'll append replacement after the loop. - continue; - } - out_lines.push(line); - i += 1; - } - - if !found { - return Err("tags: key not found in front-matter".to_string()); - } - - // Rebuild: all non-tag lines + replacement + closing `---`. - // Front-matter was: `---\n...\ntags: ...\n---\n` - // After loop, out_lines has everything except the tags block. - // Insert replacement before the closing `---`. - let closing = out_lines - .iter() - .rposition(|l| *l == "---") - .unwrap_or(out_lines.len()); - - let mut result_lines: Vec = - out_lines[..closing].iter().map(|l| l.to_string()).collect(); - result_lines.push(replacement); - result_lines.push("---".to_string()); - - let mut result = result_lines.join("\n"); - result.push('\n'); - Ok(result) -} - -// ── Summary composition ────────────────────────────────────────────────────── - -/// Input data required to compose a summary `.md` file. -pub struct SummaryComposeInput<'a> { - /// Stable id of the summary node (also used to derive the filename). - pub summary_id: &'a str, - /// Which tree (source / global / topic) this summary belongs to. - pub tree_kind: SummaryTreeKind, - /// Owning tree id (FK into `mem_tree_trees`). - pub tree_id: &'a str, - /// Raw tree scope string, e.g. `"gmail:alice@x.com|bob@y.com"` or `"global"`. - pub tree_scope: &'a str, - /// Level in the tree (L0 = leaves, L1+ = summaries). - pub level: u32, - /// Child ids (chunk_ids at L0 → L1, summary_ids for cascades). - pub child_ids: &'a [String], - /// Optional per-child wikilink basename overrides, aligned with - /// `child_ids` by index. When `Some(basename)` is provided for a - /// child, the front-matter `children: [[…]]` wikilink uses that - /// basename instead of `sanitize_filename(child_id)`. 
- /// - /// Used to point chunk-level children at their **raw archive** - /// files when the chunk store no longer stages on-disk `.md` - /// files (today: email, since email chunks live as byte ranges - /// inside `raw//_.md` instead of - /// `email//.md`). Without this, Obsidian - /// wikilinks resolve to a non-existent `[[]]` - /// target and the graph view stops drawing edges from L1 - /// summaries down to leaves. - /// - /// `None` (or `Some` entries that are themselves `None`) falls - /// back to the default `sanitize_filename(child_id)` behaviour, - /// which is correct for L≥2 (children are summary ids that map - /// to actual `summaries/...md` files) and for legacy chunks - /// still staged on-disk. - pub child_basenames: Option<&'a [Option]>, - /// Total child count (== child_ids.len() unless truncated). - pub child_count: usize, - /// Start of the time range covered by this summary's children. - pub time_range_start: DateTime, - /// End of the time range covered by this summary's children. - pub time_range_end: DateTime, - /// When the buffer was sealed into this summary node. - pub sealed_at: DateTime, - /// Raw summariser output text — the body written to disk. - pub body: &'a str, -} - -/// The composed front-matter, body, and full file content for a summary. -/// -/// `body` is what the SHA-256 integrity hash is computed over. -pub struct ComposedSummary { - /// The YAML front-matter block (including `---` delimiters), UTF-8 string. - pub front_matter: String, - /// The body (summariser output), UTF-8 string. - pub body: String, - /// `front_matter + body` — what gets written to disk. - pub full: String, -} - -/// Compose the full `.md` content for a summary node. -/// -/// Returns a [`ComposedSummary`] whose `full` field is written to disk. -/// SHA-256 is computed over `body` bytes only, not `full`. 
-pub fn compose_summary_md(record: &SummaryComposeInput<'_>) -> ComposedSummary { - let fm = build_summary_front_matter(record); - let body = record.body.to_string(); - let full = format!("{}{}", fm, body); - ComposedSummary { - front_matter: fm, - body, - full, - } -} - -/// Build the YAML front-matter block for a summary node. -fn build_summary_front_matter(r: &SummaryComposeInput<'_>) -> String { - let tree_kind_str = match r.tree_kind { - SummaryTreeKind::Source => "source", - SummaryTreeKind::Global => "global", - SummaryTreeKind::Topic => "topic", - }; - - let trs = r.time_range_start.to_rfc3339(); - let tre = r.time_range_end.to_rfc3339(); - let sealed = r.sealed_at.to_rfc3339(); - - let mut fm = String::new(); - fm.push_str("---\n"); - fm.push_str(&format!("id: {}\n", yaml_scalar(r.summary_id))); - fm.push_str("kind: summary\n"); - fm.push_str(&format!("tree_kind: {tree_kind_str}\n")); - fm.push_str(&format!("tree_id: {}\n", yaml_scalar(r.tree_id))); - fm.push_str(&format!("tree_scope: {}\n", yaml_scalar(r.tree_scope))); - fm.push_str(&format!("level: {}\n", r.level)); - - // children: YAML list of Obsidian wikilinks (`[[]]`) so the - // graph view draws summary→child edges. The wikilink target must match - // the actual file basename — for chunks that's the raw chunk_id (a SHA - // hash with no illegal chars), but for child summaries the structured id - // `summary:L:UUID` is sanitised to `summary-L-UUID` by - // `summary_rel_path` (colons are illegal on Windows NTFS). We apply the - // same sanitisation here so the link resolves. `yaml_scalar` auto-quotes - // because of the leading `[`, emitting `"[[]]"`. - if r.child_ids.is_empty() { - fm.push_str("children: []\n"); - } else { - fm.push_str("children:\n"); - for (i, id) in r.child_ids.iter().enumerate() { - // Prefer a caller-supplied basename override (used for L1 - // chunk children that live in the raw archive instead of - // the chunk-store path); fall back to the sanitised - // chunk/summary id. 
- let basename: String = match r - .child_basenames - .and_then(|overrides| overrides.get(i)) - .and_then(|slot| slot.as_ref()) - { - Some(b) => b.clone(), - None => summary_filename(id), - }; - let wikilink = format!("[[{}]]", basename); - fm.push_str(&format!(" - {}\n", yaml_scalar(&wikilink))); - } - } - fm.push_str(&format!("child_count: {}\n", r.child_count)); - fm.push_str(&format!("time_range_start: {trs}\n")); - fm.push_str(&format!("time_range_end: {tre}\n")); - fm.push_str(&format!("sealed_at: {sealed}\n")); - fm.push_str(&format!( - "openhuman_core_version: {}\n", - yaml_scalar(OPENHUMAN_CORE_VERSION) - )); - fm.push_str(&format!( - "memory_artifact_format: {}\n", - MEMORY_ARTIFACT_FORMAT - )); - - // aliases: human-readable title - let alias = build_summary_alias(r); - fm.push_str("aliases:\n"); - fm.push_str(&format!(" - {}\n", yaml_scalar(&alias))); - - // Source-tree summaries get a `source/` seed tag for graph - // filtering. Global / topic trees aggregate across sources, so the - // `source/...` tag has no single value there — leave them untagged - // at compose time (LLM extraction adds entity tags later). - if matches!(r.tree_kind, SummaryTreeKind::Source) { - fm.push_str("tags:\n"); - fm.push_str(&format!(" - {}\n", yaml_scalar(&source_tag(r.tree_scope)))); - } else { - fm.push_str("tags: []\n"); - } - fm.push_str("---\n"); - fm -} - -/// Build a human-readable alias for the summary's `aliases:` front-matter field. 
-fn build_summary_alias(r: &SummaryComposeInput<'_>) -> String { - let date_range = format_date_range(r.time_range_start, r.time_range_end); - match r.tree_kind { - SummaryTreeKind::Source => { - let scope_short = scope_short_label(r.tree_scope); - format!( - "L{} \u{00b7} {} \u{00b7} {} children \u{00b7} {}", - r.level, scope_short, r.child_count, date_range - ) - } - SummaryTreeKind::Global => { - format!( - "L{} \u{00b7} global digest \u{00b7} {}", - r.level, date_range - ) - } - SummaryTreeKind::Topic => { - // Strip protocol prefix like "topic:" from scope for readability. - let entity = r - .tree_scope - .split_once(':') - .map(|(_, v)| v) - .unwrap_or(r.tree_scope); - format!( - "L{} \u{00b7} topic {} \u{00b7} {} children", - r.level, entity, r.child_count - ) - } - } -} - -/// Format the date range as `"yyyy-mm-dd"` (if start == end date) or -/// `"yyyy-mm-dd–yyyy-mm-dd"`. -fn format_date_range(start: DateTime, end: DateTime) -> String { - let s = start.format("%Y-%m-%d").to_string(); - let e = end.format("%Y-%m-%d").to_string(); - if s == e { - s - } else { - format!("{s}\u{2013}{e}") // en dash - } -} - -/// Build a short human-readable label for the tree scope used in aliases. -/// -/// For Gmail source scopes like `"gmail:alice@x.com|bob@y.com"`: -/// - 2 participants → `"alice@x.com ↔ bob@y.com"` -/// - N > 2 → `"alice@x.com + N-1 others"` -/// - Otherwise → the raw scope (e.g. `"slack:#eng"`) -fn scope_short_label(scope: &str) -> String { - if let Some((prefix, participants)) = scope.split_once(':') { - if prefix == "gmail" && !participants.is_empty() { - let addrs: Vec<&str> = participants.split('|').collect(); - return match addrs.as_slice() { - [] => scope.to_string(), - [only] => only.to_string(), - [first, second] => format!("{} \u{2194} {}", first, second), // ↔ - [first, rest @ ..] 
=> format!("{} + {} others", first, rest.len()), - }; - } - } - scope.to_string() -} - -/// Rewrite the `tags:` block in a summary file's front-matter, replacing it -/// with the new tag list while leaving the body unchanged. -/// -/// Reuses the generic [`rewrite_tags`] function — the front-matter structure -/// is identical for both chunk and summary `.md` files. -pub fn rewrite_summary_tags(file_bytes: &[u8], new_tags: &[String]) -> Result, String> { - let rewritten = rewrite_tags(file_bytes, new_tags)?; - let content = - std::str::from_utf8(&rewritten).map_err(|e| format!("file is not valid UTF-8: {e}"))?; - let (front_matter, body) = split_front_matter(content) - .ok_or_else(|| "cannot find front-matter delimiters".to_string())?; - let front_matter = upsert_summary_provenance(front_matter); - - let mut out = Vec::with_capacity(front_matter.len() + body.len()); - out.extend_from_slice(front_matter.as_bytes()); - out.extend_from_slice(body.as_bytes()); - Ok(out) -} - -fn upsert_summary_provenance(front_matter: &str) -> String { - let mut lines: Vec = Vec::new(); - let mut inserted = false; - - for raw in front_matter.lines() { - if raw.starts_with("openhuman_core_version: ") - || raw.starts_with("memory_artifact_format: ") - { - continue; - } - if !inserted && raw == "aliases:" { - lines.push(format!( - "openhuman_core_version: {}", - yaml_scalar(OPENHUMAN_CORE_VERSION) - )); - lines.push(format!( - "memory_artifact_format: {}", - MEMORY_ARTIFACT_FORMAT - )); - inserted = true; - } - lines.push(raw.to_string()); - } - - if !inserted { - let insert_at = lines - .iter() - .rposition(|line| line == "---") - .unwrap_or(lines.len()); - lines.insert( - insert_at, - format!( - "openhuman_core_version: {}", - yaml_scalar(OPENHUMAN_CORE_VERSION) - ), - ); - lines.insert( - insert_at + 1, - format!("memory_artifact_format: {}", MEMORY_ARTIFACT_FORMAT), - ); - } - - let mut result = lines.join("\n"); - result.push('\n'); - result -} - -/// Split a file into `(front_matter, 
body)` at the second `---` delimiter. -/// -/// Returns `None` if the file does not have the expected `---\n...\n---\n` form. -pub fn split_front_matter(content: &str) -> Option<(&str, &str)> { - // The file must start with `---\n`. - if !content.starts_with("---\n") { - return None; - } - // Find the closing `---` line (must be `---` alone on a line after the first line). - let rest = &content[4..]; // skip the opening `---\n` - let close_idx = rest.find("\n---\n").or_else(|| { - // Could be at the very end (no body). - rest.strip_suffix("\n---").map(|r| r.len()) - })?; - let fm_end = 4 + close_idx + 5; // include `\n---\n` - debug_assert!(content.is_char_boundary(fm_end)); - Some((&content[..fm_end], &content[fm_end..])) -} - -/// Format a string as an unquoted YAML scalar when safe, or as a -/// double-quoted string when it contains special characters. -/// -/// We conservatively quote strings containing `:`, `#`, `[`, `]`, `{`, `}`, -/// `"`, `'`, `\`, leading/trailing whitespace, or that start with special -/// YAML indicator characters. -fn yaml_scalar(s: &str) -> String { - let needs_quoting = s.is_empty() - || s.trim() != s - || s.starts_with(|c: char| { - matches!( - c, - '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' 
| '%' | '@' | '`' - ) - }) - || s.contains([':', '#', '[', ']', '{', '}', '"', '\'']); - - if needs_quoting { - let escaped = s.replace('\\', "\\\\").replace('"', "\\\""); - format!("\"{escaped}\"") - } else { - s.to_string() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::openhuman::memory_store::chunks::types::{Metadata, SourceKind, SourceRef}; - use crate::openhuman::memory_store::content::paths::SummaryTreeKind; - use chrono::TimeZone; - - fn sample_chunk() -> Chunk { - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - Chunk { - id: "abc123".into(), - content: "## 2026-01-01T00:00:00Z — alice\nhello world".into(), - metadata: Metadata { - source_kind: SourceKind::Chat, - source_id: "slack:#eng".into(), - owner: "alice@example.com".into(), - timestamp: ts, - time_range: (ts, ts), - tags: vec!["person/Alice".into(), "org/Acme".into()], - source_ref: Some(SourceRef::new("slack://m1".to_string())), - path_scope: None, - }, - token_count: 10, - seq_in_source: 0, - created_at: ts, - partial_message: false, - } - } - - #[test] - fn compose_produces_front_matter_and_body() { - let chunk = sample_chunk(); - let (full, body) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - assert!(full_str.starts_with("---\n"), "must start with ---"); - assert!(full_str.contains("source_kind: chat")); - assert!(full_str.contains("source_id: \"slack:#eng\"")); - assert!(full_str.contains("seq: 0")); - assert!(full_str.contains("tags:")); - assert!(full_str.contains(" - person/Alice")); - assert!(full_str.ends_with("hello world")); - assert_eq!( - body, - b"## 2026-01-01T00:00:00Z \xe2\x80\x94 alice\nhello world" - ); - } - - #[test] - fn compose_persists_path_scope_and_seeds_scoped_source_tag() { - let mut chunk = sample_chunk(); - chunk.metadata.source_id = "notion:conn-1:page-123".into(); - chunk.metadata.path_scope = Some("notion:conn-1".into()); - - let (full, _) = compose_chunk_file(&chunk); - let full_str = 
std::str::from_utf8(&full).unwrap(); - - assert!(full_str.contains("source_id: \"notion:conn-1:page-123\"")); - assert!(full_str.contains("path_scope: \"notion:conn-1\"")); - assert!(full_str.contains(" - source/notion-conn-1")); - assert!(!full_str.contains(" - source/notion-conn-1-page-123")); - } - - #[test] - fn split_front_matter_round_trips() { - let chunk = sample_chunk(); - let (full, body) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - let (fm, b) = split_front_matter(full_str).expect("split must succeed"); - assert!(fm.starts_with("---\n")); - assert!(fm.ends_with("---\n")); - assert_eq!(b.as_bytes(), body.as_slice()); - } - - #[test] - fn rewrite_tags_preserves_body() { - let chunk = sample_chunk(); - let (full, body) = compose_chunk_file(&chunk); - let new_tags = vec!["person/Bob".into(), "project/Phoenix".into()]; - let rewritten = rewrite_tags(&full, &new_tags).unwrap(); - let rewritten_str = std::str::from_utf8(&rewritten).unwrap(); - assert!(rewritten_str.contains(" - person/Bob")); - assert!(!rewritten_str.contains(" - person/Alice")); - // Body must be unchanged. 
- assert!(rewritten_str.ends_with(std::str::from_utf8(&body).unwrap())); - } - - #[test] - fn rewrite_tags_empty_list() { - let chunk = sample_chunk(); - let (full, _) = compose_chunk_file(&chunk); - let rewritten = rewrite_tags(&full, &[]).unwrap(); - let s = std::str::from_utf8(&rewritten).unwrap(); - assert!(s.contains("tags: []")); - assert!(!s.contains(" - person/")); - } - - #[test] - fn yaml_scalar_quotes_special_characters() { - assert_eq!(yaml_scalar("slack:#eng"), "\"slack:#eng\""); - assert_eq!(yaml_scalar("hello world"), "hello world"); - assert_eq!(yaml_scalar(""), "\"\""); - } - - fn sample_email_chunk() -> Chunk { - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - Chunk { - id: "emailchunk1".into(), - content: "---\nFrom: alice@example.com\nSubject: Hello\n\nHello there.".into(), - metadata: Metadata { - source_kind: SourceKind::Email, - source_id: "gmail:alice@example.com|bob@example.com".into(), - owner: "owner@example.com".into(), - timestamp: ts, - time_range: (ts, ts), - tags: vec!["gmail".into()], - source_ref: None, - path_scope: None, - }, - token_count: 15, - seq_in_source: 0, - created_at: ts, - partial_message: false, - } - } - - #[test] - fn email_chunk_has_participants_list_and_alias() { - let chunk = sample_email_chunk(); - let (full, _body) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - // participants block must be a YAML list - assert!( - full_str.contains("participants:"), - "email chunk must have participants field; got:\n{full_str}" - ); - assert!( - full_str.contains(" - alice@example.com"), - "alice must appear as list item; got:\n{full_str}" - ); - assert!( - full_str.contains(" - bob@example.com"), - "bob must appear as list item; got:\n{full_str}" - ); - // aliases block must be present - assert!( - full_str.contains("aliases:"), - "email chunk must have aliases field; got:\n{full_str}" - ); - assert!( - full_str.contains("alice@example.com <-> bob@example.com: 
chunk 0"), - "alias must encode participants; got:\n{full_str}" - ); - // sender and thread_id must NOT appear - assert!( - !full_str.contains("sender:"), - "email chunk must NOT have sender field; got:\n{full_str}" - ); - assert!( - !full_str.contains("thread_id:"), - "email chunk must NOT have thread_id field; got:\n{full_str}" - ); - } - - #[test] - fn email_chunk_many_participants_alias_summarises() { - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let chunk = Chunk { - id: "em2".into(), - content: "body".into(), - metadata: Metadata { - source_kind: SourceKind::Email, - source_id: "gmail:alice@x.com|bob@y.com|carol@z.com".into(), - owner: "owner".into(), - timestamp: ts, - time_range: (ts, ts), - tags: vec![], - source_ref: None, - path_scope: None, - }, - token_count: 1, - seq_in_source: 3, - created_at: ts, - partial_message: false, - }; - let (full, _) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - assert!( - full_str.contains("participants:"), - "three-party chunk needs participants list; got:\n{full_str}" - ); - // With 3 participants: first + "2 others" - assert!( - full_str.contains("alice@x.com <-> 2 others: chunk 3"), - "alias with 3 participants must summarise; got:\n{full_str}" - ); - } - - #[test] - fn email_chunk_body_bytes_unchanged_by_extra_fields() { - // Adding participants/aliases to front-matter must not affect body_bytes - // (SHA-256 invariant: the hash is over body only, not front-matter). - let chunk = sample_email_chunk(); - let (full, body) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - // Body must still appear at the end unmodified. 
- assert!( - full_str.ends_with(std::str::from_utf8(&body).unwrap()), - "body bytes must appear unmodified after front-matter" - ); - // body must equal chunk.content bytes - assert_eq!(body, chunk.content.as_bytes()); - } - - #[test] - fn chat_chunk_has_no_email_specific_fields() { - let chunk = sample_chunk(); // source_kind = Chat - let (full, _) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - assert!( - !full_str.contains("aliases:"), - "chat chunk must not have aliases field" - ); - assert!( - !full_str.contains("participants:"), - "chat chunk must not have participants field" - ); - assert!( - !full_str.contains("sender:"), - "chat chunk must not have sender field" - ); - assert!( - !full_str.contains("thread_id:"), - "chat chunk must not have thread_id field" - ); - } - - #[test] - fn email_chunk_with_malformed_source_id_omits_extra_fields() { - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let chunk = Chunk { - id: "xyz".into(), - content: "body".into(), - metadata: Metadata { - source_kind: SourceKind::Email, - source_id: "legacysourceid".into(), // no `gmail:` prefix → parse fails - owner: "owner".into(), - timestamp: ts, - time_range: (ts, ts), - tags: vec![], - source_ref: None, - path_scope: None, - }, - token_count: 1, - seq_in_source: 0, - created_at: ts, - partial_message: false, - }; - let (full, _) = compose_chunk_file(&chunk); - let full_str = std::str::from_utf8(&full).unwrap(); - // Malformed source_id → no email extras, no panic. 
- assert!(!full_str.contains("aliases:")); - assert!(!full_str.contains("participants:")); - assert!(!full_str.contains("sender:")); - } - - // ─── summary compose tests ──────────────────────────────────────────────── - - fn sample_summary_input( - tree_kind: SummaryTreeKind, - scope: &str, - level: u32, - ) -> SummaryComposeInput<'static> { - let ts_start = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let ts_end = chrono::Utc.timestamp_millis_opt(1_700_086_400_000).unwrap(); - let sealed = chrono::Utc.timestamp_millis_opt(1_700_090_000_000).unwrap(); - // Leak the strings so they have 'static lifetime for this test helper. - // Only used in tests, not production code. - let scope: &'static str = Box::leak(scope.to_string().into_boxed_str()); - SummaryComposeInput { - summary_id: "summary:L1:abc", - tree_kind, - tree_id: "tree-id-001", - tree_scope: scope, - level, - child_ids: Box::leak( - vec!["child-1".to_string(), "child-2".to_string()].into_boxed_slice(), - ), - child_basenames: None, - child_count: 2, - time_range_start: ts_start, - time_range_end: ts_end, - sealed_at: sealed, - body: "This is the summariser output.\n", - } - } - - #[test] - fn compose_source_summary_has_required_front_matter() { - let input = sample_summary_input(SummaryTreeKind::Source, "gmail:alice@x.com|bob@y.com", 1); - let composed = compose_summary_md(&input); - let fm = &composed.front_matter; - assert!(fm.starts_with("---\n"), "front-matter must start with ---"); - assert!(fm.ends_with("---\n"), "front-matter must end with ---\\n"); - assert!(fm.contains("kind: summary"), "must have kind: summary"); - assert!( - fm.contains("tree_kind: source"), - "must have tree_kind: source" - ); - assert!(fm.contains("level: 1"), "must have level"); - assert!(fm.contains("child_count: 2"), "must have child_count"); - assert!( - fm.contains(&format!( - "openhuman_core_version: {}", - OPENHUMAN_CORE_VERSION - )), - "must stamp the core version" - ); - assert!( - 
fm.contains(&format!( - "memory_artifact_format: {}", - MEMORY_ARTIFACT_FORMAT - )), - "must stamp the artifact format epoch" - ); - assert!( - fm.contains(" - \"[[child-1]]\""), - "must list child ids as Obsidian wikilinks; got:\n{fm}" - ); - assert!( - fm.contains(" - \"[[child-2]]\""), - "must list child ids as Obsidian wikilinks; got:\n{fm}" - ); - assert!( - fm.contains(" - source/"), - "source-tree summary must seed source tag; got:\n{fm}" - ); - // aliases must mention the scope - assert!(fm.contains("aliases:"), "must have aliases"); - assert!( - composed.body == "This is the summariser output.\n", - "body must be the summariser text" - ); - assert!(composed.full.ends_with("This is the summariser output.\n")); - } - - #[test] - fn children_are_emitted_as_obsidian_wikilinks() { - // Contract: every entry in `children:` must be wrapped in `[[…]]` so - // Obsidian's graph view draws a summary→child edge. The YAML scalar is - // quoted because of the leading `[` — both forms below are required. - let input = sample_summary_input(SummaryTreeKind::Source, "gmail:alice@x.com", 1); - let composed = compose_summary_md(&input); - let fm = &composed.front_matter; - for id in ["child-1", "child-2"] { - let expected = format!(" - \"[[{id}]]\""); - assert!( - fm.contains(&expected), - "child id {id} must be emitted as a quoted wikilink ({expected}); got:\n{fm}" - ); - // Belt-and-braces: the bare id must NOT appear as a plain scalar - // (i.e. unwrapped). The wikilink form contains the id, so we - // search for the bare list-item form. - let plain = format!(" - {id}\n"); - assert!( - !fm.contains(&plain), - "child id {id} must not be emitted as a plain scalar; got:\n{fm}" - ); - } - } - - #[test] - fn child_basename_overrides_replace_chunk_id_in_wikilink() { - // L1 seals: each child's wikilink should point at the - // raw archive file basename, not the chunk_id hash. 
Without - // this override the link would be `[[<32-char hex>]]` and - // Obsidian wouldn't find a matching file (the chunk-store - // copy under `email//...` is gone after the - // raw_refs migration). - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let child_ids = vec!["abc123hash".to_string(), "def456hash".to_string()]; - let overrides: Vec> = vec![ - Some("1700000000000_msg-id-1".into()), - None, // second child has no override → falls back to sanitize_filename - ]; - let input = SummaryComposeInput { - summary_id: "summary:L1:test", - tree_kind: SummaryTreeKind::Source, - tree_id: "t1", - tree_scope: "gmail:alice@x.com", - level: 1, - child_ids: &child_ids, - child_basenames: Some(&overrides), - child_count: 2, - time_range_start: ts, - time_range_end: ts, - sealed_at: ts, - body: "L1 body", - }; - let composed = compose_summary_md(&input); - let fm = &composed.front_matter; - // First child uses the override (raw archive basename). - assert!( - fm.contains(r#" - "[[1700000000000_msg-id-1]]""#), - "first child must use override basename; got:\n{fm}" - ); - // Second child has None override — fall back to chunk_id. - assert!( - fm.contains(r#" - "[[def456hash]]""#), - "None override must fall back to sanitize_filename; got:\n{fm}" - ); - } - - #[test] - fn structured_child_summary_id_is_sanitised_in_wikilink() { - // Real-world case: an L2 summary lists child L1 summaries by their - // structured id (e.g. `summary:L1:UUID`). Colons are illegal in - // Windows NTFS filenames, so `summary_rel_path` writes the file as - // `summary-L1-UUID.md`. The wikilink target must match that basename - // — i.e. colons must be converted to dashes — otherwise Obsidian - // cannot resolve the link and the graph stays disconnected. 
- let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let child_id = "summary:L1:b9fa5f08-bf79-41a7-a5c8-2d87883d5c01"; - let expected_basename = "summary-L1-b9fa5f08-bf79-41a7-a5c8-2d87883d5c01"; - let input = SummaryComposeInput { - summary_id: "summary:L2:cc9a1224", - tree_kind: SummaryTreeKind::Source, - tree_id: "t1", - tree_scope: "gmail:alice@x.com", - level: 2, - child_ids: &[child_id.to_string()], - child_basenames: None, - child_count: 1, - time_range_start: ts, - time_range_end: ts, - sealed_at: ts, - body: "L2 body", - }; - let composed = compose_summary_md(&input); - let fm = &composed.front_matter; - let expected = format!(" - \"[[{expected_basename}]]\""); - assert!( - fm.contains(&expected), - "structured child id must be sanitised to filename basename in wikilink; \ - expected line: {expected}; got:\n{fm}" - ); - // Raw colon-bearing id must NOT appear inside `[[…]]` — that wikilink - // would not resolve in Obsidian. - assert!( - !fm.contains(&format!("[[{child_id}]]")), - "raw structured id with colons must not appear inside wikilink; got:\n{fm}" - ); - } - - #[test] - fn compose_global_summary_alias_format() { - let input = sample_summary_input(SummaryTreeKind::Global, "global", 0); - let composed = compose_summary_md(&input); - assert!( - composed.front_matter.contains("tree_kind: global"), - "must have tree_kind: global" - ); - assert!( - composed.front_matter.contains("global digest"), - "alias must mention 'global digest'" - ); - } - - #[test] - fn compose_topic_summary_alias_format() { - let input = sample_summary_input(SummaryTreeKind::Topic, "person:alex-johnson", 1); - let composed = compose_summary_md(&input); - assert!( - composed.front_matter.contains("tree_kind: topic"), - "must have tree_kind: topic" - ); - assert!( - composed.front_matter.contains("topic"), - "alias must mention topic entity" - ); - } - - #[test] - fn compose_summary_with_zero_children() { - let ts = 
chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let input = SummaryComposeInput { - summary_id: "summary:L0:empty", - tree_kind: SummaryTreeKind::Source, - tree_id: "t1", - tree_scope: "gmail:alice@x.com", - level: 0, - child_ids: &[], - child_basenames: None, - child_count: 0, - time_range_start: ts, - time_range_end: ts, - sealed_at: ts, - body: "empty", - }; - let composed = compose_summary_md(&input); - assert!(composed.front_matter.contains("children: []")); - assert!(composed.front_matter.contains("child_count: 0")); - } - - #[test] - fn compose_summary_same_start_end_date_single_date_alias() { - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let input = SummaryComposeInput { - summary_id: "summary:L1:sameday", - tree_kind: SummaryTreeKind::Global, - tree_id: "t1", - tree_scope: "global", - level: 1, - child_ids: &["child-a".to_string()], - child_basenames: None, - child_count: 1, - time_range_start: ts, - time_range_end: ts, // same as start - sealed_at: ts, - body: "day recap", - }; - let composed = compose_summary_md(&input); - // Alias must contain just one date, not "date–date" - let alias_line = composed - .front_matter - .lines() - .find(|l| l.contains("L1") && l.contains("global digest")) - .expect("alias line must be present"); - // The date should appear exactly once (no en-dash range) - let date_str = ts.format("%Y-%m-%d").to_string(); - assert!( - alias_line.contains(&date_str), - "alias must contain the date; got: {alias_line}" - ); - // Must not contain an en-dash (range indicator) - assert!( - !alias_line.contains('\u{2013}'), - "same-day alias must not have en-dash range; got: {alias_line}" - ); - } - - #[test] - fn scope_short_label_two_participants() { - let label = scope_short_label("gmail:alice@x.com|bob@y.com"); - assert_eq!(label, "alice@x.com \u{2194} bob@y.com"); - } - - #[test] - fn scope_short_label_many_participants() { - let label = 
scope_short_label("gmail:alice@x.com|bob@y.com|carol@z.com"); - assert_eq!(label, "alice@x.com + 2 others"); - } - - #[test] - fn scope_short_label_non_gmail_returns_raw() { - let label = scope_short_label("slack:#general"); - assert_eq!(label, "slack:#general"); - } - - #[test] - fn rewrite_summary_tags_delegates_to_rewrite_tags() { - // compose a summary, then rewrite its tags — body must stay unchanged. - let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); - let input = SummaryComposeInput { - summary_id: "sum:L1:rwttest", - tree_kind: SummaryTreeKind::Source, - tree_id: "t1", - tree_scope: "gmail:alice@x.com", - level: 1, - child_ids: &["c1".to_string()], - child_basenames: None, - child_count: 1, - time_range_start: ts, - time_range_end: ts, - sealed_at: ts, - body: "summary body text", - }; - let composed = compose_summary_md(&input); - let file_bytes = composed.full.as_bytes(); - let new_tags = vec!["person/Alice-Smith".to_string(), "topic/Memory".to_string()]; - let rewritten = rewrite_summary_tags(file_bytes, &new_tags).unwrap(); - let rewritten_str = std::str::from_utf8(&rewritten).unwrap(); - assert!(rewritten_str.contains(" - person/Alice-Smith")); - assert!(rewritten_str.contains(" - topic/Memory")); - assert!(!rewritten_str.contains("tags: []")); - assert!(rewritten_str.contains(&format!( - "openhuman_core_version: {}", - OPENHUMAN_CORE_VERSION - ))); - assert!(rewritten_str.contains(&format!( - "memory_artifact_format: {}", - MEMORY_ARTIFACT_FORMAT - ))); - // Body must be unchanged - assert!(rewritten_str.ends_with("summary body text")); - } - - #[test] - fn rewrite_summary_tags_backfills_missing_provenance() { - let file = - b"---\nid: legacy\nkind: summary\ntags: []\naliases:\n - legacy\n---\nlegacy body"; - let rewritten = rewrite_summary_tags(file, &["person/Alice".to_string()]).unwrap(); - let rewritten_str = std::str::from_utf8(&rewritten).unwrap(); - assert!(rewritten_str.contains(&format!( - "openhuman_core_version: {}", - 
OPENHUMAN_CORE_VERSION - ))); - assert!(rewritten_str.contains(&format!( - "memory_artifact_format: {}", - MEMORY_ARTIFACT_FORMAT - ))); - assert!(rewritten_str.ends_with("legacy body")); - } -} diff --git a/src/openhuman/memory_store/content/compose/chunk.rs b/src/openhuman/memory_store/content/compose/chunk.rs new file mode 100644 index 0000000000..790df59897 --- /dev/null +++ b/src/openhuman/memory_store/content/compose/chunk.rs @@ -0,0 +1,209 @@ +//! Chunk `.md` file composition and tag rewriting. + +use crate::openhuman::memory_store::chunks::types::{Chunk, SourceKind}; +use crate::openhuman::memory_store::content::compose::yaml::{ + split_front_matter, with_source_tag, yaml_scalar, +}; + +/// Compose the full file content (front-matter + body) for `chunk`. +/// +/// Returns `(full_file_bytes, body_bytes)`. The caller writes `full_file_bytes` +/// to disk; `body_bytes` is what the SHA-256 is computed over. +pub fn compose_chunk_file(chunk: &Chunk) -> (Vec, Vec) { + let front_matter = build_front_matter(chunk); + let body = chunk.content.as_bytes().to_vec(); + + let mut full = Vec::with_capacity(front_matter.len() + body.len()); + full.extend_from_slice(&front_matter); + full.extend_from_slice(&body); + + (full, body) +} + +/// Build the YAML front-matter block (including delimiters) as UTF-8 bytes. +fn build_front_matter(chunk: &Chunk) -> Vec { + let meta = &chunk.metadata; + let ts = meta.timestamp.to_rfc3339(); + let ts_start = meta.time_range.0.to_rfc3339(); + let ts_end = meta.time_range.1.to_rfc3339(); + + let mut fm = String::new(); + fm.push_str("---\n"); + fm.push_str(&format!("source_kind: {}\n", meta.source_kind.as_str())); + // Escape backslashes and quotes in source_id for safety. 
+ fm.push_str(&format!("source_id: {}\n", yaml_scalar(&meta.source_id))); + if let Some(path_scope) = meta.path_scope.as_deref() { + fm.push_str(&format!("path_scope: {}\n", yaml_scalar(path_scope))); + } + fm.push_str(&format!("seq: {}\n", chunk.seq_in_source)); + fm.push_str(&format!("owner: {}\n", yaml_scalar(&meta.owner))); + fm.push_str(&format!("timestamp: {ts}\n")); + fm.push_str(&format!("time_range_start: {ts_start}\n")); + fm.push_str(&format!("time_range_end: {ts_end}\n")); + + if let Some(ref sr) = meta.source_ref { + fm.push_str(&format!("source_ref: {}\n", yaml_scalar(&sr.value))); + } + + // Always seed the source tag so the Obsidian graph filter can pick + // up `source/` for every chunk regardless of what the + // ingest-side tag list contained. + let source_scope = meta.path_scope.as_deref().unwrap_or(&meta.source_id); + log::debug!( + "[content_store::compose] seeding source tag source_id={} source_scope={} path_scope={}", + crate::openhuman::memory::util::redact::redact(&meta.source_id), + crate::openhuman::memory::util::redact::redact(source_scope), + meta.path_scope.is_some() + ); + let seeded_tags = with_source_tag(source_scope, &meta.tags); + fm.push_str("tags:\n"); + for tag in &seeded_tags { + fm.push_str(&format!(" - {}\n", yaml_scalar(tag))); + } + + // Email-specific fields: participants list + Obsidian alias. + // Parsed from source_id which is `gmail:{participants}` for Gmail-ingested + // chunks, where participants is `addr1|addr2|...` (sorted, deduped). + // If the format doesn't match, these fields are omitted. 
+ if meta.source_kind == SourceKind::Email { + if let Some(addrs) = parse_gmail_participants_source_id(&meta.source_id) { + // participants: YAML list + fm.push_str("participants:\n"); + for addr in &addrs { + fm.push_str(&format!(" - {}\n", yaml_scalar(addr))); + } + // aliases: human-readable conversation label for Obsidian + let alias = build_participants_alias(&addrs, chunk.seq_in_source); + fm.push_str("aliases:\n"); + fm.push_str(&format!(" - {}\n", yaml_scalar(&alias))); + } + } + + fm.push_str("---\n"); + fm.into_bytes() +} + +/// Parse a `gmail:{participants}` source_id into the list of participant addresses. +/// +/// `participants` is `addr1|addr2|...` (sorted, deduped, pipe-separated). +/// Returns `Some(Vec)` when the source_id has exactly two +/// colon-separated segments (`gmail` prefix + non-empty participants). Returns +/// `None` for legacy or malformed source_ids. +fn parse_gmail_participants_source_id(source_id: &str) -> Option> { + let (prefix, participants) = source_id.split_once(':')?; + if prefix != "gmail" || participants.is_empty() { + return None; + } + let addrs: Vec = participants + .split('|') + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()) + .collect(); + if addrs.is_empty() { + None + } else { + Some(addrs) + } +} + +/// Build a human-readable alias for an email chunk suitable for Obsidian's +/// `aliases:` field. +/// +/// For two participants: `"alice@x.com <-> bob@y.com: chunk 0"` +/// For more than two: `"alice@x.com <-> 2 others: chunk 0"` +/// (where `alice@x.com` is the first in sorted order) +/// +/// The alias is kept under ~80 characters to avoid YAML rendering issues. +fn build_participants_alias(addrs: &[String], seq: u32) -> String { + let label = match addrs { + [] => "unknown".to_string(), + [only] => only.clone(), + [first, second] => format!("{} <-> {}", first, second), + [first, rest @ ..] 
=> format!("{} <-> {} others", first, rest.len()), + }; + format!("{}: chunk {}", label, seq) +} + +/// Rewrite the `tags:` block in an existing file's front-matter, replacing it +/// with the new tag list while leaving the body unchanged. +/// +/// Returns the new full file bytes. Errors if the front-matter delimiters +/// cannot be found. +pub fn rewrite_tags(file_bytes: &[u8], new_tags: &[String]) -> Result, String> { + let content = + std::str::from_utf8(file_bytes).map_err(|e| format!("file is not valid UTF-8: {e}"))?; + + let (front_matter, body) = split_front_matter(content) + .ok_or_else(|| "cannot find front-matter delimiters".to_string())?; + + // Rewrite tags: block in the front-matter string. + let new_fm = replace_tags_in_front_matter(front_matter, new_tags)?; + + let mut out = Vec::with_capacity(new_fm.len() + body.len() + 4); + out.extend_from_slice(new_fm.as_bytes()); + out.extend_from_slice(body.as_bytes()); + Ok(out) +} + +/// Replace the `tags:` stanza in a front-matter string. Returns the new +/// front-matter string (delimiters preserved). +fn replace_tags_in_front_matter(fm: &str, new_tags: &[String]) -> Result { + // Build the replacement block. + let replacement = if new_tags.is_empty() { + "tags: []".to_string() + } else { + let mut s = "tags:".to_string(); + for tag in new_tags { + s.push('\n'); + s.push_str(&format!(" - {}", yaml_scalar(tag))); + } + s + }; + + // Locate the `tags:` key and consume through the block. + let lines: Vec<&str> = fm.lines().collect(); + let mut out_lines: Vec<&str> = Vec::new(); + let mut i = 0; + let mut found = false; + + while i < lines.len() { + let line = lines[i]; + if line == "tags: []" || line == "tags:" { + found = true; + // Skip all subsequent lines that are tag list items (start with ` - `). + // The replacement will be inserted wholesale. 
+ i += 1; + if line == "tags:" { + while i < lines.len() && lines[i].starts_with(" - ") { + i += 1; + } + } + // We've consumed the old block; we'll append replacement after the loop. + continue; + } + out_lines.push(line); + i += 1; + } + + if !found { + return Err("tags: key not found in front-matter".to_string()); + } + + // Rebuild: all non-tag lines + replacement + closing `---`. + // Front-matter was: `---\n...\ntags: ...\n---\n` + // After loop, out_lines has everything except the tags block. + // Insert replacement before the closing `---`. + let closing = out_lines + .iter() + .rposition(|l| *l == "---") + .unwrap_or(out_lines.len()); + + let mut result_lines: Vec = + out_lines[..closing].iter().map(|l| l.to_string()).collect(); + result_lines.push(replacement); + result_lines.push("---".to_string()); + + let mut result = result_lines.join("\n"); + result.push('\n'); + Ok(result) +} diff --git a/src/openhuman/memory_store/content/compose/mod.rs b/src/openhuman/memory_store/content/compose/mod.rs new file mode 100644 index 0000000000..bc4f51a1f2 --- /dev/null +++ b/src/openhuman/memory_store/content/compose/mod.rs @@ -0,0 +1,53 @@ +//! YAML front-matter + body composition for chunk `.md` files. +//! +//! Each file written to disk has the form: +//! ```text +//! --- +//! source_kind: chat +//! source_id: slack:#eng +//! seq: 0 +//! owner: alice@example.com +//! timestamp: 2026-04-28T10:00:00Z +//! time_range_start: 2026-04-28T10:00:00Z +//! time_range_end: 2026-04-28T10:05:00Z +//! source_ref: slack://permalink/… +//! tags: +//! - person/Alice-Smith +//! - project/Phoenix +//! --- +//! ## 2026-04-28T10:00:00Z — alice +//! Message body here. +//! ``` +//! +//! For email source_kind, additional fields are emitted: +//! ```text +//! participants: +//! - alice@example.com +//! - bob@example.com +//! aliases: +//! - "alice@example.com <-> bob@example.com: chunk 0" +//! ``` +//! These are parsed from the `source_id` field (format `gmail:{participants}` +//! 
where `participants` is `addr1|addr2|...` pipe-separated) at compose time. +//! `sender` and `thread_id` are no longer emitted — they are not meaningful +//! with participant-based bucketing. +//! +//! **SHA-256 is computed over the body bytes only** (everything after `---\n` +//! on the second delimiter line). This allows tags to be rewritten atomically +//! without invalidating the content hash. + +pub mod chunk; +pub mod summary; +pub mod yaml; + +#[cfg(test)] +mod tests; + +pub const MEMORY_ARTIFACT_FORMAT: u32 = 2; +pub const OPENHUMAN_CORE_VERSION: &str = env!("CARGO_PKG_VERSION"); + +// ── Re-exports (preserve original public API) ──────────────────────────────── + +pub use chunk::{compose_chunk_file, rewrite_tags}; +pub use summary::{compose_summary_md, rewrite_summary_tags, ComposedSummary, SummaryComposeInput}; +pub use yaml::{scan_fm_field, source_tag, split_front_matter, with_source_tag}; diff --git a/src/openhuman/memory_store/content/compose/summary.rs b/src/openhuman/memory_store/content/compose/summary.rs new file mode 100644 index 0000000000..47db6ae0f3 --- /dev/null +++ b/src/openhuman/memory_store/content/compose/summary.rs @@ -0,0 +1,298 @@ +//! Summary `.md` file composition and tag rewriting. + +use chrono::{DateTime, Utc}; + +use crate::openhuman::memory_store::content::compose::chunk::rewrite_tags; +use crate::openhuman::memory_store::content::compose::yaml::{ + source_tag, split_front_matter, yaml_scalar, +}; +use crate::openhuman::memory_store::content::compose::{ + MEMORY_ARTIFACT_FORMAT, OPENHUMAN_CORE_VERSION, +}; +use crate::openhuman::memory_store::content::paths::{summary_filename, SummaryTreeKind}; + +/// Input data required to compose a summary `.md` file. +pub struct SummaryComposeInput<'a> { + /// Stable id of the summary node (also used to derive the filename). + pub summary_id: &'a str, + /// Which tree (source / global / topic) this summary belongs to. 
+ pub tree_kind: SummaryTreeKind, + /// Owning tree id (FK into `mem_tree_trees`). + pub tree_id: &'a str, + /// Raw tree scope string, e.g. `"gmail:alice@x.com|bob@y.com"` or `"global"`. + pub tree_scope: &'a str, + /// Level in the tree (L0 = leaves, L1+ = summaries). + pub level: u32, + /// Child ids (chunk_ids at L0 → L1, summary_ids for cascades). + pub child_ids: &'a [String], + /// Optional per-child wikilink basename overrides, aligned with + /// `child_ids` by index. When `Some(basename)` is provided for a + /// child, the front-matter `children: [[…]]` wikilink uses that + /// basename instead of `sanitize_filename(child_id)`. + /// + /// Used to point chunk-level children at their **raw archive** + /// files when the chunk store no longer stages on-disk `.md` + /// files (today: email, since email chunks live as byte ranges + /// inside `raw//_.md` instead of + /// `email//.md`). Without this, Obsidian + /// wikilinks resolve to a non-existent `[[]]` + /// target and the graph view stops drawing edges from L1 + /// summaries down to leaves. + /// + /// `None` (or `Some` entries that are themselves `None`) falls + /// back to the default `sanitize_filename(child_id)` behaviour, + /// which is correct for L≥2 (children are summary ids that map + /// to actual `summaries/...md` files) and for legacy chunks + /// still staged on-disk. + pub child_basenames: Option<&'a [Option]>, + /// Total child count (== child_ids.len() unless truncated). + pub child_count: usize, + /// Start of the time range covered by this summary's children. + pub time_range_start: DateTime, + /// End of the time range covered by this summary's children. + pub time_range_end: DateTime, + /// When the buffer was sealed into this summary node. + pub sealed_at: DateTime, + /// Raw summariser output text — the body written to disk. + pub body: &'a str, +} + +/// The composed front-matter, body, and full file content for a summary. 
+/// +/// `body` is what the SHA-256 integrity hash is computed over. +pub struct ComposedSummary { + /// The YAML front-matter block (including `---` delimiters), UTF-8 string. + pub front_matter: String, + /// The body (summariser output), UTF-8 string. + pub body: String, + /// `front_matter + body` — what gets written to disk. + pub full: String, +} + +/// Compose the full `.md` content for a summary node. +/// +/// Returns a [`ComposedSummary`] whose `full` field is written to disk. +/// SHA-256 is computed over `body` bytes only, not `full`. +pub fn compose_summary_md(record: &SummaryComposeInput<'_>) -> ComposedSummary { + let fm = build_summary_front_matter(record); + let body = record.body.to_string(); + let full = format!("{}{}", fm, body); + ComposedSummary { + front_matter: fm, + body, + full, + } +} + +/// Build the YAML front-matter block for a summary node. +fn build_summary_front_matter(r: &SummaryComposeInput<'_>) -> String { + let tree_kind_str = match r.tree_kind { + SummaryTreeKind::Source => "source", + SummaryTreeKind::Global => "global", + SummaryTreeKind::Topic => "topic", + }; + + let trs = r.time_range_start.to_rfc3339(); + let tre = r.time_range_end.to_rfc3339(); + let sealed = r.sealed_at.to_rfc3339(); + + let mut fm = String::new(); + fm.push_str("---\n"); + fm.push_str(&format!("id: {}\n", yaml_scalar(r.summary_id))); + fm.push_str("kind: summary\n"); + fm.push_str(&format!("tree_kind: {tree_kind_str}\n")); + fm.push_str(&format!("tree_id: {}\n", yaml_scalar(r.tree_id))); + fm.push_str(&format!("tree_scope: {}\n", yaml_scalar(r.tree_scope))); + fm.push_str(&format!("level: {}\n", r.level)); + + // children: YAML list of Obsidian wikilinks (`[[]]`) so the + // graph view draws summary→child edges. 
The wikilink target must match + // the actual file basename — for chunks that's the raw chunk_id (a SHA + // hash with no illegal chars), but for child summaries the structured id + // `summary:L:UUID` is sanitised to `summary-L-UUID` by + // `summary_rel_path` (colons are illegal on Windows NTFS). We apply the + // same sanitisation here so the link resolves. `yaml_scalar` auto-quotes + // because of the leading `[`, emitting `"[[]]"`. + if r.child_ids.is_empty() { + fm.push_str("children: []\n"); + } else { + fm.push_str("children:\n"); + for (i, id) in r.child_ids.iter().enumerate() { + // Prefer a caller-supplied basename override (used for L1 + // chunk children that live in the raw archive instead of + // the chunk-store path); fall back to the sanitised + // chunk/summary id. + let basename: String = match r + .child_basenames + .and_then(|overrides| overrides.get(i)) + .and_then(|slot| slot.as_ref()) + { + Some(b) => b.clone(), + None => summary_filename(id), + }; + let wikilink = format!("[[{}]]", basename); + fm.push_str(&format!(" - {}\n", yaml_scalar(&wikilink))); + } + } + fm.push_str(&format!("child_count: {}\n", r.child_count)); + fm.push_str(&format!("time_range_start: {trs}\n")); + fm.push_str(&format!("time_range_end: {tre}\n")); + fm.push_str(&format!("sealed_at: {sealed}\n")); + fm.push_str(&format!( + "openhuman_core_version: {}\n", + yaml_scalar(OPENHUMAN_CORE_VERSION) + )); + fm.push_str(&format!( + "memory_artifact_format: {}\n", + MEMORY_ARTIFACT_FORMAT + )); + + // aliases: human-readable title + let alias = build_summary_alias(r); + fm.push_str("aliases:\n"); + fm.push_str(&format!(" - {}\n", yaml_scalar(&alias))); + + // Source-tree summaries get a `source/` seed tag for graph + // filtering. Global / topic trees aggregate across sources, so the + // `source/...` tag has no single value there — leave them untagged + // at compose time (LLM extraction adds entity tags later). 
+ if matches!(r.tree_kind, SummaryTreeKind::Source) { + fm.push_str("tags:\n"); + fm.push_str(&format!(" - {}\n", yaml_scalar(&source_tag(r.tree_scope)))); + } else { + fm.push_str("tags: []\n"); + } + fm.push_str("---\n"); + fm +} + +/// Build a human-readable alias for the summary's `aliases:` front-matter field. +fn build_summary_alias(r: &SummaryComposeInput<'_>) -> String { + let date_range = format_date_range(r.time_range_start, r.time_range_end); + match r.tree_kind { + SummaryTreeKind::Source => { + let scope_short = scope_short_label(r.tree_scope); + format!( + "L{} \u{00b7} {} \u{00b7} {} children \u{00b7} {}", + r.level, scope_short, r.child_count, date_range + ) + } + SummaryTreeKind::Global => { + format!( + "L{} \u{00b7} global digest \u{00b7} {}", + r.level, date_range + ) + } + SummaryTreeKind::Topic => { + // Strip protocol prefix like "topic:" from scope for readability. + let entity = r + .tree_scope + .split_once(':') + .map(|(_, v)| v) + .unwrap_or(r.tree_scope); + format!( + "L{} \u{00b7} topic {} \u{00b7} {} children", + r.level, entity, r.child_count + ) + } + } +} + +/// Format the date range as `"yyyy-mm-dd"` (if start == end date) or +/// `"yyyy-mm-dd–yyyy-mm-dd"`. +fn format_date_range(start: DateTime, end: DateTime) -> String { + let s = start.format("%Y-%m-%d").to_string(); + let e = end.format("%Y-%m-%d").to_string(); + if s == e { + s + } else { + format!("{s}\u{2013}{e}") // en dash + } +} + +/// Build a short human-readable label for the tree scope used in aliases. +/// +/// For Gmail source scopes like `"gmail:alice@x.com|bob@y.com"`: +/// - 2 participants → `"alice@x.com ↔ bob@y.com"` +/// - N > 2 → `"alice@x.com + N-1 others"` +/// - Otherwise → the raw scope (e.g. 
`"slack:#eng"`) +pub fn scope_short_label(scope: &str) -> String { + if let Some((prefix, participants)) = scope.split_once(':') { + if prefix == "gmail" && !participants.is_empty() { + let addrs: Vec<&str> = participants.split('|').collect(); + return match addrs.as_slice() { + [] => scope.to_string(), + [only] => only.to_string(), + [first, second] => format!("{} \u{2194} {}", first, second), // ↔ + [first, rest @ ..] => format!("{} + {} others", first, rest.len()), + }; + } + } + scope.to_string() +} + +/// Rewrite the `tags:` block in a summary file's front-matter, replacing it +/// with the new tag list while leaving the body unchanged. +/// +/// Reuses the generic [`rewrite_tags`] function — the front-matter structure +/// is identical for both chunk and summary `.md` files. +pub fn rewrite_summary_tags(file_bytes: &[u8], new_tags: &[String]) -> Result, String> { + let rewritten = rewrite_tags(file_bytes, new_tags)?; + let content = + std::str::from_utf8(&rewritten).map_err(|e| format!("file is not valid UTF-8: {e}"))?; + let (front_matter, body) = split_front_matter(content) + .ok_or_else(|| "cannot find front-matter delimiters".to_string())?; + let front_matter = upsert_summary_provenance(front_matter); + + let mut out = Vec::with_capacity(front_matter.len() + body.len()); + out.extend_from_slice(front_matter.as_bytes()); + out.extend_from_slice(body.as_bytes()); + Ok(out) +} + +fn upsert_summary_provenance(front_matter: &str) -> String { + let mut lines: Vec = Vec::new(); + let mut inserted = false; + + for raw in front_matter.lines() { + if raw.starts_with("openhuman_core_version: ") + || raw.starts_with("memory_artifact_format: ") + { + continue; + } + if !inserted && raw == "aliases:" { + lines.push(format!( + "openhuman_core_version: {}", + yaml_scalar(OPENHUMAN_CORE_VERSION) + )); + lines.push(format!( + "memory_artifact_format: {}", + MEMORY_ARTIFACT_FORMAT + )); + inserted = true; + } + lines.push(raw.to_string()); + } + + if !inserted { + let 
insert_at = lines + .iter() + .rposition(|line| line == "---") + .unwrap_or(lines.len()); + lines.insert( + insert_at, + format!( + "openhuman_core_version: {}", + yaml_scalar(OPENHUMAN_CORE_VERSION) + ), + ); + lines.insert( + insert_at + 1, + format!("memory_artifact_format: {}", MEMORY_ARTIFACT_FORMAT), + ); + } + + let mut result = lines.join("\n"); + result.push('\n'); + result +} diff --git a/src/openhuman/memory_store/content/compose/tests.rs b/src/openhuman/memory_store/content/compose/tests.rs new file mode 100644 index 0000000000..df7e06bfb2 --- /dev/null +++ b/src/openhuman/memory_store/content/compose/tests.rs @@ -0,0 +1,622 @@ +#[cfg(test)] +mod tests { + use crate::openhuman::memory_store::chunks::types::{Chunk, Metadata, SourceKind, SourceRef}; + use crate::openhuman::memory_store::content::compose::chunk::{ + compose_chunk_file, rewrite_tags, + }; + use crate::openhuman::memory_store::content::compose::summary::{ + compose_summary_md, rewrite_summary_tags, scope_short_label, SummaryComposeInput, + }; + use crate::openhuman::memory_store::content::compose::yaml::{split_front_matter, yaml_scalar}; + use crate::openhuman::memory_store::content::compose::{ + MEMORY_ARTIFACT_FORMAT, OPENHUMAN_CORE_VERSION, + }; + use crate::openhuman::memory_store::content::paths::SummaryTreeKind; + use chrono::TimeZone; + + fn sample_chunk() -> Chunk { + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + Chunk { + id: "abc123".into(), + content: "## 2026-01-01T00:00:00Z — alice\nhello world".into(), + metadata: Metadata { + source_kind: SourceKind::Chat, + source_id: "slack:#eng".into(), + owner: "alice@example.com".into(), + timestamp: ts, + time_range: (ts, ts), + tags: vec!["person/Alice".into(), "org/Acme".into()], + source_ref: Some(SourceRef::new("slack://m1".to_string())), + path_scope: None, + }, + token_count: 10, + seq_in_source: 0, + created_at: ts, + partial_message: false, + } + } + + #[test] + fn 
compose_produces_front_matter_and_body() { + let chunk = sample_chunk(); + let (full, body) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + assert!(full_str.starts_with("---\n"), "must start with ---"); + assert!(full_str.contains("source_kind: chat")); + assert!(full_str.contains("source_id: \"slack:#eng\"")); + assert!(full_str.contains("seq: 0")); + assert!(full_str.contains("tags:")); + assert!(full_str.contains(" - person/Alice")); + assert!(full_str.ends_with("hello world")); + assert_eq!( + body, + b"## 2026-01-01T00:00:00Z \xe2\x80\x94 alice\nhello world" + ); + } + + #[test] + fn compose_persists_path_scope_and_seeds_scoped_source_tag() { + let mut chunk = sample_chunk(); + chunk.metadata.source_id = "notion:conn-1:page-123".into(); + chunk.metadata.path_scope = Some("notion:conn-1".into()); + + let (full, _) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + + assert!(full_str.contains("source_id: \"notion:conn-1:page-123\"")); + assert!(full_str.contains("path_scope: \"notion:conn-1\"")); + assert!(full_str.contains(" - source/notion-conn-1")); + assert!(!full_str.contains(" - source/notion-conn-1-page-123")); + } + + #[test] + fn split_front_matter_round_trips() { + let chunk = sample_chunk(); + let (full, body) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + let (fm, b) = split_front_matter(full_str).expect("split must succeed"); + assert!(fm.starts_with("---\n")); + assert!(fm.ends_with("---\n")); + assert_eq!(b.as_bytes(), body.as_slice()); + } + + #[test] + fn rewrite_tags_preserves_body() { + let chunk = sample_chunk(); + let (full, body) = compose_chunk_file(&chunk); + let new_tags = vec!["person/Bob".into(), "project/Phoenix".into()]; + let rewritten = rewrite_tags(&full, &new_tags).unwrap(); + let rewritten_str = std::str::from_utf8(&rewritten).unwrap(); + assert!(rewritten_str.contains(" - person/Bob")); + 
assert!(!rewritten_str.contains(" - person/Alice")); + // Body must be unchanged. + assert!(rewritten_str.ends_with(std::str::from_utf8(&body).unwrap())); + } + + #[test] + fn rewrite_tags_empty_list() { + let chunk = sample_chunk(); + let (full, _) = compose_chunk_file(&chunk); + let rewritten = rewrite_tags(&full, &[]).unwrap(); + let s = std::str::from_utf8(&rewritten).unwrap(); + assert!(s.contains("tags: []")); + assert!(!s.contains(" - person/")); + } + + #[test] + fn yaml_scalar_quotes_special_characters() { + assert_eq!(yaml_scalar("slack:#eng"), "\"slack:#eng\""); + assert_eq!(yaml_scalar("hello world"), "hello world"); + assert_eq!(yaml_scalar(""), "\"\""); + } + + fn sample_email_chunk() -> Chunk { + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + Chunk { + id: "emailchunk1".into(), + content: "---\nFrom: alice@example.com\nSubject: Hello\n\nHello there.".into(), + metadata: Metadata { + source_kind: SourceKind::Email, + source_id: "gmail:alice@example.com|bob@example.com".into(), + owner: "owner@example.com".into(), + timestamp: ts, + time_range: (ts, ts), + tags: vec!["gmail".into()], + source_ref: None, + path_scope: None, + }, + token_count: 15, + seq_in_source: 0, + created_at: ts, + partial_message: false, + } + } + + #[test] + fn email_chunk_has_participants_list_and_alias() { + let chunk = sample_email_chunk(); + let (full, _body) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + // participants block must be a YAML list + assert!( + full_str.contains("participants:"), + "email chunk must have participants field; got:\n{full_str}" + ); + assert!( + full_str.contains(" - alice@example.com"), + "alice must appear as list item; got:\n{full_str}" + ); + assert!( + full_str.contains(" - bob@example.com"), + "bob must appear as list item; got:\n{full_str}" + ); + // aliases block must be present + assert!( + full_str.contains("aliases:"), + "email chunk must have aliases field; 
got:\n{full_str}" + ); + assert!( + full_str.contains("alice@example.com <-> bob@example.com: chunk 0"), + "alias must encode participants; got:\n{full_str}" + ); + // sender and thread_id must NOT appear + assert!( + !full_str.contains("sender:"), + "email chunk must NOT have sender field; got:\n{full_str}" + ); + assert!( + !full_str.contains("thread_id:"), + "email chunk must NOT have thread_id field; got:\n{full_str}" + ); + } + + #[test] + fn email_chunk_many_participants_alias_summarises() { + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let chunk = Chunk { + id: "em2".into(), + content: "body".into(), + metadata: Metadata { + source_kind: SourceKind::Email, + source_id: "gmail:alice@x.com|bob@y.com|carol@z.com".into(), + owner: "owner".into(), + timestamp: ts, + time_range: (ts, ts), + tags: vec![], + source_ref: None, + path_scope: None, + }, + token_count: 1, + seq_in_source: 3, + created_at: ts, + partial_message: false, + }; + let (full, _) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + assert!( + full_str.contains("participants:"), + "three-party chunk needs participants list; got:\n{full_str}" + ); + // With 3 participants: first + "2 others" + assert!( + full_str.contains("alice@x.com <-> 2 others: chunk 3"), + "alias with 3 participants must summarise; got:\n{full_str}" + ); + } + + #[test] + fn email_chunk_body_bytes_unchanged_by_extra_fields() { + // Adding participants/aliases to front-matter must not affect body_bytes + // (SHA-256 invariant: the hash is over body only, not front-matter). + let chunk = sample_email_chunk(); + let (full, body) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + // Body must still appear at the end unmodified. 
+ assert!( + full_str.ends_with(std::str::from_utf8(&body).unwrap()), + "body bytes must appear unmodified after front-matter" + ); + // body must equal chunk.content bytes + assert_eq!(body, chunk.content.as_bytes()); + } + + #[test] + fn chat_chunk_has_no_email_specific_fields() { + let chunk = sample_chunk(); // source_kind = Chat + let (full, _) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + assert!( + !full_str.contains("aliases:"), + "chat chunk must not have aliases field" + ); + assert!( + !full_str.contains("participants:"), + "chat chunk must not have participants field" + ); + assert!( + !full_str.contains("sender:"), + "chat chunk must not have sender field" + ); + assert!( + !full_str.contains("thread_id:"), + "chat chunk must not have thread_id field" + ); + } + + #[test] + fn email_chunk_with_malformed_source_id_omits_extra_fields() { + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let chunk = Chunk { + id: "xyz".into(), + content: "body".into(), + metadata: Metadata { + source_kind: SourceKind::Email, + source_id: "legacysourceid".into(), // no `gmail:` prefix → parse fails + owner: "owner".into(), + timestamp: ts, + time_range: (ts, ts), + tags: vec![], + source_ref: None, + path_scope: None, + }, + token_count: 1, + seq_in_source: 0, + created_at: ts, + partial_message: false, + }; + let (full, _) = compose_chunk_file(&chunk); + let full_str = std::str::from_utf8(&full).unwrap(); + // Malformed source_id → no email extras, no panic. 
+ assert!(!full_str.contains("aliases:")); + assert!(!full_str.contains("participants:")); + assert!(!full_str.contains("sender:")); + } + + // ─── summary compose tests ──────────────────────────────────────────────── + + fn sample_summary_input( + tree_kind: SummaryTreeKind, + scope: &str, + level: u32, + ) -> SummaryComposeInput<'static> { + let ts_start = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let ts_end = chrono::Utc.timestamp_millis_opt(1_700_086_400_000).unwrap(); + let sealed = chrono::Utc.timestamp_millis_opt(1_700_090_000_000).unwrap(); + // Leak the strings so they have 'static lifetime for this test helper. + // Only used in tests, not production code. + let scope: &'static str = Box::leak(scope.to_string().into_boxed_str()); + SummaryComposeInput { + summary_id: "summary:L1:abc", + tree_kind, + tree_id: "tree-id-001", + tree_scope: scope, + level, + child_ids: Box::leak( + vec!["child-1".to_string(), "child-2".to_string()].into_boxed_slice(), + ), + child_basenames: None, + child_count: 2, + time_range_start: ts_start, + time_range_end: ts_end, + sealed_at: sealed, + body: "This is the summariser output.\n", + } + } + + #[test] + fn compose_source_summary_has_required_front_matter() { + let input = sample_summary_input(SummaryTreeKind::Source, "gmail:alice@x.com|bob@y.com", 1); + let composed = compose_summary_md(&input); + let fm = &composed.front_matter; + assert!(fm.starts_with("---\n"), "front-matter must start with ---"); + assert!(fm.ends_with("---\n"), "front-matter must end with ---\\n"); + assert!(fm.contains("kind: summary"), "must have kind: summary"); + assert!( + fm.contains("tree_kind: source"), + "must have tree_kind: source" + ); + assert!(fm.contains("level: 1"), "must have level"); + assert!(fm.contains("child_count: 2"), "must have child_count"); + assert!( + fm.contains(&format!( + "openhuman_core_version: {}", + OPENHUMAN_CORE_VERSION + )), + "must stamp the core version" + ); + assert!( + 
fm.contains(&format!( + "memory_artifact_format: {}", + MEMORY_ARTIFACT_FORMAT + )), + "must stamp the artifact format epoch" + ); + assert!( + fm.contains(" - \"[[child-1]]\""), + "must list child ids as Obsidian wikilinks; got:\n{fm}" + ); + assert!( + fm.contains(" - \"[[child-2]]\""), + "must list child ids as Obsidian wikilinks; got:\n{fm}" + ); + assert!( + fm.contains(" - source/"), + "source-tree summary must seed source tag; got:\n{fm}" + ); + // aliases must mention the scope + assert!(fm.contains("aliases:"), "must have aliases"); + assert!( + composed.body == "This is the summariser output.\n", + "body must be the summariser text" + ); + assert!(composed.full.ends_with("This is the summariser output.\n")); + } + + #[test] + fn children_are_emitted_as_obsidian_wikilinks() { + // Contract: every entry in `children:` must be wrapped in `[[…]]` so + // Obsidian's graph view draws a summary→child edge. The YAML scalar is + // quoted because of the leading `[` — both forms below are required. + let input = sample_summary_input(SummaryTreeKind::Source, "gmail:alice@x.com", 1); + let composed = compose_summary_md(&input); + let fm = &composed.front_matter; + for id in ["child-1", "child-2"] { + let expected = format!(" - \"[[{id}]]\""); + assert!( + fm.contains(&expected), + "child id {id} must be emitted as a quoted wikilink ({expected}); got:\n{fm}" + ); + // Belt-and-braces: the bare id must NOT appear as a plain scalar + // (i.e. unwrapped). The wikilink form contains the id, so we + // search for the bare list-item form. + let plain = format!(" - {id}\n"); + assert!( + !fm.contains(&plain), + "child id {id} must not be emitted as a plain scalar; got:\n{fm}" + ); + } + } + + #[test] + fn child_basename_overrides_replace_chunk_id_in_wikilink() { + // L1 seals: each child's wikilink should point at the + // raw archive file basename, not the chunk_id hash. 
Without + // this override the link would be `[[<32-char hex>]]` and + // Obsidian wouldn't find a matching file (the chunk-store + // copy under `email//...` is gone after the + // raw_refs migration). + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let child_ids = vec!["abc123hash".to_string(), "def456hash".to_string()]; + let overrides: Vec> = vec![ + Some("1700000000000_msg-id-1".into()), + None, // second child has no override → falls back to sanitize_filename + ]; + let input = SummaryComposeInput { + summary_id: "summary:L1:test", + tree_kind: SummaryTreeKind::Source, + tree_id: "t1", + tree_scope: "gmail:alice@x.com", + level: 1, + child_ids: &child_ids, + child_basenames: Some(&overrides), + child_count: 2, + time_range_start: ts, + time_range_end: ts, + sealed_at: ts, + body: "L1 body", + }; + let composed = compose_summary_md(&input); + let fm = &composed.front_matter; + // First child uses the override (raw archive basename). + assert!( + fm.contains(r#" - "[[1700000000000_msg-id-1]]""#), + "first child must use override basename; got:\n{fm}" + ); + // Second child has None override — fall back to chunk_id. + assert!( + fm.contains(r#" - "[[def456hash]]""#), + "None override must fall back to sanitize_filename; got:\n{fm}" + ); + } + + #[test] + fn structured_child_summary_id_is_sanitised_in_wikilink() { + // Real-world case: an L2 summary lists child L1 summaries by their + // structured id (e.g. `summary:L1:UUID`). Colons are illegal in + // Windows NTFS filenames, so `summary_rel_path` writes the file as + // `summary-L1-UUID.md`. The wikilink target must match that basename + // — i.e. colons must be converted to dashes — otherwise Obsidian + // cannot resolve the link and the graph stays disconnected. 
+ let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let child_id = "summary:L1:b9fa5f08-bf79-41a7-a5c8-2d87883d5c01"; + let expected_basename = "summary-L1-b9fa5f08-bf79-41a7-a5c8-2d87883d5c01"; + let input = SummaryComposeInput { + summary_id: "summary:L2:cc9a1224", + tree_kind: SummaryTreeKind::Source, + tree_id: "t1", + tree_scope: "gmail:alice@x.com", + level: 2, + child_ids: &[child_id.to_string()], + child_basenames: None, + child_count: 1, + time_range_start: ts, + time_range_end: ts, + sealed_at: ts, + body: "L2 body", + }; + let composed = compose_summary_md(&input); + let fm = &composed.front_matter; + let expected = format!(" - \"[[{expected_basename}]]\""); + assert!( + fm.contains(&expected), + "structured child id must be sanitised to filename basename in wikilink; \ + expected line: {expected}; got:\n{fm}" + ); + // Raw colon-bearing id must NOT appear inside `[[…]]` — that wikilink + // would not resolve in Obsidian. + assert!( + !fm.contains(&format!("[[{child_id}]]")), + "raw structured id with colons must not appear inside wikilink; got:\n{fm}" + ); + } + + #[test] + fn compose_global_summary_alias_format() { + let input = sample_summary_input(SummaryTreeKind::Global, "global", 0); + let composed = compose_summary_md(&input); + assert!( + composed.front_matter.contains("tree_kind: global"), + "must have tree_kind: global" + ); + assert!( + composed.front_matter.contains("global digest"), + "alias must mention 'global digest'" + ); + } + + #[test] + fn compose_topic_summary_alias_format() { + let input = sample_summary_input(SummaryTreeKind::Topic, "person:alex-johnson", 1); + let composed = compose_summary_md(&input); + assert!( + composed.front_matter.contains("tree_kind: topic"), + "must have tree_kind: topic" + ); + assert!( + composed.front_matter.contains("topic"), + "alias must mention topic entity" + ); + } + + #[test] + fn compose_summary_with_zero_children() { + let ts = 
chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let input = SummaryComposeInput { + summary_id: "summary:L0:empty", + tree_kind: SummaryTreeKind::Source, + tree_id: "t1", + tree_scope: "gmail:alice@x.com", + level: 0, + child_ids: &[], + child_basenames: None, + child_count: 0, + time_range_start: ts, + time_range_end: ts, + sealed_at: ts, + body: "empty", + }; + let composed = compose_summary_md(&input); + assert!(composed.front_matter.contains("children: []")); + assert!(composed.front_matter.contains("child_count: 0")); + } + + #[test] + fn compose_summary_same_start_end_date_single_date_alias() { + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let input = SummaryComposeInput { + summary_id: "summary:L1:sameday", + tree_kind: SummaryTreeKind::Global, + tree_id: "t1", + tree_scope: "global", + level: 1, + child_ids: &["child-a".to_string()], + child_basenames: None, + child_count: 1, + time_range_start: ts, + time_range_end: ts, // same as start + sealed_at: ts, + body: "day recap", + }; + let composed = compose_summary_md(&input); + // Alias must contain just one date, not "date–date" + let alias_line = composed + .front_matter + .lines() + .find(|l| l.contains("L1") && l.contains("global digest")) + .expect("alias line must be present"); + // The date should appear exactly once (no en-dash range) + let date_str = ts.format("%Y-%m-%d").to_string(); + assert!( + alias_line.contains(&date_str), + "alias must contain the date; got: {alias_line}" + ); + // Must not contain an en-dash (range indicator) + assert!( + !alias_line.contains('\u{2013}'), + "same-day alias must not have en-dash range; got: {alias_line}" + ); + } + + #[test] + fn scope_short_label_two_participants() { + let label = scope_short_label("gmail:alice@x.com|bob@y.com"); + assert_eq!(label, "alice@x.com \u{2194} bob@y.com"); + } + + #[test] + fn scope_short_label_many_participants() { + let label = 
scope_short_label("gmail:alice@x.com|bob@y.com|carol@z.com"); + assert_eq!(label, "alice@x.com + 2 others"); + } + + #[test] + fn scope_short_label_non_gmail_returns_raw() { + let label = scope_short_label("slack:#general"); + assert_eq!(label, "slack:#general"); + } + + #[test] + fn rewrite_summary_tags_delegates_to_rewrite_tags() { + // compose a summary, then rewrite its tags — body must stay unchanged. + let ts = chrono::Utc.timestamp_millis_opt(1_700_000_000_000).unwrap(); + let input = SummaryComposeInput { + summary_id: "sum:L1:rwttest", + tree_kind: SummaryTreeKind::Source, + tree_id: "t1", + tree_scope: "gmail:alice@x.com", + level: 1, + child_ids: &["c1".to_string()], + child_basenames: None, + child_count: 1, + time_range_start: ts, + time_range_end: ts, + sealed_at: ts, + body: "summary body text", + }; + let composed = compose_summary_md(&input); + let file_bytes = composed.full.as_bytes(); + let new_tags = vec!["person/Alice-Smith".to_string(), "topic/Memory".to_string()]; + let rewritten = rewrite_summary_tags(file_bytes, &new_tags).unwrap(); + let rewritten_str = std::str::from_utf8(&rewritten).unwrap(); + assert!(rewritten_str.contains(" - person/Alice-Smith")); + assert!(rewritten_str.contains(" - topic/Memory")); + assert!(!rewritten_str.contains("tags: []")); + assert!(rewritten_str.contains(&format!( + "openhuman_core_version: {}", + OPENHUMAN_CORE_VERSION + ))); + assert!(rewritten_str.contains(&format!( + "memory_artifact_format: {}", + MEMORY_ARTIFACT_FORMAT + ))); + // Body must be unchanged + assert!(rewritten_str.ends_with("summary body text")); + } + + #[test] + fn rewrite_summary_tags_backfills_missing_provenance() { + let file = + b"---\nid: legacy\nkind: summary\ntags: []\naliases:\n - legacy\n---\nlegacy body"; + let rewritten = rewrite_summary_tags(file, &["person/Alice".to_string()]).unwrap(); + let rewritten_str = std::str::from_utf8(&rewritten).unwrap(); + assert!(rewritten_str.contains(&format!( + "openhuman_core_version: {}", + 
OPENHUMAN_CORE_VERSION + ))); + assert!(rewritten_str.contains(&format!( + "memory_artifact_format: {}", + MEMORY_ARTIFACT_FORMAT + ))); + assert!(rewritten_str.ends_with("legacy body")); + } +} diff --git a/src/openhuman/memory_store/content/compose/yaml.rs b/src/openhuman/memory_store/content/compose/yaml.rs new file mode 100644 index 0000000000..0a92da93ce --- /dev/null +++ b/src/openhuman/memory_store/content/compose/yaml.rs @@ -0,0 +1,96 @@ +//! YAML scalar helpers and front-matter parsing utilities. + +/// Build the canonical Obsidian `source/` tag for a given +/// source scope. Used to seed the `tags:` block on every chunk and +/// every source-tree summary so the Obsidian graph view can filter by +/// source. +/// +/// Slug rules match `slugify_source_id` (lowercase ASCII, `-` separators, +/// alphanumerics + `_` preserved) so the tag matches the on-disk +/// `raw//...` directory name byte-for-byte. +pub fn source_tag(scope: &str) -> String { + use crate::openhuman::memory_store::content::paths::slugify_source_id; + format!("source/{}", slugify_source_id(scope)) +} + +/// Prepend the source tag to `tags`, dedup, and return the new list. +/// Order is preserved otherwise — `source/...` always comes first so +/// it shows up at the top of the YAML block. +pub fn with_source_tag(scope: &str, tags: &[String]) -> Vec { + let st = source_tag(scope); + let mut out = Vec::with_capacity(tags.len() + 1); + out.push(st.clone()); + for t in tags { + if t != &st { + out.push(t.clone()); + } + } + out +} + +/// Parse the value of a top-level YAML scalar field (e.g. `source_id`, +/// `tree_scope`, `tree_kind`) from a frontmatter string. Strips +/// surrounding double-quotes if present so the returned slice matches +/// what the original composer passed in. Returns `None` if the key is +/// not present at the top level of the frontmatter. 
pub fn scan_fm_field(fm: &str, key: &str) -> Option<String> {
    let prefix = format!("{key}: ");
    fm.lines()
        // Indented lines are list items / nested mappings, never top-level keys.
        .filter(|line| !line.starts_with([' ', '\t']))
        .find_map(|line| line.strip_prefix(prefix.as_str()))
        .map(|rest| {
            let value = rest.trim();
            match value.strip_prefix('"').and_then(|v| v.strip_suffix('"')) {
                // Double-quoted scalar: undo the escaping applied by `yaml_scalar`
                // (`\"` first, then `\\`, mirroring the reverse of its escape order).
                Some(inner) => inner.replace("\\\"", "\"").replace("\\\\", "\\"),
                None => value.to_string(),
            }
        })
}

/// Split a file into `(front_matter, body)` at the closing `---` delimiter.
///
/// The front matter slice includes both delimiters (and the newline after the
/// closing one when present); the body is everything that follows.
///
/// Two layouts are accepted:
/// - `---\n...\n---\n<body>` — the usual form;
/// - `---\n...\n---` — the closing delimiter is the last line of the file and
///   has no trailing newline. The whole input is front matter and the body is
///   empty.
///
/// Returns `None` if the file does not start with `---\n` or has no closing
/// `---` line.
pub fn split_front_matter(content: &str) -> Option<(&str, &str)> {
    const OPEN: &str = "---\n";
    const CLOSE: &str = "\n---\n";

    // The file must start with `---\n`.
    let rest = content.strip_prefix(OPEN)?;

    if let Some(idx) = rest.find(CLOSE) {
        // `idx` is relative to `rest`; shift it past the opening delimiter and
        // include the closing `\n---\n` in the front matter half.
        return Some(content.split_at(OPEN.len() + idx + CLOSE.len()));
    }

    // Closing delimiter at the very end with no trailing newline: there is no
    // body. Return the input unchanged rather than indexing past its end.
    rest.ends_with("\n---").then_some((content, ""))
}

/// Format a string as an unquoted YAML scalar when safe, or as a
/// double-quoted string when it contains special characters.
///
/// A value is quoted when it is empty, has leading/trailing whitespace, starts
/// with one of the indicator characters `& * ? | - < > = ! % @` or a backtick,
/// or contains any of `:`, `#`, `[`, `]`, `{`, `}`, `"`, `'`.
///
/// A backslash on its own does not force quoting. When the value is quoted for
/// another reason, `\` and `"` are escaped as `\\` and `\"` so that
/// `scan_fm_field` can recover the original text.
pub fn yaml_scalar(s: &str) -> String {
    let needs_quoting = s.is_empty()
        || s.trim() != s
        || s.starts_with(|c: char| {
            matches!(
                c,
                '&' | '*' | '?' | '|' | '-' | '<' | '>' | '=' | '!' | '%' | '@' | '`'
            )
        })
        || s.contains([':', '#', '[', ']', '{', '}', '"', '\'']);

    if needs_quoting {
        // Escape backslashes before quotes so the backslashes introduced for
        // the quotes are not themselves doubled.
        let escaped = s.replace('\\', "\\\\").replace('"', "\\\"");
        format!("\"{escaped}\"")
    } else {
        s.to_string()
    }
}
-pub const POLICY_DENIED_MARKER: &str = "[policy-denied]"; - -/// How much autonomy the agent has -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "lowercase")] -pub enum AutonomyLevel { - /// Read-only: can observe but not act - ReadOnly, - /// Supervised: acts but requires approval for risky operations - #[default] - Supervised, - /// Full: autonomous execution within policy bounds - Full, -} - -/// Access level granted to a trusted root outside the workspace. -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] -#[serde(rename_all = "lowercase")] -pub enum TrustedAccess { - /// Read + list only. - #[default] - Read, - /// Read and write/edit. - ReadWrite, -} - -/// A directory outside the workspace the agent is explicitly granted access to. -/// Takes precedence over `workspace_only` and `forbidden_paths` for its subtree, -/// except for credential stores (see `SecurityPolicy::is_always_forbidden`). -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] -pub struct TrustedRoot { - /// Absolute path (a leading `~` is expanded to the user's home). - pub path: String, - /// Whether the agent may write within this root. - #[serde(default)] - pub access: TrustedAccess, -} - -/// Risk score for shell command execution. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CommandRiskLevel { - Low, - Medium, - High, -} - -/// Coarse permission bucket the harness approval gate keys on. -/// -/// Classification is **fail-closed**: a command that is not provably read-only -/// (and not a recognized network/destructive command) is treated as at least -/// [`CommandClass::Write`]. Across multiple shell segments the **highest** class -/// wins (so `ls | curl …` is `Network`). Variants are ordered low→high so -/// [`Ord`] / [`Iterator::max`] compose them directly. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub enum CommandClass { - /// Provably read-only / observational (curated safe-read allowlist). - Read, - /// State-changing but not inherently catastrophic — the fail-closed default - /// for anything not recognized as read/network/destructive. - Write, - /// Reaches the network (curl/wget/ssh/scp/…). Always prompts, every tier. - Network, - /// Installs an OS / language package (system package manager, or a *global* - /// npm/pnpm/yarn/cargo/pip install). Always-ask in every acting tier, - /// including Full — mirrors the dedicated `install_tool` gate so shell - /// installs can't slip past it. Project-local installs are ordinary `Write`. - Install, - /// Catastrophic / irreversible / privilege-escalating / system-control. - /// Always prompts, even in Full. - Destructive, -} - -/// What the harness should do with an acting tool call of a given -/// [`CommandClass`] under the session's [`AutonomyLevel`]. Computed by -/// [`SecurityPolicy::gate_decision`]; the harness translates `Prompt` into an -/// `ApprovalGate` round-trip *before* the tool's `execute()` runs. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum GateDecision { - /// Run without prompting. - Allow, - /// Require explicit human approval before running. - Prompt, - /// Refuse outright — no in-tier prompt can authorize it (e.g. any act in - /// read-only mode). - Block, -} - -/// Classifies whether a tool operation is read-only or side-effecting. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ToolOperation { - Read, - Act, -} - -/// Sliding-window action tracker for rate limiting. -#[derive(Debug)] -pub struct ActionTracker { - /// Timestamps of recent actions (kept within the last hour). 
- actions: Mutex>, -} - -impl Default for ActionTracker { - fn default() -> Self { - Self::new() - } -} - -impl ActionTracker { - pub fn new() -> Self { - Self { - actions: Mutex::new(Vec::new()), - } - } - - /// Record an action and return the current count within the window. - pub fn record(&self) -> usize { - let mut actions = self.actions.lock(); - let cutoff = Instant::now() - .checked_sub(std::time::Duration::from_secs(3600)) - .unwrap_or_else(Instant::now); - actions.retain(|t| *t > cutoff); - actions.push(Instant::now()); - actions.len() - } - - /// Count of actions in the current window without recording. - pub fn count(&self) -> usize { - let mut actions = self.actions.lock(); - let cutoff = Instant::now() - .checked_sub(std::time::Duration::from_secs(3600)) - .unwrap_or_else(Instant::now); - actions.retain(|t| *t > cutoff); - actions.len() - } -} - -impl Clone for ActionTracker { - fn clone(&self) -> Self { - let actions = self.actions.lock(); - Self { - actions: Mutex::new(actions.clone()), - } - } -} - -/// Subdirectories under `workspace_dir` that hold internal application state -/// (memory DBs, sessions, tokens, etc.) and must not be writable by agent tools. -const WORKSPACE_INTERNAL_DIRS: &[&str] = &[ - "memory", - "memory_tree", - "state", - "approval", - "sessions", - "session_raw", - "cron", - "devices", - "mcp_clients", - "subconscious", - "vault", - "task_sources", - "whatsapp_data", - "redirect_links", - "codegraph", - ".openhuman", -]; - -/// Files directly under `workspace_dir` that hold secrets or persona config -/// and must not be writable by agent tools. 
-const WORKSPACE_INTERNAL_FILES: &[&str] = &[ - "core.token", - "dev-keychain.json", - ".env", - "SOUL.md", - "IDENTITY.md", - "HEARTBEAT.md", - "PROFILE.md", -]; - -/// Security policy enforced on all tool executions -#[derive(Debug, Clone)] -pub struct SecurityPolicy { - pub autonomy: AutonomyLevel, - pub workspace_dir: PathBuf, - /// Agent action sandbox root — tools resolve relative paths and default - /// their cwd here instead of `workspace_dir`. Kept separate so internal - /// state (memory DBs, sessions, tokens) under `workspace_dir` is not - /// reachable from agent tool calls. - pub action_dir: PathBuf, - pub workspace_only: bool, - pub allowed_commands: Vec, - pub forbidden_paths: Vec, - pub max_actions_per_hour: u32, - pub max_cost_per_day_cents: u32, - pub require_approval_for_medium_risk: bool, - pub block_high_risk_commands: bool, - /// Directories outside the workspace the agent may access (read or read-write). - pub trusted_roots: Vec, - /// Whether the agent may install OS packages via the `install_tool` tool. - pub allow_tool_install: bool, - /// Tool names the user has pre-approved ("Always allow"). The `ApprovalGate` - /// skips the interactive prompt for any tool in this set. Sourced from - /// `autonomy.auto_approve`; populated/cleared via `config.update_autonomy_settings` - /// (or an "Always allow" decision) and observed live via `live_policy`. - pub auto_approve: Vec, - pub tracker: ActionTracker, - /// Lazily-cached canonical form of [`workspace_dir`]. - /// - /// `validate_path` / `validate_parent_path` use the canonical workspace - /// root to check resolved paths against `forbidden_paths`. Without a cache - /// each call invokes `tokio::fs::canonicalize(&workspace_dir)` — one - /// `stat(2)` + symlink walk on the same path on every file op. A single - /// agent turn doing tens of read/edit/shell-path validations hits this - /// repeatedly with identical input. 
- /// - /// `workspace_dir` is effectively immutable for a given `SecurityPolicy` - /// (a config update builds a *new* policy via `from_config` and swaps the - /// `Arc` in [`live_policy`]), so caching the resolved value is safe and - /// stays correct across config updates. - /// - /// `Arc>` so the struct stays `Clone` (clone the `Arc`) and - /// init happens lazily on the first async call site without blocking - /// constructors. Fallback (raw `workspace_dir` if canonicalize fails) - /// matches the previous inline behavior exactly. - /// - /// Visibility is `pub` to match every other field on the struct: external - /// crates (Cargo examples, downstream consumers) construct - /// `SecurityPolicy` with the `..SecurityPolicy::default()` functional-update - /// spread, and Rust requires every field of the target struct to be - /// visible to the caller in that syntax — even fields supplied by the - /// default. `pub(crate)` was an over-tight first cut that broke - /// `examples/mouse_smoke.rs` with E0451. - pub canonical_workspace: Arc>, -} - -impl Default for SecurityPolicy { - fn default() -> Self { - Self { - autonomy: AutonomyLevel::Supervised, - workspace_dir: PathBuf::from("."), - action_dir: PathBuf::from("."), - workspace_only: true, - // When adding a new entry to this allowlist, re-audit - // `DANGEROUS_ENV_PREFIXES` (see below). Every newly-allowed binary - // may introduce its own env-driven subprocess hooks (pager, editor, - // loader override, SSH/diff helper, preprocessor) — those names - // must be added to the prefix denylist so that the - // `KEY=cmd ` shape cannot bypass allowlisting via - // `skip_env_assignments` in `is_command_allowed`. Cross-ref #2636. 
- allowed_commands: vec![ - // Version control - "git".into(), - // Package managers / build systems - "npm".into(), - "pnpm".into(), - "yarn".into(), - "cargo".into(), - "make".into(), - "cmake".into(), - // Directory / file inspection (read-only, low-risk) - "ls".into(), - "cat".into(), - "grep".into(), - "find".into(), - "echo".into(), - "pwd".into(), - "wc".into(), - "head".into(), - "tail".into(), - "date".into(), - "sort".into(), - "uniq".into(), - "diff".into(), - "which".into(), - "uname".into(), - "basename".into(), - "dirname".into(), - "tr".into(), - "cut".into(), - "realpath".into(), - "readlink".into(), - "stat".into(), - "file".into(), - // Filesystem mutations (medium-risk — require approval in Supervised mode) - "mkdir".into(), - "touch".into(), - "cp".into(), - "mv".into(), - "ln".into(), - // Windows read-only equivalents for the same basic - // inspection workflows as ls/cat/grep/which. - "dir".into(), - "type".into(), - "where".into(), - "findstr".into(), - "more".into(), - ], - forbidden_paths: vec![ - // System directories (blocked even when workspace_only=false) - "/etc".into(), - "/root".into(), - "/home".into(), - "/usr".into(), - "/bin".into(), - "/sbin".into(), - "/lib".into(), - "/opt".into(), - "/boot".into(), - "/dev".into(), - "/proc".into(), - "/sys".into(), - "/var".into(), - "/tmp".into(), - // Sensitive dotfiles - "~/.ssh".into(), - "~/.gnupg".into(), - "~/.aws".into(), - "~/.config".into(), - ], - // Effectively unlimited — matches AutonomyConfig::default_max_actions_per_hour(). - // The rate-limiter check is `count <= max`, so u32::MAX is functionally - // infinite without requiring an Option sentinel on the field type. 
- max_actions_per_hour: u32::MAX, - max_cost_per_day_cents: 500, - require_approval_for_medium_risk: true, - block_high_risk_commands: true, - trusted_roots: Vec::new(), - allow_tool_install: false, - auto_approve: Vec::new(), - tracker: ActionTracker::new(), - canonical_workspace: Arc::new(OnceCell::new()), - } - } -} - -#[path = "policy_command.rs"] -mod policy_command; -use policy_command::{ - classify_segment, command_basename, contains_unquoted_char, contains_unquoted_single_ampersand, - has_dangerous_env_prefix, has_hidden_execution, has_leading_env_assignment, - is_command_executor, normalized_command_name, skip_env_assignments, split_unquoted_segments, -}; - -impl SecurityPolicy { - /// Classify command risk. Any high-risk segment marks the whole command high. - pub fn command_risk_level(&self, command: &str) -> CommandRiskLevel { - let mut saw_medium = false; - - for segment in split_unquoted_segments(command) { - let cmd_part = skip_env_assignments(&segment); - let mut words = cmd_part.split_whitespace(); - let Some(base_raw) = words.next() else { - continue; - }; - - let base = normalized_command_name(base_raw); - - let args: Vec = words.map(|w| w.to_ascii_lowercase()).collect(); - let joined_segment = cmd_part.to_ascii_lowercase(); - - // High-risk = catastrophic / irreversible / privilege-escalating / - // system-control commands ONLY. Interpreters (python/bash/…), - // network tools (curl/wget/ssh/…), and ordinary rm/chmod/chown are - // deliberately NOT high-risk: they are routine for a coding agent and - // are treated as medium-risk below (prompted in Supervised, run in - // Full). This keeps "Full access" actually able to run code while - // still guarding the few irreversible / system-destroying commands. 
- if matches!( - base.as_str(), - "mkfs" - | "dd" - | "shutdown" - | "reboot" - | "halt" - | "poweroff" - | "sudo" - | "su" - | "mount" - | "umount" - | "iptables" - | "ufw" - | "firewall-cmd" - | "useradd" - | "userdel" - | "usermod" - | "passwd" - ) { - return CommandRiskLevel::High; - } - - if joined_segment.contains("rm -rf /") - || joined_segment.contains("rm -fr /") - || joined_segment.contains(":(){:|:&};:") - { - return CommandRiskLevel::High; - } - - // Medium-risk commands (state-changing, but not inherently destructive) - let medium = match base.as_str() { - "git" => args.first().is_some_and(|verb| { - matches!( - verb.as_str(), - "commit" - | "push" - | "reset" - | "clean" - | "rebase" - | "merge" - | "cherry-pick" - | "revert" - | "branch" - | "checkout" - | "switch" - | "tag" - ) - }), - "npm" | "pnpm" | "yarn" => args.first().is_some_and(|verb| { - matches!( - verb.as_str(), - "install" | "add" | "remove" | "uninstall" | "update" | "publish" - ) - }), - "cargo" => args.first().is_some_and(|verb| { - matches!( - verb.as_str(), - "add" | "remove" | "install" | "clean" | "publish" - ) - }), - "touch" | "mkdir" | "mv" | "cp" | "ln" | "rm" | "chmod" | "chown" | "curl" - | "wget" | "nc" | "ncat" | "netcat" | "scp" | "ssh" | "ftp" | "telnet" => true, - _ => false, - }; - - // Interpreters / code executors run arbitrary code — medium-risk - // (that is the job of a coding agent): prompted in Supervised, - // allowed in Full. They are no longer classified high-risk. - let medium = medium || is_command_executor(base.as_str()); - - saw_medium |= medium; - } - - if saw_medium { - CommandRiskLevel::Medium - } else { - CommandRiskLevel::Low - } - } - - /// Classify a shell command into a fail-closed [`CommandClass`]. The highest - /// class across all `;`/`|`/`&&`/`||`/newline-separated segments wins, and a - /// file redirect (`>`/`>>`) or `tee` lifts the class to at least `Write` no - /// matter how benign the base looks (`cat x > y` writes `y`). 
- /// - /// This is the deterministic floor the harness gate keys on; an LLM-declared - /// category may only *raise* it (`gate = max(rust_floor, llm_declared)`), - /// never lower it. - pub fn classify_command(&self, command: &str) -> CommandClass { - let mut class = CommandClass::Read; - for segment in split_unquoted_segments(command) { - let cmd_part = skip_env_assignments(&segment); - let mut words = cmd_part.split_whitespace(); - let Some(base_raw) = words.next() else { - continue; - }; - let base = normalized_command_name(base_raw); - let args: Vec = words.map(|w| w.to_ascii_lowercase()).collect(); - let joined = cmd_part.to_ascii_lowercase(); - class = class.max(classify_segment(&base, &args, &joined)); - } - // A redirect or `tee` writes a file regardless of the base command. - if contains_unquoted_char(command, '>') - || command - .split_whitespace() - .any(|w| w == "tee" || w.ends_with("/tee")) - { - class = class.max(CommandClass::Write); - } - class - } - - /// The gate decision for an acting tool call of `class` under this policy's - /// autonomy tier. The harness turns `Prompt` into an `ApprovalGate` - /// round-trip *before* the tool runs; `Block` is refused outright. - /// - /// Matrix: read-only allows only `Read`; ask-before-edit (`Supervised`) - /// prompts on every acting class; full runs `Read`/`Write` silently but - /// always prompts on `Network`/`Destructive`. 
- pub fn gate_decision(&self, class: CommandClass) -> GateDecision { - match self.autonomy { - AutonomyLevel::ReadOnly => match class { - CommandClass::Read => GateDecision::Allow, - _ => GateDecision::Block, - }, - AutonomyLevel::Supervised => match class { - CommandClass::Read => GateDecision::Allow, - _ => GateDecision::Prompt, - }, - AutonomyLevel::Full => match class { - CommandClass::Read | CommandClass::Write => GateDecision::Allow, - CommandClass::Network | CommandClass::Install | CommandClass::Destructive => { - GateDecision::Prompt - } - }, - } - } - - /// Defense-in-depth check for the harness-gated command flow (Option 2). - /// - /// The run / prompt / block decision is made by [`Self::gate_decision`] + - /// the process-global `ApprovalGate` (which prompts the human *before* - /// `execute()`), so by the time a tool calls this the command is either a - /// read or an already-approved act. This enforces what must still hold: - /// - /// - **Read-only**: only `Read`-class commands run (`Block` otherwise). - /// - **Supervised**: no *hidden execution* (command/process substitution, - /// backticks, background `&`) that could smuggle an unseen command past - /// the approval the human read. Plain redirects (`2>&1`, `> file`) and - /// pipes are fine here — `classify_command` already lifts redirects to - /// `Write` so the gate prompted on them, and the human approved the - /// literal command. Full is trusted and skips the structural guard. - /// - /// Returns the classified [`CommandClass`] on success. - pub fn check_gated_command(&self, command: &str) -> Result { - let class = self.classify_command(command); - if self.gate_decision(class) == GateDecision::Block { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Security policy: read-only mode — only read commands are \ - permitted. Do not retry this command; use a read-only approach or report that it \ - cannot be done in this mode." 
- )); - } - if self.autonomy != AutonomyLevel::Full && has_hidden_execution(command) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Command blocked: command/process substitution ($(…), \ - <(…)), backticks, and background (&) are not allowed in this mode — they can run \ - a hidden command the approval prompt wouldn't show. Plain redirects like `2>&1` \ - are fine. Do not retry as-is; rewrite the command without these constructs." - )); - } - Ok(class) - } - - /// Parse an LLM-declared command category. This is an **escalate-only** - /// hint: callers combine it with the deterministic floor via - /// `classify_command(cmd).max(declared)`, so the model can *raise* the gate - /// (e.g. flag a `Write` as `Destructive` to request confirmation) but can - /// never lower what the runtime determined. Unknown / empty → `None`. - pub fn parse_declared_class(declared: &str) -> Option { - match declared.trim().to_ascii_lowercase().as_str() { - "read" => Some(CommandClass::Read), - "write" => Some(CommandClass::Write), - "network" => Some(CommandClass::Network), - "install" => Some(CommandClass::Install), - "destructive" => Some(CommandClass::Destructive), - _ => None, - } - } - - /// Validate full command execution policy (allowlist + risk gate). - pub fn validate_command_execution( - &self, - command: &str, - approved: bool, - ) -> Result { - if !self.is_command_allowed(command) { - // Truncate the command in BOTH the log and the Err return: the Err - // string is bubbled back to the frontend, and a full untruncated - // command can leak secrets in args (e.g. `curl -H "Authorization: - // Bearer …"`, `psql "postgres://user:pass@…"`). The 80-char cap - // matches the log truncation so a long base command with safe args - // still shows enough context to diagnose the block. 
- let truncated = &command[..floor_char_boundary(command, 80)]; - log::warn!( - "[openhuman:policy] Command blocked by allowlist: {}", - truncated - ); - return Err(format!( - "{POLICY_BLOCKED_MARKER} Command not allowed by security policy: {truncated}. \ - Do not retry this command; it is off the allowlist for this mode." - )); - } - - let risk = self.command_risk_level(command); - - if risk == CommandRiskLevel::High { - if self.block_high_risk_commands { - log::warn!( - "[openhuman:policy] High-risk command blocked: {}", - &command[..floor_char_boundary(command, 80)] - ); - return Err(format!( - "{POLICY_BLOCKED_MARKER} Command blocked: high-risk command is disallowed by \ - policy. Do not retry this command; choose a safer approach or report that it \ - cannot be done." - )); - } - if self.autonomy == AutonomyLevel::Supervised && !approved { - log::warn!( - "[openhuman:policy] High-risk command needs approval: {}", - &command[..floor_char_boundary(command, 80)] - ); - return Err( - "Command requires explicit approval (approved=true): high-risk operation" - .into(), - ); - } - } - - if risk == CommandRiskLevel::Medium - && self.autonomy == AutonomyLevel::Supervised - && self.require_approval_for_medium_risk - && !approved - { - log::info!( - "[openhuman:policy] Medium-risk command needs approval: {}", - &command[..floor_char_boundary(command, 80)] - ); - return Err( - "Command requires explicit approval (approved=true): medium-risk operation".into(), - ); - } - - log::debug!( - "[openhuman:policy] Command validated: risk={:?}, approved={}, cmd={}", - risk, - approved, - &command[..floor_char_boundary(command, 80)] - ); - Ok(risk) - } - - /// Check if a shell command is allowed. 
- /// - /// Validates the **entire** command string, not just the first word: - /// - Blocks subshell operators (`` ` ``, `$(`) that hide arbitrary execution - /// - Splits on command separators (`|`, `&&`, `||`, `;`, newlines) and - /// validates each sub-command against the allowlist - /// - Blocks single `&` background chaining (`&&` remains supported) - /// - Blocks output redirections (`>`, `>>`) that could write outside workspace - /// - Blocks dangerous arguments (e.g. `find -exec`, `git config`) - pub fn is_command_allowed(&self, command: &str) -> bool { - if self.autonomy == AutonomyLevel::ReadOnly { - return false; - } - - // Full access bypasses the command allowlist AND the structural guards - // (redirects, pipes, subshells, background) — a Full-access agent is - // trusted to run any command, including the `mkdir`/`node`/`python`/ - // redirect-using commands a coding workflow needs. The remaining safety - // net is `validate_command_execution`'s high-risk handling (still gated - // by `block_high_risk_commands`), plus path-level `forbidden_paths` and - // any configured sandbox. The allowlist + structural guards below stay - // in force for Supervised, which runs only curated commands. - if self.autonomy == AutonomyLevel::Full { - return true; - } - - // Block subshell/expansion operators — these allow hiding arbitrary - // commands inside an allowed command (e.g. `echo $(rm -rf /)`) - if command.contains('`') - || command.contains("$(") - || command.contains("${") - || command.contains("<(") - || command.contains(">(") - { - return false; - } - - // Block output redirections (`>`, `>>`) — they can write to arbitrary paths. - // Ignore quoted literals, e.g. `echo "a>b"`. - if contains_unquoted_char(command, '>') { - return false; - } - - // Block `tee` — it can write to arbitrary files, bypassing the - // redirect check above (e.g. 
`echo secret | tee /etc/crontab`) - if command - .split_whitespace() - .any(|w| w == "tee" || w.ends_with("/tee")) - { - return false; - } - - // Block background command chaining (`&`), which can hide extra - // sub-commands and outlive timeout expectations. Keep `&&` allowed. - if contains_unquoted_single_ampersand(command) { - return false; - } - - // Split on unquoted command separators and validate each sub-command. - let segments = split_unquoted_segments(command); - for segment in &segments { - // Reject ANY segment that prefixes the command with an env-var - // assignment, not just the known-dangerous names. Helper-style - // exec primitives (`GIT_SSH=./wrapper git ls-remote`, - // `SSH_ASKPASS=./prompt ssh user@host`, `LD_PRELOAD=./libx.so - // ls`, etc.) change which binary the allowed command actually - // resolves to — or change its behaviour via a hook — without - // any blocked command name ever appearing in the segment. The - // allowlist already names every command we want to permit, and - // none of those commands need an operator-set env var at - // invoke time, so the broader gate has no false-positive - // surface on the approved path. `has_dangerous_env_prefix` - // remains in the source for legacy tests and as the - // narrower-grained signal. - if has_leading_env_assignment(segment) || has_dangerous_env_prefix(segment) { - return false; - } - - // Strip leading env var assignments (e.g. 
FOO=bar cmd) - let cmd_part = skip_env_assignments(segment); - - let mut words = cmd_part.split_whitespace(); - let base_raw = words.next().unwrap_or(""); - let base_cmd = command_basename(base_raw); - - if base_cmd.is_empty() { - continue; - } - - if !self - .allowed_commands - .iter() - .any(|allowed| allowed == base_cmd) - { - return false; - } - - // Validate arguments for the command - let args: Vec = words.map(|w| w.to_ascii_lowercase()).collect(); - if !self.is_args_safe(base_cmd, &args) { - return false; - } - } - - // At least one command must be present - let has_cmd = segments.iter().any(|s| { - let s = skip_env_assignments(s.trim()); - s.split_whitespace().next().is_some_and(|w| !w.is_empty()) - }); - - has_cmd - } - - /// Check for dangerous arguments that allow sub-command execution. - fn is_args_safe(&self, base: &str, args: &[String]) -> bool { - let base = base.to_ascii_lowercase(); - if is_command_executor(base.as_str()) { - return false; - } - - match base.as_str() { - "find" => { - // -exec / -ok run a command per match. -execdir / -okdir do - // the same with the working directory set to the match's - // parent — same code-execution semantics, just with a - // different cwd, so they must be blocked alongside. - !args.iter().any(|arg| { - arg == "-exec" || arg == "-ok" || arg == "-execdir" || arg == "-okdir" - }) - } - "git" => { - // git config, alias, and -c can be used to set dangerous options - // (e.g. git config core.editor "rm -rf /") - !args.iter().any(|arg| { - arg == "config" - || arg.starts_with("config.") - || arg == "alias" - || arg.starts_with("alias.") - || arg == "-c" - }) - } - "date" => args.is_empty(), - _ => true, - } - } - - /// Expand a leading `~/` to the user's home directory. Delegates to - /// [`crate::openhuman::config::expand_tilde`] — the single source of truth — - /// so policy and config expand paths byte-for-byte identically (and both - /// produce platform-native separators; see issue #3353). 
- fn expand_tilde(&self, path: &str) -> String { - crate::openhuman::config::expand_tilde(path) - } - - /// String-only path check. Does NOT resolve symlinks. - /// Use `validate_path()` for any path that will be used for file I/O. - pub fn is_path_string_allowed(&self, path: &str) -> bool { - // Block null bytes (can truncate paths in C-backed syscalls) - if path.contains('\0') { - return false; - } - - // Block path traversal: check for ".." as a path component - if Path::new(path) - .components() - .any(|c| matches!(c, std::path::Component::ParentDir)) - { - return false; - } - - // Block URL-encoded traversal attempts (e.g. ..%2f) - let lower = path.to_lowercase(); - if lower.contains("..%2f") || lower.contains("%2f..") { - return false; - } - - // Expand tilde for comparison - let expanded = self.expand_tilde(path); - let expanded_path = Path::new(&expanded); - - // Credential stores are never reachable, even via a trusted-root grant. - if Self::is_always_forbidden(expanded_path) { - return false; - } - - // A trusted root grants access to its subtree, taking precedence over - // workspace_only and forbidden_paths. Read-vs-write is enforced by the - // operation-specific validators (validate_path / validate_parent_path). - let in_trusted_root = self.is_within_trusted_root(expanded_path, false); - - // Block agent access to internal state paths under workspace_dir - // (unless the path falls under an explicitly granted trusted root). - if !in_trusted_root { - let check = if expanded_path.is_absolute() { - expanded_path.to_path_buf() - } else { - self.workspace_dir.join(expanded_path) - }; - if self.is_workspace_internal_path(&check) { - log::trace!( - "[security:policy] path blocked: agent access to workspace-internal state (requested={}, resolved={})", - path, - check.display() - ); - return false; - } - } - - // Block absolute paths when workspace_only is set (unless trusted-rooted). 
- if self.workspace_only && expanded_path.is_absolute() && !in_trusted_root { - return false; - } - - // Block forbidden paths using path-component-aware matching (unless trusted-rooted). - if !in_trusted_root { - for forbidden in &self.forbidden_paths { - let forbidden_expanded = self.expand_tilde(forbidden); - let forbidden_path = Path::new(&forbidden_expanded); - if expanded_path.starts_with(forbidden_path) { - return false; - } - } - } - - // Symlink-safe check (#1927). The string-level checks above can be - // bypassed by creating a symlink inside the workspace that points to - // a forbidden tree (e.g. `evil -> /etc/shadow`). Canonicalize the - // path and re-validate `workspace_only` containment + forbidden_paths - // against the resolved location. - if let Some(canonical) = self.try_canonicalize_under_workspace(path) { - if Self::is_always_forbidden(&canonical) { - return false; - } - let workspace_root = self - .workspace_dir - .canonicalize() - .unwrap_or_else(|_| self.workspace_dir.clone()); - let canonical_in_trusted = self.is_within_trusted_root(&canonical, false); - if self.workspace_only - && !canonical.starts_with(&workspace_root) - && !canonical_in_trusted - { - log::trace!( - "[security:policy] path blocked: symlink escapes workspace (requested={}, resolved={}, workspace={})", - path, - canonical.display(), - workspace_root.display() - ); - return false; - } - // If the resolved path stays inside the workspace, trust the - // workspace boundary over forbidden_paths — otherwise a workspace - // that lives under e.g. `/tmp` (common in tests and sandboxes) - // would block every legitimate access. forbidden_paths is meant - // to catch escapes *outside* the workspace, which the workspace - // containment check above already validates. 
- let inside_workspace = canonical.starts_with(&workspace_root); - if !inside_workspace && !canonical_in_trusted { - for forbidden in &self.forbidden_paths { - let forbidden_expanded = if let Some(stripped) = forbidden.strip_prefix("~/") { - std::env::var("HOME") - .ok() - .map(|h| PathBuf::from(h).join(stripped)) - .unwrap_or_else(|| PathBuf::from(forbidden)) - } else { - PathBuf::from(forbidden) - }; - let forbidden_canonical = forbidden_expanded - .canonicalize() - .unwrap_or(forbidden_expanded); - if canonical.starts_with(&forbidden_canonical) { - log::trace!( - "[security:policy] path blocked: symlink resolves to forbidden tree (requested={}, resolved={}, forbidden={})", - path, - canonical.display(), - forbidden_canonical.display() - ); - return false; - } - } - } - } - - true - } - - /// Resolve a user-supplied path under the workspace, canonicalizing it - /// (or its parent) when present on disk. Used by [`Self::is_path_string_allowed`] - /// to defend against symlink-based escapes that pass the string-level - /// checks. Returns `None` only when neither the path nor its parent can - /// be resolved on disk — in that case the caller falls back to the - /// string-level checks alone (which is the safe default for fresh paths - /// whose entire chain does not yet exist). - fn try_canonicalize_under_workspace(&self, path: &str) -> Option { - let expanded = if let Some(stripped) = path.strip_prefix("~/") { - std::env::var("HOME") - .ok() - .map(|h| PathBuf::from(h).join(stripped))? - } else { - PathBuf::from(path) - }; - let absolute = if expanded.is_absolute() { - expanded - } else { - self.workspace_dir.join(&expanded) - }; - if let Ok(canonical) = absolute.canonicalize() { - return Some(canonical); - } - // Path itself does not exist (e.g. a write-to-new-file call). Try - // canonicalizing the parent + appending the basename so we still - // catch parent chains that resolve via symlink to a forbidden tree. 
- let parent = absolute.parent()?; - let name = absolute.file_name()?; - parent.canonicalize().ok().map(|p| p.join(name)) - } - - /// Return the canonical form of `workspace_dir`, hydrating the - /// `canonical_workspace` cache on the first call. - /// - /// `validate_path` / `validate_parent_path` both need the canonical - /// workspace root for forbidden-path containment checks. The underlying - /// `tokio::fs::canonicalize` is a `stat(2)` + symlink walk and was - /// previously invoked on every call with the same input. - /// - /// Falls back to the raw `workspace_dir` if `canonicalize` fails (e.g. - /// during early startup or in tests where the workspace doesn't exist on - /// disk), matching the inline behavior the callers used before the cache. - async fn workspace_root(&self) -> PathBuf { - self.canonical_workspace - .get_or_init(|| async { - tokio::fs::canonicalize(&self.workspace_dir) - .await - .unwrap_or_else(|_| self.workspace_dir.clone()) - }) - .await - .clone() - } - - /// Validate a path for file I/O: string checks, canonicalize, workspace containment, - /// and forbidden-path check on the resolved path. - /// Returns the canonical `PathBuf` on success. - pub async fn validate_path(&self, path: &str) -> Result { - if !self.is_path_string_allowed(path) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Path not allowed by security policy: {path}. Do not \ - retry this path; use an allowed location (the workspace or a granted folder)." 
- )); - } - let expanded = self.expand_tilde(path); - let full_path = if Path::new(&expanded).is_absolute() { - PathBuf::from(&expanded) - } else { - self.action_dir.join(&expanded) - }; - let resolved = tokio::fs::canonicalize(&full_path) - .await - .map_err(|e| format!("Failed to resolve path '{path}': {e}"))?; - if !self.is_resolved_path_allowed_for(&resolved, false) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Resolved path escapes workspace: {}", - resolved.display() - )); - } - let workspace_root = self.workspace_root().await; - self.check_resolved_against_forbidden(&resolved, &workspace_root)?; - log::debug!( - "[security] validate_path: '{}' resolved to '{}'", - path, - resolved.display() - ); - Ok(resolved) - } - - /// Like `validate_path` but canonicalizes the parent directory. - /// Use for write operations where the target file may not yet exist. - /// Does NOT require the parent directory to exist — walks up to the deepest - /// existing ancestor and checks that for symlink escapes. - /// Returns the canonical full path (parent resolved + filename appended). - pub async fn validate_parent_path(&self, path: &str) -> Result { - if !self.is_path_string_allowed(path) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Path not allowed by security policy: {path}. Do not \ - retry this path; use an allowed location (the workspace or a granted folder)." - )); - } - let expanded = self.expand_tilde(path); - let full_path = if Path::new(&expanded).is_absolute() { - PathBuf::from(&expanded) - } else { - self.action_dir.join(&expanded) - }; - let parent = full_path - .parent() - .ok_or_else(|| format!("Invalid path (no parent): {path}"))?; - let file_name = full_path - .file_name() - .ok_or_else(|| format!("Invalid path (no filename): {path}"))?; - - // Walk up to the deepest existing ancestor so we can canonicalize without - // requiring the full parent path to exist yet. 
This catches symlink escapes - // in existing path components even when deeper dirs are not created yet. - let mut existing_ancestor = parent.to_path_buf(); - loop { - if existing_ancestor.exists() { - break; - } - match existing_ancestor.parent() { - Some(p) => existing_ancestor = p.to_path_buf(), - None => break, - } - } - let canonical_ancestor = tokio::fs::canonicalize(&existing_ancestor) - .await - .map_err(|e| format!("Failed to resolve parent of '{path}': {e}"))?; - if !self.is_resolved_path_allowed_for(&canonical_ancestor, true) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Resolved parent path escapes workspace: {}", - canonical_ancestor.display() - )); - } - - // Build resolved result: canonical_ancestor + suffix from existing_ancestor to parent + filename. - // Since is_path_string_allowed blocked "..", all components between the ancestor - // and the intended parent are newly created dirs — no symlinks possible there. - let relative_suffix = parent - .strip_prefix(&existing_ancestor) - .unwrap_or(std::path::Path::new("")); - let resolved_parent = canonical_ancestor.join(relative_suffix); - let result = resolved_parent.join(file_name); - - let workspace_root = self.workspace_root().await; - self.check_resolved_against_forbidden(&canonical_ancestor, &workspace_root)?; - self.check_resolved_against_forbidden(&result, &workspace_root)?; - - log::debug!( - "[security] validate_parent_path: '{}' resolved parent to '{}'", - path, - resolved_parent.display() - ); - Ok(result) - } - - /// Returns `true` if `path` falls under one of the internal-state - /// subdirectories or files within `workspace_dir`. Agent tools must not - /// write to these locations — they contain memory DBs, session transcripts, - /// tokens, and other core persistence that is not part of the agent's - /// action surface. 
- pub fn is_workspace_internal_path(&self, path: &Path) -> bool { - // Try canonical forms first (handles symlinks), fall back to raw paths - // when they don't exist on disk yet. - let ws_canonical = self.workspace_dir.canonicalize(); - let path_canonical = path.canonicalize(); - let (ws, check_path) = match (&ws_canonical, &path_canonical) { - (Ok(w), Ok(p)) => (w.as_path(), p.as_path()), - _ => (self.workspace_dir.as_path(), path), - }; - if !check_path.starts_with(ws) { - return false; - } - let relative = match check_path.strip_prefix(ws) { - Ok(r) => r, - Err(_) => return false, - }; - let first_component = match relative.components().next() { - Some(std::path::Component::Normal(s)) => s.to_string_lossy(), - _ => return false, - }; - if WORKSPACE_INTERNAL_DIRS - .iter() - .any(|d| *d == first_component.as_ref()) - { - return true; - } - // Check single-file entries (only if the relative path is exactly one component) - if relative.components().count() == 1 - && WORKSPACE_INTERNAL_FILES - .iter() - .any(|f| *f == first_component.as_ref()) - { - return true; - } - false - } - - /// Paths that remain blocked even when a `trusted_root` grant would - /// otherwise reach them — credential stores and core OS directories. A - /// grant on a parent must never expose SSH/GPG/AWS/keychain secrets, nor - /// open `/etc`, `C:\Windows`, `/System`, etc. Matching is **case-insensitive** - /// (Windows/macOS filesystems are), so `.SSH` / `C:\WINDOWS` cannot slip - /// through. Gray-area dirs (`/usr`, `/opt`, `/var`, `~/Library`) stay in the - /// user-overridable `forbidden_paths` instead, so a grant can still reach - /// e.g. `/usr/local/...`. - pub(crate) fn is_always_forbidden(path: &Path) -> bool { - // Normalize separators + case BEFORE splitting: a Windows backslash - // path is a single component on POSIX (and vice-versa), so we segment - // the normalized string rather than rely on `Path::components()`. 
- let lc_path = path - .to_string_lossy() - .to_ascii_lowercase() - .replace('\\', "/"); - let segments: Vec<&str> = lc_path.split('/').filter(|s| !s.is_empty()).collect(); - - // (a) Credential stores — matched by path segment, location-independent - // (catches e.g. `C:\Users\x\.ssh` and `~/Library/Keychains`). - const SENSITIVE_COMPONENTS: &[&str] = - &[".ssh", ".gnupg", ".aws", ".azure", ".kube", "keychains"]; - if segments.iter().any(|s| SENSITIVE_COMPONENTS.contains(s)) { - return true; - } - // Windows DPAPI / credential stores live under `…\Microsoft\{Protect, - // Credentials,Crypto,Vault}` — match the pair so the generic second - // name can't false-positive an unrelated project directory. - if segments.windows(2).any(|w| { - w[0] == "microsoft" && matches!(w[1], "protect" | "credentials" | "crypto" | "vault") - }) { - return true; - } - - // (b) Core OS directories — matched by absolute prefix. Unconditional, - // unlike the user-overridable `forbidden_paths`. - const SYSTEM_PREFIXES: &[&str] = &[ - // POSIX - "/etc", - "/root", - "/boot", - "/proc", - "/sys", - // macOS (note: /private is intentionally NOT blocked — macOS temp - // dirs and /etc canonicalize under /private/var and /private/etc). - "/system", - // Windows - "c:/windows", - "c:/program files", - "c:/program files (x86)", - "c:/programdata", - ]; - SYSTEM_PREFIXES - .iter() - .any(|p| lc_path == *p || lc_path.starts_with(&format!("{p}/"))) - } - - /// True if `path` is within a configured trusted root. When `require_write` - /// is set, only `ReadWrite` roots match. Never matches credential stores. 
- pub fn is_within_trusted_root(&self, path: &Path, require_write: bool) -> bool { - if Self::is_always_forbidden(path) { - return false; - } - self.trusted_roots.iter().any(|root| { - if require_write && root.access != TrustedAccess::ReadWrite { - return false; - } - let root_path = PathBuf::from(self.expand_tilde(&root.path)); - let canonical_root = root_path - .canonicalize() - .unwrap_or_else(|_| root_path.clone()); - path.starts_with(&root_path) || path.starts_with(&canonical_root) - }) - } - - /// Validate that a resolved path is still inside the workspace. - /// Call this AFTER joining `workspace_dir` + relative path and canonicalizing. - pub fn is_resolved_path_allowed(&self, resolved: &Path) -> bool { - self.is_resolved_path_allowed_for(resolved, false) - } - - /// Operation-aware resolved-path check: allowed when under the workspace, or - /// within a trusted root (write roots only when `require_write`). Prefers the - /// canonical workspace root so `/a/../b` style config paths don't misfire. - pub fn is_resolved_path_allowed_for(&self, resolved: &Path, require_write: bool) -> bool { - if Self::is_always_forbidden(resolved) { - return false; - } - let workspace_root = self - .workspace_dir - .canonicalize() - .unwrap_or_else(|_| self.workspace_dir.clone()); - resolved.starts_with(&workspace_root) - || self.is_within_trusted_root(resolved, require_write) - } - - /// Check `resolved` against every entry in `forbidden_paths`, resolving relative - /// entries against `workspace_root`. Absolute entries whose prefix IS the workspace - /// root are skipped — the workspace containment check already covers them. - fn check_resolved_against_forbidden( - &self, - resolved: &Path, - workspace_root: &Path, - ) -> Result<(), String> { - // Credential stores are never reachable, even via a trusted-root grant. 
- if Self::is_always_forbidden(resolved) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Resolved path is a protected credential store: {}", - resolved.display() - )); - } - // A trusted-root grant takes precedence over forbidden_paths for its subtree. - if self.is_within_trusted_root(resolved, false) { - return Ok(()); - } - for forbidden in &self.forbidden_paths { - let forbidden_path = PathBuf::from(self.expand_tilde(forbidden)); - let forbidden_resolved = if forbidden_path.is_absolute() { - if workspace_root.starts_with(&forbidden_path) { - continue; - } - forbidden_path - } else { - workspace_root.join(forbidden_path) - }; - if resolved.starts_with(&forbidden_resolved) { - return Err(format!( - "{POLICY_BLOCKED_MARKER} Resolved path is inside a forbidden directory: {}", - forbidden_resolved.display() - )); - } - } - Ok(()) - } - - /// Check if autonomy level permits any action at all - pub fn can_act(&self) -> bool { - self.autonomy != AutonomyLevel::ReadOnly - } - - /// Enforce policy for a tool operation. - /// - /// Read operations are always allowed by autonomy/rate gates. - /// Act operations require non-readonly autonomy and available action budget. - pub fn enforce_tool_operation( - &self, - operation: ToolOperation, - operation_name: &str, - ) -> Result<(), String> { - match operation { - ToolOperation::Read => Ok(()), - ToolOperation::Act => { - if !self.can_act() { - log::warn!( - "[openhuman:policy] Operation '{}' blocked: read-only mode", - operation_name - ); - return Err(format!( - "{POLICY_BLOCKED_MARKER} Security policy: read-only mode, cannot perform \ - '{operation_name}'. Do not retry; this tier blocks all write actions." - )); - } - - if !self.record_action() { - log::warn!( - "[openhuman:policy] Operation '{}' blocked: rate limit exceeded", - operation_name - ); - return Err(format!( - "Rate limit exceeded: action budget exhausted ({} actions/hour). 
Increase the limit in Settings -> Advanced -> Agent autonomy or wait for the rolling one-hour window to refill.", - self.max_actions_per_hour - )); - } - - log::debug!( - "[openhuman:policy] Operation '{}' allowed (actions: {}/{})", - operation_name, - self.tracker.count(), - self.max_actions_per_hour - ); - Ok(()) - } - } - } - - /// Record an action and check if the rate limit has been exceeded. - /// Returns `true` if the action is allowed, `false` if rate-limited. - pub fn record_action(&self) -> bool { - let count = self.tracker.record(); - count <= self.max_actions_per_hour as usize - } - - /// Check if the rate limit would be exceeded without recording. - pub fn is_rate_limited(&self) -> bool { - self.tracker.count() >= self.max_actions_per_hour as usize - } - - /// Build from config sections - pub fn from_config( - autonomy_config: &crate::openhuman::config::AutonomyConfig, - workspace_dir: &Path, - action_dir: &Path, - ) -> Self { - log::info!( - "[openhuman:policy] SecurityPolicy created: autonomy={:?}, workspace_only={}, allowed_cmds={}, max_actions/hr={}", - autonomy_config.level, - autonomy_config.workspace_only, - autonomy_config.allowed_commands.len(), - autonomy_config.max_actions_per_hour - ); - - // `auto_approve` is the user's "Always allow" allowlist: the - // `ApprovalGate` reads it via `live_policy::current()` and skips the - // interactive prompt for any tool named in it. Tier + `CommandClass` - // (and the unconditional read-only / forbidden-path / high-risk denials) - // still run *before* the gate, so the allowlist can only suppress the - // human prompt — it can never override a hard policy denial. - - // The default projects home (`~/OpenHuman/projects`) is always a - // read-write trusted root so the coding agent can create/edit projects - // there regardless of tier or `workspace_only`. 
Injected here — the one - // autonomy→policy chokepoint every session goes through — because the - // channels-startup injection is skipped on cores with no listening - // integrations (web-chat-only), and a freshly reloaded config wouldn't - // carry an in-memory edit anyway. A user-granted entry is left as-is. - let mut trusted_roots = autonomy_config.trusted_roots.clone(); - let projects_path = crate::openhuman::config::default_projects_dir() - .to_string_lossy() - .to_string(); - if !trusted_roots.iter().any(|r| r.path == projects_path) { - trusted_roots.push(TrustedRoot { - path: projects_path, - access: TrustedAccess::ReadWrite, - }); - } - - Self { - autonomy: autonomy_config.level, - workspace_dir: workspace_dir.to_path_buf(), - action_dir: action_dir.to_path_buf(), - workspace_only: autonomy_config.workspace_only, - allowed_commands: autonomy_config.allowed_commands.clone(), - forbidden_paths: autonomy_config.forbidden_paths.clone(), - max_actions_per_hour: autonomy_config.max_actions_per_hour, - max_cost_per_day_cents: autonomy_config.max_cost_per_day_cents, - require_approval_for_medium_risk: autonomy_config.require_approval_for_medium_risk, - block_high_risk_commands: autonomy_config.block_high_risk_commands, - trusted_roots, - allow_tool_install: autonomy_config.allow_tool_install, - auto_approve: autonomy_config.auto_approve.clone(), - tracker: ActionTracker::new(), - canonical_workspace: Arc::new(OnceCell::new()), - } - } -} - -/// Validate that a file path resolves within a given root directory. -/// Canonicalizes both paths and checks that the resolved candidate -/// starts with the root. Callers should check `.is_file()` first -/// to avoid errors on non-existent paths (normal missing-file case). -/// -/// Used to prevent path traversal in agent definition TOML files and -/// other user-controllable file references. 
-pub fn validate_path_within_root( - candidate: &std::path::Path, - root: &std::path::Path, -) -> Result { - let resolved_root = root - .canonicalize() - .map_err(|e| format!("workspace root: {e}"))?; - let resolved = candidate - .canonicalize() - .map_err(|e| format!("{}: {e}", candidate.display()))?; - if !resolved.starts_with(&resolved_root) { - return Err(format!( - "path escapes root: {} is not under {}", - resolved.display(), - resolved_root.display() - )); - } - Ok(resolved) -} - -#[cfg(test)] -#[path = "policy_tests.rs"] -mod tests; diff --git a/src/openhuman/security/policy/command_checks.rs b/src/openhuman/security/policy/command_checks.rs new file mode 100644 index 0000000000..8296834438 --- /dev/null +++ b/src/openhuman/security/policy/command_checks.rs @@ -0,0 +1,446 @@ +use crate::openhuman::util::floor_char_boundary; + +use super::policy_command::{ + classify_segment, command_basename, contains_unquoted_char, contains_unquoted_single_ampersand, + has_dangerous_env_prefix, has_hidden_execution, has_leading_env_assignment, + is_command_executor, normalized_command_name, skip_env_assignments, split_unquoted_segments, +}; +use super::types::{ + AutonomyLevel, CommandClass, CommandRiskLevel, GateDecision, SecurityPolicy, + POLICY_BLOCKED_MARKER, +}; + +impl SecurityPolicy { + /// Classify command risk. Any high-risk segment marks the whole command high. + pub fn command_risk_level(&self, command: &str) -> CommandRiskLevel { + let mut saw_medium = false; + + for segment in split_unquoted_segments(command) { + let cmd_part = skip_env_assignments(&segment); + let mut words = cmd_part.split_whitespace(); + let Some(base_raw) = words.next() else { + continue; + }; + + let base = normalized_command_name(base_raw); + + let args: Vec = words.map(|w| w.to_ascii_lowercase()).collect(); + let joined_segment = cmd_part.to_ascii_lowercase(); + + // High-risk = catastrophic / irreversible / privilege-escalating / + // system-control commands ONLY. 
Interpreters (python/bash/…), + // network tools (curl/wget/ssh/…), and ordinary rm/chmod/chown are + // deliberately NOT high-risk: they are routine for a coding agent and + // are treated as medium-risk below (prompted in Supervised, run in + // Full). This keeps "Full access" actually able to run code while + // still guarding the few irreversible / system-destroying commands. + if matches!( + base.as_str(), + "mkfs" + | "dd" + | "shutdown" + | "reboot" + | "halt" + | "poweroff" + | "sudo" + | "su" + | "mount" + | "umount" + | "iptables" + | "ufw" + | "firewall-cmd" + | "useradd" + | "userdel" + | "usermod" + | "passwd" + ) { + return CommandRiskLevel::High; + } + + if joined_segment.contains("rm -rf /") + || joined_segment.contains("rm -fr /") + || joined_segment.contains(":(){:|:&};:") + { + return CommandRiskLevel::High; + } + + // Medium-risk commands (state-changing, but not inherently destructive) + let medium = match base.as_str() { + "git" => args.first().is_some_and(|verb| { + matches!( + verb.as_str(), + "commit" + | "push" + | "reset" + | "clean" + | "rebase" + | "merge" + | "cherry-pick" + | "revert" + | "branch" + | "checkout" + | "switch" + | "tag" + ) + }), + "npm" | "pnpm" | "yarn" => args.first().is_some_and(|verb| { + matches!( + verb.as_str(), + "install" | "add" | "remove" | "uninstall" | "update" | "publish" + ) + }), + "cargo" => args.first().is_some_and(|verb| { + matches!( + verb.as_str(), + "add" | "remove" | "install" | "clean" | "publish" + ) + }), + "touch" | "mkdir" | "mv" | "cp" | "ln" | "rm" | "chmod" | "chown" | "curl" + | "wget" | "nc" | "ncat" | "netcat" | "scp" | "ssh" | "ftp" | "telnet" => true, + _ => false, + }; + + // Interpreters / code executors run arbitrary code — medium-risk + // (that is the job of a coding agent): prompted in Supervised, + // allowed in Full. They are no longer classified high-risk. 
+ let medium = medium || is_command_executor(base.as_str()); + + saw_medium |= medium; + } + + if saw_medium { + CommandRiskLevel::Medium + } else { + CommandRiskLevel::Low + } + } + + /// Classify a shell command into a fail-closed [`CommandClass`]. The highest + /// class across all `;`/`|`/`&&`/`||`/newline-separated segments wins, and a + /// file redirect (`>`/`>>`) or `tee` lifts the class to at least `Write` no + /// matter how benign the base looks (`cat x > y` writes `y`). + /// + /// This is the deterministic floor the harness gate keys on; an LLM-declared + /// category may only *raise* it (`gate = max(rust_floor, llm_declared)`), + /// never lower it. + pub fn classify_command(&self, command: &str) -> CommandClass { + let mut class = CommandClass::Read; + for segment in split_unquoted_segments(command) { + let cmd_part = skip_env_assignments(&segment); + let mut words = cmd_part.split_whitespace(); + let Some(base_raw) = words.next() else { + continue; + }; + let base = normalized_command_name(base_raw); + let args: Vec = words.map(|w| w.to_ascii_lowercase()).collect(); + let joined = cmd_part.to_ascii_lowercase(); + class = class.max(classify_segment(&base, &args, &joined)); + } + // A redirect or `tee` writes a file regardless of the base command. + if contains_unquoted_char(command, '>') + || command + .split_whitespace() + .any(|w| w == "tee" || w.ends_with("/tee")) + { + class = class.max(CommandClass::Write); + } + class + } + + /// The gate decision for an acting tool call of `class` under this policy's + /// autonomy tier. The harness turns `Prompt` into an `ApprovalGate` + /// round-trip *before* the tool runs; `Block` is refused outright. + /// + /// Matrix: read-only allows only `Read`; ask-before-edit (`Supervised`) + /// prompts on every acting class; full runs `Read`/`Write` silently but + /// always prompts on `Network`/`Destructive`. 
+ pub fn gate_decision(&self, class: CommandClass) -> GateDecision { + match self.autonomy { + AutonomyLevel::ReadOnly => match class { + CommandClass::Read => GateDecision::Allow, + _ => GateDecision::Block, + }, + AutonomyLevel::Supervised => match class { + CommandClass::Read => GateDecision::Allow, + _ => GateDecision::Prompt, + }, + AutonomyLevel::Full => match class { + CommandClass::Read | CommandClass::Write => GateDecision::Allow, + CommandClass::Network | CommandClass::Install | CommandClass::Destructive => { + GateDecision::Prompt + } + }, + } + } + + /// Defense-in-depth check for the harness-gated command flow (Option 2). + /// + /// The run / prompt / block decision is made by [`Self::gate_decision`] + + /// the process-global `ApprovalGate` (which prompts the human *before* + /// `execute()`), so by the time a tool calls this the command is either a + /// read or an already-approved act. This enforces what must still hold: + /// + /// - **Read-only**: only `Read`-class commands run (`Block` otherwise). + /// - **Supervised**: no *hidden execution* (command/process substitution, + /// backticks, background `&`) that could smuggle an unseen command past + /// the approval the human read. Plain redirects (`2>&1`, `> file`) and + /// pipes are fine here — `classify_command` already lifts redirects to + /// `Write` so the gate prompted on them, and the human approved the + /// literal command. Full is trusted and skips the structural guard. + /// + /// Returns the classified [`CommandClass`] on success. + pub fn check_gated_command(&self, command: &str) -> Result { + let class = self.classify_command(command); + if self.gate_decision(class) == GateDecision::Block { + return Err(format!( + "{POLICY_BLOCKED_MARKER} Security policy: read-only mode — only read commands are \ + permitted. Do not retry this command; use a read-only approach or report that it \ + cannot be done in this mode." 
+ )); + } + if self.autonomy != AutonomyLevel::Full && has_hidden_execution(command) { + return Err(format!( + "{POLICY_BLOCKED_MARKER} Command blocked: command/process substitution ($(…), \ + <(…)), backticks, and background (&) are not allowed in this mode — they can run \ + a hidden command the approval prompt wouldn't show. Plain redirects like `2>&1` \ + are fine. Do not retry as-is; rewrite the command without these constructs." + )); + } + Ok(class) + } + + /// Parse an LLM-declared command category. This is an **escalate-only** + /// hint: callers combine it with the deterministic floor via + /// `classify_command(cmd).max(declared)`, so the model can *raise* the gate + /// (e.g. flag a `Write` as `Destructive` to request confirmation) but can + /// never lower what the runtime determined. Unknown / empty → `None`. + pub fn parse_declared_class(declared: &str) -> Option { + match declared.trim().to_ascii_lowercase().as_str() { + "read" => Some(CommandClass::Read), + "write" => Some(CommandClass::Write), + "network" => Some(CommandClass::Network), + "install" => Some(CommandClass::Install), + "destructive" => Some(CommandClass::Destructive), + _ => None, + } + } + + /// Validate full command execution policy (allowlist + risk gate). + pub fn validate_command_execution( + &self, + command: &str, + approved: bool, + ) -> Result { + if !self.is_command_allowed(command) { + // Truncate the command in BOTH the log and the Err return: the Err + // string is bubbled back to the frontend, and a full untruncated + // command can leak secrets in args (e.g. `curl -H "Authorization: + // Bearer …"`, `psql "postgres://user:pass@…"`). The 80-char cap + // matches the log truncation so a long base command with safe args + // still shows enough context to diagnose the block. 
+ let truncated = &command[..floor_char_boundary(command, 80)]; + log::warn!( + "[openhuman:policy] Command blocked by allowlist: {}", + truncated + ); + return Err(format!( + "{POLICY_BLOCKED_MARKER} Command not allowed by security policy: {truncated}. \ + Do not retry this command; it is off the allowlist for this mode." + )); + } + + let risk = self.command_risk_level(command); + + if risk == CommandRiskLevel::High { + if self.block_high_risk_commands { + log::warn!( + "[openhuman:policy] High-risk command blocked: {}", + &command[..floor_char_boundary(command, 80)] + ); + return Err(format!( + "{POLICY_BLOCKED_MARKER} Command blocked: high-risk command is disallowed by \ + policy. Do not retry this command; choose a safer approach or report that it \ + cannot be done." + )); + } + if self.autonomy == AutonomyLevel::Supervised && !approved { + log::warn!( + "[openhuman:policy] High-risk command needs approval: {}", + &command[..floor_char_boundary(command, 80)] + ); + return Err( + "Command requires explicit approval (approved=true): high-risk operation" + .into(), + ); + } + } + + if risk == CommandRiskLevel::Medium + && self.autonomy == AutonomyLevel::Supervised + && self.require_approval_for_medium_risk + && !approved + { + log::info!( + "[openhuman:policy] Medium-risk command needs approval: {}", + &command[..floor_char_boundary(command, 80)] + ); + return Err( + "Command requires explicit approval (approved=true): medium-risk operation".into(), + ); + } + + log::debug!( + "[openhuman:policy] Command validated: risk={:?}, approved={}, cmd={}", + risk, + approved, + &command[..floor_char_boundary(command, 80)] + ); + Ok(risk) + } + + /// Check if a shell command is allowed. 
+ /// + /// Validates the **entire** command string, not just the first word: + /// - Blocks subshell operators (`` ` ``, `$(`) that hide arbitrary execution + /// - Splits on command separators (`|`, `&&`, `||`, `;`, newlines) and + /// validates each sub-command against the allowlist + /// - Blocks single `&` background chaining (`&&` remains supported) + /// - Blocks output redirections (`>`, `>>`) that could write outside workspace + /// - Blocks dangerous arguments (e.g. `find -exec`, `git config`) + pub fn is_command_allowed(&self, command: &str) -> bool { + if self.autonomy == AutonomyLevel::ReadOnly { + return false; + } + + // Full access bypasses the command allowlist AND the structural guards + // (redirects, pipes, subshells, background) — a Full-access agent is + // trusted to run any command, including the `mkdir`/`node`/`python`/ + // redirect-using commands a coding workflow needs. The remaining safety + // net is `validate_command_execution`'s high-risk handling (still gated + // by `block_high_risk_commands`), plus path-level `forbidden_paths` and + // any configured sandbox. The allowlist + structural guards below stay + // in force for Supervised, which runs only curated commands. + if self.autonomy == AutonomyLevel::Full { + return true; + } + + // Block subshell/expansion operators — these allow hiding arbitrary + // commands inside an allowed command (e.g. `echo $(rm -rf /)`) + if command.contains('`') + || command.contains("$(") + || command.contains("${") + || command.contains("<(") + || command.contains(">(") + { + return false; + } + + // Block output redirections (`>`, `>>`) — they can write to arbitrary paths. + // Ignore quoted literals, e.g. `echo "a>b"`. + if contains_unquoted_char(command, '>') { + return false; + } + + // Block `tee` — it can write to arbitrary files, bypassing the + // redirect check above (e.g. 
`echo secret | tee /etc/crontab`) + if command + .split_whitespace() + .any(|w| w == "tee" || w.ends_with("/tee")) + { + return false; + } + + // Block background command chaining (`&`), which can hide extra + // sub-commands and outlive timeout expectations. Keep `&&` allowed. + if contains_unquoted_single_ampersand(command) { + return false; + } + + // Split on unquoted command separators and validate each sub-command. + let segments = split_unquoted_segments(command); + for segment in &segments { + // Reject ANY segment that prefixes the command with an env-var + // assignment, not just the known-dangerous names. Helper-style + // exec primitives (`GIT_SSH=./wrapper git ls-remote`, + // `SSH_ASKPASS=./prompt ssh user@host`, `LD_PRELOAD=./libx.so + // ls`, etc.) change which binary the allowed command actually + // resolves to — or change its behaviour via a hook — without + // any blocked command name ever appearing in the segment. The + // allowlist already names every command we want to permit, and + // none of those commands need an operator-set env var at + // invoke time, so the broader gate has no false-positive + // surface on the approved path. `has_dangerous_env_prefix` + // remains in the source for legacy tests and as the + // narrower-grained signal. + if has_leading_env_assignment(segment) || has_dangerous_env_prefix(segment) { + return false; + } + + // Strip leading env var assignments (e.g. 
FOO=bar cmd) + let cmd_part = skip_env_assignments(segment); + + let mut words = cmd_part.split_whitespace(); + let base_raw = words.next().unwrap_or(""); + let base_cmd = command_basename(base_raw); + + if base_cmd.is_empty() { + continue; + } + + if !self + .allowed_commands + .iter() + .any(|allowed| allowed == base_cmd) + { + return false; + } + + // Validate arguments for the command + let args: Vec = words.map(|w| w.to_ascii_lowercase()).collect(); + if !self.is_args_safe(base_cmd, &args) { + return false; + } + } + + // At least one command must be present + let has_cmd = segments.iter().any(|s| { + let s = skip_env_assignments(s.trim()); + s.split_whitespace().next().is_some_and(|w| !w.is_empty()) + }); + + has_cmd + } + + /// Check for dangerous arguments that allow sub-command execution. + fn is_args_safe(&self, base: &str, args: &[String]) -> bool { + let base = base.to_ascii_lowercase(); + if is_command_executor(base.as_str()) { + return false; + } + + match base.as_str() { + "find" => { + // -exec / -ok run a command per match. -execdir / -okdir do + // the same with the working directory set to the match's + // parent — same code-execution semantics, just with a + // different cwd, so they must be blocked alongside. + !args.iter().any(|arg| { + arg == "-exec" || arg == "-ok" || arg == "-execdir" || arg == "-okdir" + }) + } + "git" => { + // git config, alias, and -c can be used to set dangerous options + // (e.g. 
git config core.editor "rm -rf /") + !args.iter().any(|arg| { + arg == "config" + || arg.starts_with("config.") + || arg == "alias" + || arg.starts_with("alias.") + || arg == "-c" + }) + } + "date" => args.is_empty(), + _ => true, + } + } +} diff --git a/src/openhuman/security/policy/enforcement.rs b/src/openhuman/security/policy/enforcement.rs new file mode 100644 index 0000000000..46f6635b4b --- /dev/null +++ b/src/openhuman/security/policy/enforcement.rs @@ -0,0 +1,157 @@ +use std::path::Path; + +use super::types::{ + ActionTracker, AutonomyLevel, SecurityPolicy, ToolOperation, TrustedAccess, TrustedRoot, + POLICY_BLOCKED_MARKER, +}; +use std::sync::Arc; +use tokio::sync::OnceCell; + +impl SecurityPolicy { + /// Check if autonomy level permits any action at all + pub fn can_act(&self) -> bool { + self.autonomy != AutonomyLevel::ReadOnly + } + + /// Enforce policy for a tool operation. + /// + /// Read operations are always allowed by autonomy/rate gates. + /// Act operations require non-readonly autonomy and available action budget. + pub fn enforce_tool_operation( + &self, + operation: ToolOperation, + operation_name: &str, + ) -> Result<(), String> { + match operation { + ToolOperation::Read => Ok(()), + ToolOperation::Act => { + if !self.can_act() { + log::warn!( + "[openhuman:policy] Operation '{}' blocked: read-only mode", + operation_name + ); + return Err(format!( + "{POLICY_BLOCKED_MARKER} Security policy: read-only mode, cannot perform \ + '{operation_name}'. Do not retry; this tier blocks all write actions." + )); + } + + if !self.record_action() { + log::warn!( + "[openhuman:policy] Operation '{}' blocked: rate limit exceeded", + operation_name + ); + return Err(format!( + "Rate limit exceeded: action budget exhausted ({} actions/hour). 
Increase the limit in Settings -> Advanced -> Agent autonomy or wait for the rolling one-hour window to refill.", + self.max_actions_per_hour + )); + } + + log::debug!( + "[openhuman:policy] Operation '{}' allowed (actions: {}/{})", + operation_name, + self.tracker.count(), + self.max_actions_per_hour + ); + Ok(()) + } + } + } + + /// Record an action and check if the rate limit has been exceeded. + /// Returns `true` if the action is allowed, `false` if rate-limited. + pub fn record_action(&self) -> bool { + let count = self.tracker.record(); + count <= self.max_actions_per_hour as usize + } + + /// Check if the rate limit would be exceeded without recording. + pub fn is_rate_limited(&self) -> bool { + self.tracker.count() >= self.max_actions_per_hour as usize + } + + /// Build from config sections + pub fn from_config( + autonomy_config: &crate::openhuman::config::AutonomyConfig, + workspace_dir: &Path, + action_dir: &Path, + ) -> Self { + log::info!( + "[openhuman:policy] SecurityPolicy created: autonomy={:?}, workspace_only={}, allowed_cmds={}, max_actions/hr={}", + autonomy_config.level, + autonomy_config.workspace_only, + autonomy_config.allowed_commands.len(), + autonomy_config.max_actions_per_hour + ); + + // `auto_approve` is the user's "Always allow" allowlist: the + // `ApprovalGate` reads it via `live_policy::current()` and skips the + // interactive prompt for any tool named in it. Tier + `CommandClass` + // (and the unconditional read-only / forbidden-path / high-risk denials) + // still run *before* the gate, so the allowlist can only suppress the + // human prompt — it can never override a hard policy denial. + + // The default projects home (`~/OpenHuman/projects`) is always a + // read-write trusted root so the coding agent can create/edit projects + // there regardless of tier or `workspace_only`. 
Injected here — the one + // autonomy→policy chokepoint every session goes through — because the + // channels-startup injection is skipped on cores with no listening + // integrations (web-chat-only), and a freshly reloaded config wouldn't + // carry an in-memory edit anyway. A user-granted entry is left as-is. + let mut trusted_roots = autonomy_config.trusted_roots.clone(); + let projects_path = crate::openhuman::config::default_projects_dir() + .to_string_lossy() + .to_string(); + if !trusted_roots.iter().any(|r| r.path == projects_path) { + trusted_roots.push(TrustedRoot { + path: projects_path, + access: TrustedAccess::ReadWrite, + }); + } + + Self { + autonomy: autonomy_config.level, + workspace_dir: workspace_dir.to_path_buf(), + action_dir: action_dir.to_path_buf(), + workspace_only: autonomy_config.workspace_only, + allowed_commands: autonomy_config.allowed_commands.clone(), + forbidden_paths: autonomy_config.forbidden_paths.clone(), + max_actions_per_hour: autonomy_config.max_actions_per_hour, + max_cost_per_day_cents: autonomy_config.max_cost_per_day_cents, + require_approval_for_medium_risk: autonomy_config.require_approval_for_medium_risk, + block_high_risk_commands: autonomy_config.block_high_risk_commands, + trusted_roots, + allow_tool_install: autonomy_config.allow_tool_install, + auto_approve: autonomy_config.auto_approve.clone(), + tracker: ActionTracker::new(), + canonical_workspace: Arc::new(OnceCell::new()), + } + } +} + +/// Validate that a file path resolves within a given root directory. +/// Canonicalizes both paths and checks that the resolved candidate +/// starts with the root. Callers should check `.is_file()` first +/// to avoid errors on non-existent paths (normal missing-file case). +/// +/// Used to prevent path traversal in agent definition TOML files and +/// other user-controllable file references. 
+pub fn validate_path_within_root( + candidate: &std::path::Path, + root: &std::path::Path, +) -> Result { + let resolved_root = root + .canonicalize() + .map_err(|e| format!("workspace root: {e}"))?; + let resolved = candidate + .canonicalize() + .map_err(|e| format!("{}: {e}", candidate.display()))?; + if !resolved.starts_with(&resolved_root) { + return Err(format!( + "path escapes root: {} is not under {}", + resolved.display(), + resolved_root.display() + )); + } + Ok(resolved) +} diff --git a/src/openhuman/security/policy/mod.rs b/src/openhuman/security/policy/mod.rs new file mode 100644 index 0000000000..0a60ff3f1e --- /dev/null +++ b/src/openhuman/security/policy/mod.rs @@ -0,0 +1,21 @@ +mod command_checks; +mod enforcement; +mod path_checks; + +#[path = "policy_command.rs"] +mod policy_command; + +mod types; + +pub use enforcement::validate_path_within_root; +pub use types::{ + ActionTracker, AutonomyLevel, CommandClass, CommandRiskLevel, GateDecision, SecurityPolicy, + ToolOperation, TrustedAccess, TrustedRoot, POLICY_BLOCKED_MARKER, POLICY_DENIED_MARKER, +}; + +#[cfg(test)] +use std::path::{Path, PathBuf}; + +#[cfg(test)] +#[path = "policy_tests.rs"] +mod tests; diff --git a/src/openhuman/security/policy/path_checks.rs b/src/openhuman/security/policy/path_checks.rs new file mode 100644 index 0000000000..fb930b05d2 --- /dev/null +++ b/src/openhuman/security/policy/path_checks.rs @@ -0,0 +1,480 @@ +use std::path::{Path, PathBuf}; + +use super::types::{SecurityPolicy, TrustedAccess, POLICY_BLOCKED_MARKER}; +use super::types::{WORKSPACE_INTERNAL_DIRS, WORKSPACE_INTERNAL_FILES}; + +impl SecurityPolicy { + /// Expand a leading `~/` to the user's home directory. Delegates to + /// [`crate::openhuman::config::expand_tilde`] — the single source of truth — + /// so policy and config expand paths byte-for-byte identically (and both + /// produce platform-native separators; see issue #3353). 
+ pub(super) fn expand_tilde(&self, path: &str) -> String { + crate::openhuman::config::expand_tilde(path) + } + + /// Path pre-check: string-level rules plus a best-effort canonicalize pass that catches symlink escapes. + /// Use `validate_path()` for any path that will be used for file I/O. + pub fn is_path_string_allowed(&self, path: &str) -> bool { + // Block null bytes (can truncate paths in C-backed syscalls) + if path.contains('\0') { + return false; + } + + // Block path traversal: check for ".." as a path component + if Path::new(path) + .components() + .any(|c| matches!(c, std::path::Component::ParentDir)) + { + return false; + } + + // Block URL-encoded traversal attempts (e.g. ..%2f) + let lower = path.to_lowercase(); + if lower.contains("..%2f") || lower.contains("%2f..") { + return false; + } + + // Expand tilde for comparison + let expanded = self.expand_tilde(path); + let expanded_path = Path::new(&expanded); + + // Credential stores are never reachable, even via a trusted-root grant. + if Self::is_always_forbidden(expanded_path) { + return false; + } + + // A trusted root grants access to its subtree, taking precedence over + // workspace_only and forbidden_paths. Read-vs-write is enforced by the + // operation-specific validators (validate_path / validate_parent_path). + let in_trusted_root = self.is_within_trusted_root(expanded_path, false); + + // Block agent access to internal state paths under workspace_dir + // (unless the path falls under an explicitly granted trusted root). + if !in_trusted_root { + let check = if expanded_path.is_absolute() { + expanded_path.to_path_buf() + } else { + self.workspace_dir.join(expanded_path) + }; + if self.is_workspace_internal_path(&check) { + log::trace!( + "[security:policy] path blocked: agent access to workspace-internal state (requested={}, resolved={})", + path, + check.display() + ); + return false; + } + } + + // Block absolute paths when workspace_only is set (unless trusted-rooted).
+ if self.workspace_only && expanded_path.is_absolute() && !in_trusted_root { + return false; + } + + // Block forbidden paths using path-component-aware matching (unless trusted-rooted). + if !in_trusted_root { + for forbidden in &self.forbidden_paths { + let forbidden_expanded = self.expand_tilde(forbidden); + let forbidden_path = Path::new(&forbidden_expanded); + if expanded_path.starts_with(forbidden_path) { + return false; + } + } + } + + // Symlink-safe check (#1927). The string-level checks above can be + // bypassed by creating a symlink inside the workspace that points to + // a forbidden tree (e.g. `evil -> /etc/shadow`). Canonicalize the + // path and re-validate `workspace_only` containment + forbidden_paths + // against the resolved location. + if let Some(canonical) = self.try_canonicalize_under_workspace(path) { + if Self::is_always_forbidden(&canonical) { + return false; + } + let workspace_root = self + .workspace_dir + .canonicalize() + .unwrap_or_else(|_| self.workspace_dir.clone()); + let canonical_in_trusted = self.is_within_trusted_root(&canonical, false); + if self.workspace_only + && !canonical.starts_with(&workspace_root) + && !canonical_in_trusted + { + log::trace!( + "[security:policy] path blocked: symlink escapes workspace (requested={}, resolved={}, workspace={})", + path, + canonical.display(), + workspace_root.display() + ); + return false; + } + // If the resolved path stays inside the workspace, trust the + // workspace boundary over forbidden_paths — otherwise a workspace + // that lives under e.g. `/tmp` (common in tests and sandboxes) + // would block every legitimate access. forbidden_paths is meant + // to catch escapes *outside* the workspace, which the workspace + // containment check above already validates. 
+ let inside_workspace = canonical.starts_with(&workspace_root); + if !inside_workspace && !canonical_in_trusted { + for forbidden in &self.forbidden_paths { + let forbidden_expanded = if let Some(stripped) = forbidden.strip_prefix("~/") { + std::env::var("HOME") + .ok() + .map(|h| PathBuf::from(h).join(stripped)) + .unwrap_or_else(|| PathBuf::from(forbidden)) + } else { + PathBuf::from(forbidden) + }; + let forbidden_canonical = forbidden_expanded + .canonicalize() + .unwrap_or(forbidden_expanded); + if canonical.starts_with(&forbidden_canonical) { + log::trace!( + "[security:policy] path blocked: symlink resolves to forbidden tree (requested={}, resolved={}, forbidden={})", + path, + canonical.display(), + forbidden_canonical.display() + ); + return false; + } + } + } + } + + true + } + + /// Resolve a user-supplied path under the workspace, canonicalizing it + /// (or its parent) when present on disk. Used by [`Self::is_path_string_allowed`] + /// to defend against symlink-based escapes that pass the string-level + /// checks. Returns `None` only when neither the path nor its parent can + /// be resolved on disk — in that case the caller falls back to the + /// string-level checks alone (which is the safe default for fresh paths + /// whose entire chain does not yet exist). + fn try_canonicalize_under_workspace(&self, path: &str) -> Option { + let expanded = if let Some(stripped) = path.strip_prefix("~/") { + std::env::var("HOME") + .ok() + .map(|h| PathBuf::from(h).join(stripped))? + } else { + PathBuf::from(path) + }; + let absolute = if expanded.is_absolute() { + expanded + } else { + self.workspace_dir.join(&expanded) + }; + if let Ok(canonical) = absolute.canonicalize() { + return Some(canonical); + } + // Path itself does not exist (e.g. a write-to-new-file call). Try + // canonicalizing the parent + appending the basename so we still + // catch parent chains that resolve via symlink to a forbidden tree. 
+ let parent = absolute.parent()?; + let name = absolute.file_name()?; + parent.canonicalize().ok().map(|p| p.join(name)) + } + + /// Return the canonical form of `workspace_dir`, hydrating the + /// `canonical_workspace` cache on the first call. + /// + /// `validate_path` / `validate_parent_path` both need the canonical + /// workspace root for forbidden-path containment checks. The underlying + /// `tokio::fs::canonicalize` is a `stat(2)` + symlink walk and was + /// previously invoked on every call with the same input. + /// + /// Falls back to the raw `workspace_dir` if `canonicalize` fails (e.g. + /// during early startup or in tests where the workspace doesn't exist on + /// disk), matching the inline behavior the callers used before the cache. + pub(super) async fn workspace_root(&self) -> PathBuf { + self.canonical_workspace + .get_or_init(|| async { + tokio::fs::canonicalize(&self.workspace_dir) + .await + .unwrap_or_else(|_| self.workspace_dir.clone()) + }) + .await + .clone() + } + + /// Validate a path for file I/O: string checks, canonicalize, workspace containment, + /// and forbidden-path check on the resolved path. + /// Returns the canonical `PathBuf` on success. + pub async fn validate_path(&self, path: &str) -> Result { + if !self.is_path_string_allowed(path) { + return Err(format!( + "{POLICY_BLOCKED_MARKER} Path not allowed by security policy: {path}. Do not \ + retry this path; use an allowed location (the workspace or a granted folder)." 
+ )); + } + let expanded = self.expand_tilde(path); + let full_path = if Path::new(&expanded).is_absolute() { + PathBuf::from(&expanded) + } else { + self.action_dir.join(&expanded) + }; + let resolved = tokio::fs::canonicalize(&full_path) + .await + .map_err(|e| format!("Failed to resolve path '{path}': {e}"))?; + if !self.is_resolved_path_allowed_for(&resolved, false) { + return Err(format!( + "{POLICY_BLOCKED_MARKER} Resolved path escapes workspace: {}", + resolved.display() + )); + } + let workspace_root = self.workspace_root().await; + self.check_resolved_against_forbidden(&resolved, &workspace_root)?; + log::debug!( + "[security] validate_path: '{}' resolved to '{}'", + path, + resolved.display() + ); + Ok(resolved) + } + + /// Like `validate_path` but canonicalizes the parent directory. + /// Use for write operations where the target file may not yet exist. + /// Does NOT require the parent directory to exist — walks up to the deepest + /// existing ancestor and checks that for symlink escapes. + /// Returns the canonical full path (parent resolved + filename appended). + pub async fn validate_parent_path(&self, path: &str) -> Result { + if !self.is_path_string_allowed(path) { + return Err(format!( + "{POLICY_BLOCKED_MARKER} Path not allowed by security policy: {path}. Do not \ + retry this path; use an allowed location (the workspace or a granted folder)." + )); + } + let expanded = self.expand_tilde(path); + let full_path = if Path::new(&expanded).is_absolute() { + PathBuf::from(&expanded) + } else { + self.action_dir.join(&expanded) + }; + let parent = full_path + .parent() + .ok_or_else(|| format!("Invalid path (no parent): {path}"))?; + let file_name = full_path + .file_name() + .ok_or_else(|| format!("Invalid path (no filename): {path}"))?; + + // Walk up to the deepest existing ancestor so we can canonicalize without + // requiring the full parent path to exist yet. 
This catches symlink escapes + // in existing path components even when deeper dirs are not created yet. + let mut existing_ancestor = parent.to_path_buf(); + loop { + if existing_ancestor.exists() { + break; + } + match existing_ancestor.parent() { + Some(p) => existing_ancestor = p.to_path_buf(), + None => break, + } + } + let canonical_ancestor = tokio::fs::canonicalize(&existing_ancestor) + .await + .map_err(|e| format!("Failed to resolve parent of '{path}': {e}"))?; + if !self.is_resolved_path_allowed_for(&canonical_ancestor, true) { + return Err(format!( + "{POLICY_BLOCKED_MARKER} Resolved parent path escapes workspace: {}", + canonical_ancestor.display() + )); + } + + // Build resolved result: canonical_ancestor + suffix from existing_ancestor to parent + filename. + // Since is_path_string_allowed blocked "..", all components between the ancestor + // and the intended parent are newly created dirs — no symlinks possible there. + let relative_suffix = parent + .strip_prefix(&existing_ancestor) + .unwrap_or(std::path::Path::new("")); + let resolved_parent = canonical_ancestor.join(relative_suffix); + let result = resolved_parent.join(file_name); + + let workspace_root = self.workspace_root().await; + self.check_resolved_against_forbidden(&canonical_ancestor, &workspace_root)?; + self.check_resolved_against_forbidden(&result, &workspace_root)?; + + log::debug!( + "[security] validate_parent_path: '{}' resolved parent to '{}'", + path, + resolved_parent.display() + ); + Ok(result) + } + + /// Returns `true` if `path` falls under one of the internal-state + /// subdirectories or files within `workspace_dir`. Agent tools must not + /// write to these locations — they contain memory DBs, session transcripts, + /// tokens, and other core persistence that is not part of the agent's + /// action surface. 
+ pub fn is_workspace_internal_path(&self, path: &Path) -> bool { + // Try canonical forms first (handles symlinks), fall back to raw paths + // when they don't exist on disk yet. + let ws_canonical = self.workspace_dir.canonicalize(); + let path_canonical = path.canonicalize(); + let (ws, check_path) = match (&ws_canonical, &path_canonical) { + (Ok(w), Ok(p)) => (w.as_path(), p.as_path()), + _ => (self.workspace_dir.as_path(), path), + }; + if !check_path.starts_with(ws) { + return false; + } + let relative = match check_path.strip_prefix(ws) { + Ok(r) => r, + Err(_) => return false, + }; + let first_component = match relative.components().next() { + Some(std::path::Component::Normal(s)) => s.to_string_lossy(), + _ => return false, + }; + if WORKSPACE_INTERNAL_DIRS + .iter() + .any(|d| *d == first_component.as_ref()) + { + return true; + } + // Check single-file entries (only if the relative path is exactly one component) + if relative.components().count() == 1 + && WORKSPACE_INTERNAL_FILES + .iter() + .any(|f| *f == first_component.as_ref()) + { + return true; + } + false + } + + /// Paths that remain blocked even when a `trusted_root` grant would + /// otherwise reach them — credential stores and core OS directories. A + /// grant on a parent must never expose SSH/GPG/AWS/keychain secrets, nor + /// open `/etc`, `C:\Windows`, `/System`, etc. Matching is **case-insensitive** + /// (Windows/macOS filesystems are), so `.SSH` / `C:\WINDOWS` cannot slip + /// through. Gray-area dirs (`/usr`, `/opt`, `/var`, `~/Library`) stay in the + /// user-overridable `forbidden_paths` instead, so a grant can still reach + /// e.g. `/usr/local/...`. + pub(crate) fn is_always_forbidden(path: &Path) -> bool { + // Normalize separators + case BEFORE splitting: a Windows backslash + // path is a single component on POSIX (and vice-versa), so we segment + // the normalized string rather than rely on `Path::components()`. 
+ let lc_path = path + .to_string_lossy() + .to_ascii_lowercase() + .replace('\\', "/"); + let segments: Vec<&str> = lc_path.split('/').filter(|s| !s.is_empty()).collect(); + + // (a) Credential stores — matched by path segment, location-independent + // (catches e.g. `C:\Users\x\.ssh` and `~/Library/Keychains`). + const SENSITIVE_COMPONENTS: &[&str] = + &[".ssh", ".gnupg", ".aws", ".azure", ".kube", "keychains"]; + if segments.iter().any(|s| SENSITIVE_COMPONENTS.contains(s)) { + return true; + } + // Windows DPAPI / credential stores live under `…\Microsoft\{Protect, + // Credentials,Crypto,Vault}` — match the pair so the generic second + // name can't false-positive an unrelated project directory. + if segments.windows(2).any(|w| { + w[0] == "microsoft" && matches!(w[1], "protect" | "credentials" | "crypto" | "vault") + }) { + return true; + } + + // (b) Core OS directories — matched by absolute prefix. Unconditional, + // unlike the user-overridable `forbidden_paths`. + const SYSTEM_PREFIXES: &[&str] = &[ + // POSIX + "/etc", + "/root", + "/boot", + "/proc", + "/sys", + // macOS (note: /private is intentionally NOT blocked — macOS temp + // dirs and /etc canonicalize under /private/var and /private/etc). + "/system", + // Windows + "c:/windows", + "c:/program files", + "c:/program files (x86)", + "c:/programdata", + ]; + SYSTEM_PREFIXES + .iter() + .any(|p| lc_path == *p || lc_path.starts_with(&format!("{p}/"))) + } + + /// True if `path` is within a configured trusted root. When `require_write` + /// is set, only `ReadWrite` roots match. Never matches credential stores. 
+    pub fn is_within_trusted_root(&self, path: &Path, require_write: bool) -> bool {
+        if Self::is_always_forbidden(path) {
+            return false;
+        }
+        self.trusted_roots.iter().any(|root| {
+            if require_write && root.access != TrustedAccess::ReadWrite {
+                return false;
+            }
+            let root_path = PathBuf::from(self.expand_tilde(&root.path));
+            let canonical_root = root_path
+                .canonicalize()
+                .unwrap_or_else(|_| root_path.clone());
+            path.starts_with(&root_path) || path.starts_with(&canonical_root)
+        })
+    }
+
+    /// Validate that a resolved path is still inside the workspace.
+    /// Call this AFTER joining `workspace_dir` + relative path and canonicalizing.
+    pub fn is_resolved_path_allowed(&self, resolved: &Path) -> bool {
+        self.is_resolved_path_allowed_for(resolved, false)
+    }
+
+    /// Operation-aware resolved-path check: allowed when under the workspace, or
+    /// within a trusted root (write roots only when `require_write`). Prefers the
+    /// canonical workspace root so `/a/../b` style config paths don't misfire.
+    pub fn is_resolved_path_allowed_for(&self, resolved: &Path, require_write: bool) -> bool {
+        if Self::is_always_forbidden(resolved) {
+            return false;
+        }
+        let workspace_root = self
+            .workspace_dir
+            .canonicalize()
+            .unwrap_or_else(|_| self.workspace_dir.clone());
+        resolved.starts_with(&workspace_root)
+            || self.is_within_trusted_root(resolved, require_write)
+    }
+
+    /// Check `resolved` against every entry in `forbidden_paths`, resolving relative
+    /// entries against `workspace_root`. Absolute entries whose prefix IS the workspace
+    /// root are skipped — the workspace containment check already covers them.
+    pub(super) fn check_resolved_against_forbidden(
+        &self,
+        resolved: &Path,
+        workspace_root: &Path,
+    ) -> Result<(), String> {
+        // Credential stores are never reachable, even via a trusted-root grant.
+        if Self::is_always_forbidden(resolved) {
+            return Err(format!(
+                "{POLICY_BLOCKED_MARKER} Resolved path is a protected credential store: {}",
+                resolved.display()
+            ));
+        }
+        // A trusted-root grant takes precedence over forbidden_paths for its subtree.
+        if self.is_within_trusted_root(resolved, false) {
+            return Ok(());
+        }
+        for forbidden in &self.forbidden_paths {
+            let forbidden_path = PathBuf::from(self.expand_tilde(forbidden));
+            let forbidden_resolved = if forbidden_path.is_absolute() {
+                if workspace_root.starts_with(&forbidden_path) {
+                    continue;
+                }
+                forbidden_path
+            } else {
+                workspace_root.join(forbidden_path)
+            };
+            if resolved.starts_with(&forbidden_resolved) {
+                return Err(format!(
+                    "{POLICY_BLOCKED_MARKER} Resolved path is inside a forbidden directory: {}",
+                    forbidden_resolved.display()
+                ));
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/src/openhuman/security/policy_command.rs b/src/openhuman/security/policy/policy_command.rs
similarity index 100%
rename from src/openhuman/security/policy_command.rs
rename to src/openhuman/security/policy/policy_command.rs
diff --git a/src/openhuman/security/policy_tests.rs b/src/openhuman/security/policy/policy_tests.rs
similarity index 100%
rename from src/openhuman/security/policy_tests.rs
rename to src/openhuman/security/policy/policy_tests.rs
diff --git a/src/openhuman/security/policy/types.rs b/src/openhuman/security/policy/types.rs
new file mode 100644
index 0000000000..1ce9c773d7
--- /dev/null
+++ b/src/openhuman/security/policy/types.rs
@@ -0,0 +1,364 @@
+use parking_lot::Mutex;
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+use std::path::PathBuf;
+use std::sync::Arc;
+use std::time::Instant;
+use tokio::sync::OnceCell;
+
+/// Stable, machine-recognizable marker prefixing a **permanent** policy
+/// rejection: the identical `(tool, args)` call can never succeed in the
+/// current tier (read-only blocking a write, a forbidden/credential path, a
+/// disallowed high-risk or hidden-execution command, an off-allowlist command).
+/// The agent harness ([`crate::openhuman::agent::harness::tool_loop`]) detects
+/// this and halts on the **first verbatim repeat** rather than reiterating a
+/// provably-futile call. Kept short and bracketed so it survives the
+/// `Error: …` wrapping the tool layer adds and is easy to grep in logs.
+pub const POLICY_BLOCKED_MARKER: &str = "[policy-blocked]";
+
+/// Stable marker prefixing a **this-turn denial** — the user answered "no" to
+/// an approval prompt, or the prompt timed out / its channel dropped. Unlike a
+/// block this isn't permanent across turns, but re-issuing the *same* call this
+/// turn just re-prompts the user, so the harness records it in the circuit
+/// breaker and stops the agent from re-asking the identical call.
+pub const POLICY_DENIED_MARKER: &str = "[policy-denied]";
+
+/// How much autonomy the agent has
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum AutonomyLevel {
+    /// Read-only: can observe but not act
+    ReadOnly,
+    /// Supervised: acts but requires approval for risky operations
+    #[default]
+    Supervised,
+    /// Full: autonomous execution within policy bounds
+    Full,
+}
+
+/// Access level granted to a trusted root outside the workspace.
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum TrustedAccess {
+    /// Read + list only.
+    #[default]
+    Read,
+    /// Read and write/edit.
+    ReadWrite,
+}
+
+/// A directory outside the workspace the agent is explicitly granted access to.
+/// Takes precedence over `workspace_only` and `forbidden_paths` for its subtree,
+/// except for credential stores (see `SecurityPolicy::is_always_forbidden`).
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+pub struct TrustedRoot {
+    /// Absolute path (a leading `~` is expanded to the user's home).
+    pub path: String,
+    /// Whether the agent may write within this root.
+    #[serde(default)]
+    pub access: TrustedAccess,
+}
+
+/// Risk score for shell command execution.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CommandRiskLevel {
+    Low,
+    Medium,
+    High,
+}
+
+/// Coarse permission bucket the harness approval gate keys on.
+///
+/// Classification is **fail-closed**: a command that is not provably read-only
+/// (and not a recognized network/destructive command) is treated as at least
+/// [`CommandClass::Write`]. Across multiple shell segments the **highest** class
+/// wins (so `ls | curl …` is `Network`). Variants are ordered low→high so
+/// [`Ord`] / [`Iterator::max`] compose them directly.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub enum CommandClass {
+    /// Provably read-only / observational (curated safe-read allowlist).
+    Read,
+    /// State-changing but not inherently catastrophic — the fail-closed default
+    /// for anything not recognized as read/network/destructive.
+    Write,
+    /// Reaches the network (curl/wget/ssh/scp/…). Always prompts, every tier.
+    Network,
+    /// Installs an OS / language package (system package manager, or a *global*
+    /// npm/pnpm/yarn/cargo/pip install). Always-ask in every acting tier,
+    /// including Full — mirrors the dedicated `install_tool` gate so shell
+    /// installs can't slip past it. Project-local installs are ordinary `Write`.
+    Install,
+    /// Catastrophic / irreversible / privilege-escalating / system-control.
+    /// Always prompts, even in Full.
+    Destructive,
+}
+
+/// What the harness should do with an acting tool call of a given
+/// [`CommandClass`] under the session's [`AutonomyLevel`]. Computed by
+/// [`SecurityPolicy::gate_decision`]; the harness translates `Prompt` into an
+/// `ApprovalGate` round-trip *before* the tool's `execute()` runs.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum GateDecision {
+    /// Run without prompting.
+    Allow,
+    /// Require explicit human approval before running.
+    Prompt,
+    /// Refuse outright — no in-tier prompt can authorize it (e.g. any act in
+    /// read-only mode).
+    Block,
+}
+
+/// Classifies whether a tool operation is read-only or side-effecting.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ToolOperation {
+    Read,
+    Act,
+}
+
+/// Sliding-window action tracker for rate limiting.
+#[derive(Debug)]
+pub struct ActionTracker {
+    /// Timestamps of recent actions (kept within the last hour).
+    actions: Mutex<Vec<Instant>>,
+}
+
+impl Default for ActionTracker {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ActionTracker {
+    /// Length of the sliding window over which actions are counted.
+    const WINDOW: std::time::Duration = std::time::Duration::from_secs(3600);
+
+    pub fn new() -> Self {
+        Self {
+            actions: Mutex::new(Vec::new()),
+        }
+    }
+
+    /// Drop timestamps that have aged out of the window ending at `now`.
+    ///
+    /// Age is measured as `now - t` instead of comparing against
+    /// `now - WINDOW`: when that subtraction is not representable as an
+    /// `Instant`, `checked_sub` yields `None`, and substituting `now` as the
+    /// cutoff would discard every recorded action and reset the limiter.
+    fn prune(actions: &mut Vec<Instant>, now: Instant) {
+        actions.retain(|t| now.saturating_duration_since(*t) < Self::WINDOW);
+    }
+
+    /// Record an action and return the current count within the window.
+    pub fn record(&self) -> usize {
+        let mut actions = self.actions.lock();
+        let now = Instant::now();
+        Self::prune(&mut actions, now);
+        actions.push(now);
+        actions.len()
+    }
+
+    /// Count of actions in the current window without recording.
+    pub fn count(&self) -> usize {
+        let mut actions = self.actions.lock();
+        Self::prune(&mut actions, Instant::now());
+        actions.len()
+    }
+}
+
+impl Clone for ActionTracker {
+    fn clone(&self) -> Self {
+        let actions = self.actions.lock();
+        Self {
+            actions: Mutex::new(actions.clone()),
+        }
+    }
+}
+
+/// Subdirectories under `workspace_dir` that hold internal application state
+/// (memory DBs, sessions, tokens, etc.) and must not be writable by agent tools.
+pub(super) const WORKSPACE_INTERNAL_DIRS: &[&str] = &[
+    "memory",
+    "memory_tree",
+    "state",
+    "approval",
+    "sessions",
+    "session_raw",
+    "cron",
+    "devices",
+    "mcp_clients",
+    "subconscious",
+    "vault",
+    "task_sources",
+    "whatsapp_data",
+    "redirect_links",
+    "codegraph",
+    ".openhuman",
+];
+
+/// Files directly under `workspace_dir` that hold secrets or persona config
+/// and must not be writable by agent tools.
+pub(super) const WORKSPACE_INTERNAL_FILES: &[&str] = &[
+    "core.token",
+    "dev-keychain.json",
+    ".env",
+    "SOUL.md",
+    "IDENTITY.md",
+    "HEARTBEAT.md",
+    "PROFILE.md",
+];
+
+/// Security policy enforced on all tool executions
+#[derive(Debug, Clone)]
+pub struct SecurityPolicy {
+    pub autonomy: AutonomyLevel,
+    pub workspace_dir: PathBuf,
+    /// Agent action sandbox root — tools resolve relative paths and default
+    /// their cwd here instead of `workspace_dir`. Kept separate so internal
+    /// state (memory DBs, sessions, tokens) under `workspace_dir` is not
+    /// reachable from agent tool calls.
+    pub action_dir: PathBuf,
+    pub workspace_only: bool,
+    pub allowed_commands: Vec<String>,
+    pub forbidden_paths: Vec<String>,
+    pub max_actions_per_hour: u32,
+    pub max_cost_per_day_cents: u32,
+    pub require_approval_for_medium_risk: bool,
+    pub block_high_risk_commands: bool,
+    /// Directories outside the workspace the agent may access (read or read-write).
+    pub trusted_roots: Vec<TrustedRoot>,
+    /// Whether the agent may install OS packages via the `install_tool` tool.
+    pub allow_tool_install: bool,
+    /// Tool names the user has pre-approved ("Always allow"). The `ApprovalGate`
+    /// skips the interactive prompt for any tool in this set. Sourced from
+    /// `autonomy.auto_approve`; populated/cleared via `config.update_autonomy_settings`
+    /// (or an "Always allow" decision) and observed live via `live_policy`.
+    pub auto_approve: Vec<String>,
+    pub tracker: ActionTracker,
+    /// Lazily-cached canonical form of [`workspace_dir`].
+    ///
+    /// `validate_path` / `validate_parent_path` use the canonical workspace
+    /// root to check resolved paths against `forbidden_paths`. Without a cache
+    /// each call invokes `tokio::fs::canonicalize(&workspace_dir)` — one
+    /// `stat(2)` + symlink walk on the same path on every file op. A single
+    /// agent turn doing tens of read/edit/shell-path validations hits this
+    /// repeatedly with identical input.
+    ///
+    /// `workspace_dir` is effectively immutable for a given `SecurityPolicy`
+    /// (a config update builds a *new* policy via `from_config` and swaps the
+    /// `Arc` in [`live_policy`]), so caching the resolved value is safe and
+    /// stays correct across config updates.
+    ///
+    /// `Arc<OnceCell<PathBuf>>` so the struct stays `Clone` (clone the `Arc`) and
+    /// init happens lazily on the first async call site without blocking
+    /// constructors. Fallback (raw `workspace_dir` if canonicalize fails)
+    /// matches the previous inline behavior exactly.
+    ///
+    /// Visibility is `pub` to match every other field on the struct: external
+    /// crates (Cargo examples, downstream consumers) construct
+    /// `SecurityPolicy` with the `..SecurityPolicy::default()` functional-update
+    /// spread, and Rust requires every field of the target struct to be
+    /// visible to the caller in that syntax — even fields supplied by the
+    /// default. `pub(crate)` was an over-tight first cut that broke
+    /// `examples/mouse_smoke.rs` with E0451.
+    pub canonical_workspace: Arc<OnceCell<PathBuf>>,
+}
+
+impl Default for SecurityPolicy {
+    fn default() -> Self {
+        Self {
+            autonomy: AutonomyLevel::Supervised,
+            workspace_dir: PathBuf::from("."),
+            action_dir: PathBuf::from("."),
+            workspace_only: true,
+            // When adding a new entry to this allowlist, re-audit
+            // `DANGEROUS_ENV_PREFIXES` (see below). Every newly-allowed binary
+            // may introduce its own env-driven subprocess hooks (pager, editor,
+            // loader override, SSH/diff helper, preprocessor) — those names
+            // must be added to the prefix denylist so that the
+            // `KEY=cmd ` shape cannot bypass allowlisting via
+            // `skip_env_assignments` in `is_command_allowed`. Cross-ref #2636.
+            allowed_commands: vec![
+                // Version control
+                "git".into(),
+                // Package managers / build systems
+                "npm".into(),
+                "pnpm".into(),
+                "yarn".into(),
+                "cargo".into(),
+                "make".into(),
+                "cmake".into(),
+                // Directory / file inspection (read-only, low-risk)
+                "ls".into(),
+                "cat".into(),
+                "grep".into(),
+                "find".into(),
+                "echo".into(),
+                "pwd".into(),
+                "wc".into(),
+                "head".into(),
+                "tail".into(),
+                "date".into(),
+                "sort".into(),
+                "uniq".into(),
+                "diff".into(),
+                "which".into(),
+                "uname".into(),
+                "basename".into(),
+                "dirname".into(),
+                "tr".into(),
+                "cut".into(),
+                "realpath".into(),
+                "readlink".into(),
+                "stat".into(),
+                "file".into(),
+                // Filesystem mutations (medium-risk — require approval in Supervised mode)
+                "mkdir".into(),
+                "touch".into(),
+                "cp".into(),
+                "mv".into(),
+                "ln".into(),
+                // Windows read-only equivalents for the same basic
+                // inspection workflows as ls/cat/grep/which.
+                "dir".into(),
+                "type".into(),
+                "where".into(),
+                "findstr".into(),
+                "more".into(),
+            ],
+            forbidden_paths: vec![
+                // System directories (blocked even when workspace_only=false)
+                "/etc".into(),
+                "/root".into(),
+                "/home".into(),
+                "/usr".into(),
+                "/bin".into(),
+                "/sbin".into(),
+                "/lib".into(),
+                "/opt".into(),
+                "/boot".into(),
+                "/dev".into(),
+                "/proc".into(),
+                "/sys".into(),
+                "/var".into(),
+                "/tmp".into(),
+                // Sensitive dotfiles
+                "~/.ssh".into(),
+                "~/.gnupg".into(),
+                "~/.aws".into(),
+                "~/.config".into(),
+            ],
+            // Effectively unlimited — matches AutonomyConfig::default_max_actions_per_hour().
+            // The rate-limiter check is `count <= max`, so u32::MAX is functionally
+            // infinite without requiring an Option sentinel on the field type.
+            max_actions_per_hour: u32::MAX,
+            max_cost_per_day_cents: 500,
+            require_approval_for_medium_risk: true,
+            block_high_risk_commands: true,
+            trusted_roots: Vec::new(),
+            allow_tool_install: false,
+            auto_approve: Vec::new(),
+            tracker: ActionTracker::new(),
+            canonical_workspace: Arc::new(OnceCell::new()),
+        }
+    }
+}
diff --git a/src/openhuman/voice/factory.rs b/src/openhuman/voice/factory.rs
deleted file mode 100644
index 6615c4760e..0000000000
--- a/src/openhuman/voice/factory.rs
+++ /dev/null
@@ -1,1279 +0,0 @@
-//! Factory functions for creating voice (STT / TTS) providers.
-//!
-//! Mirrors the shape of [`crate::openhuman::embeddings::factory`]: a single
-//! entry point that takes a provider name + parameters and returns a boxed
-//! trait object. Production paths pick the provider based on the user's
-//! config (`stt_provider`, `tts_provider`); unit tests use the factory
-//! directly to verify dispatch branches.
-//!
-//! ## Provider-string grammar
-//!
-//! Mirrors the LLM inference factory pattern in
-//! [`crate::openhuman::inference::provider::factory`]:
-//!
-//! | String | Resolves to |
-//! |-----------------------|------------------------------------------------|
-//! | `"cloud"` / `"openhuman"` | OpenHuman backend proxy |
-//! | `"whisper"` | Local Whisper (STT) |
-//! | `"piper"` | Local Piper (TTS) |
-//! | `":"` | Voice provider entry matched by slug |
-//! | `""` | Bare slug — uses provider's default model/voice|
-//!
-//! ## STT providers
-//!
-//! - `"cloud"` → backend Whisper proxy (POST `/openai/v1/audio/transcriptions`).
-//! - `"whisper"` → local Whisper via `WHISPER_BIN` (or in-process `whisper-rs`).
-//! - `":"` → third-party STT API via the voice provider registry
-//! (e.g. `"deepgram:nova-2"`, `"openai:whisper-1"`).
-//!
-//! ## TTS providers
-//!
-//!
- `"cloud"` → backend ElevenLabs proxy (POST `/openai/v1/audio/speech`) -//! which also returns Oculus-15 visemes for the mascot lip-sync. -//! - `"piper"` → local Piper subprocess via `PIPER_BIN`. -//! - `":"` → third-party TTS API via the voice provider registry -//! (e.g. `"openai:alloy"`, `"elevenlabs:"`). -//! -//! ## Logging prefixes -//! -//! All factory branches log against `[voice-factory]`; the wrapped provider -//! implementations log under `[voice-stt]` / `[voice-tts]` so end-to-end -//! traces grep cleanly. - -use std::sync::Arc; - -use async_trait::async_trait; -use log::debug; -use serde::{Deserialize, Serialize}; - -use super::cloud_transcribe::{transcribe_cloud, CloudTranscribeOptions, CloudTranscribeResult}; -use super::local_speech::{synthesize_piper, PiperOptions}; -use super::local_transcribe::{transcribe_whisper, WhisperTranscribeOptions}; -use super::reply_speech::{synthesize_reply, ReplySpeechOptions, ReplySpeechResult}; -use crate::openhuman::config::schema::voice_providers::{ - SttApiStyle, TtsApiStyle, VoiceCapability, -}; -use crate::openhuman::config::Config; -use crate::rpc::RpcOutcome; - -const LOG_PREFIX: &str = "[voice-factory]"; - -// --------------------------------------------------------------------------- -// Provider traits -// --------------------------------------------------------------------------- - -/// Common shape both STT branches return after dispatch. Keeps the wire -/// contract identical regardless of provider — the UI only sees `text`. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SttResult { - pub text: String, - /// Lowercase provider id (`"cloud"`, `"whisper"`) — exposed on the wire - /// so the renderer can show the user which path actually ran. - pub provider: String, -} - -/// Speech-to-text provider abstraction. Cloud (backend proxy) and Whisper -/// (local subprocess / in-process) both implement this; the factory hands -/// the caller a boxed trait object. 
-#[async_trait] -pub trait SttProvider: Send + Sync { - /// Stable identifier used in logs and config (`"cloud"`, `"whisper"`). - fn name(&self) -> &'static str; - - /// Transcribe a single base64-encoded audio blob. - /// - /// `mime_type` and `file_name` are hints; providers that don't care - /// may ignore them. `language` is BCP-47 (`"en"`, `"es"`); pass `None` - /// to let the provider auto-detect. - async fn transcribe( - &self, - config: &Config, - audio_base64: &str, - mime_type: Option<&str>, - file_name: Option<&str>, - language: Option<&str>, - ) -> Result, String>; -} - -/// Text-to-speech provider abstraction. Cloud returns rich viseme alignment -/// (used by the mascot lip-sync); Piper returns audio only and the caller -/// derives a flat viseme timeline downstream. -#[async_trait] -pub trait TtsProvider: Send + Sync { - fn name(&self) -> &'static str; - - /// Synthesize speech for `text`. Returns the same envelope shape as - /// `voice.reply_synthesize` so the renderer can swap providers without - /// branching on the response. - async fn synthesize( - &self, - config: &Config, - text: &str, - voice: Option<&str>, - ) -> Result, String>; -} - -// --------------------------------------------------------------------------- -// Cloud STT -// --------------------------------------------------------------------------- - -/// Cloud STT — wraps [`transcribe_cloud`]. Stateless; cheap to construct. 
-pub struct CloudSttProvider { - model: String, -} - -impl CloudSttProvider { - pub fn new(model: impl Into) -> Self { - Self { - model: model.into(), - } - } -} - -#[async_trait] -impl SttProvider for CloudSttProvider { - fn name(&self) -> &'static str { - "cloud" - } - - async fn transcribe( - &self, - config: &Config, - audio_base64: &str, - mime_type: Option<&str>, - file_name: Option<&str>, - language: Option<&str>, - ) -> Result, String> { - debug!( - "{LOG_PREFIX} cloud STT dispatch model={} bytes_b64={}", - self.model, - audio_base64.len() - ); - let opts = CloudTranscribeOptions { - model: Some(self.model.clone()), - language: language.map(str::to_string), - mime_type: mime_type.map(str::to_string), - file_name: file_name.map(str::to_string), - }; - let outcome = transcribe_cloud(config, audio_base64, &opts).await?; - let CloudTranscribeResult { text } = outcome.value; - Ok(RpcOutcome::single_log( - SttResult { - text, - provider: "cloud".to_string(), - }, - "voice-factory: cloud STT completed", - )) - } -} - -// --------------------------------------------------------------------------- -// Local Whisper STT -// --------------------------------------------------------------------------- - -/// Local Whisper STT — wraps [`transcribe_whisper`]. Resolves `WHISPER_BIN` -/// lazily on each call. 
-pub struct WhisperSttProvider { - model: String, -} - -impl WhisperSttProvider { - pub fn new(model: impl Into) -> Self { - Self { - model: model.into(), - } - } -} - -#[async_trait] -impl SttProvider for WhisperSttProvider { - fn name(&self) -> &'static str { - "whisper" - } - - async fn transcribe( - &self, - config: &Config, - audio_base64: &str, - mime_type: Option<&str>, - _file_name: Option<&str>, - language: Option<&str>, - ) -> Result, String> { - debug!( - "{LOG_PREFIX} whisper STT dispatch model={} mime={:?} lang={:?}", - self.model, mime_type, language - ); - let opts = WhisperTranscribeOptions { - model: Some(self.model.clone()), - mime_type: mime_type.map(str::to_string), - language: language.map(str::to_string), - }; - let outcome = transcribe_whisper(config, audio_base64, &opts).await?; - Ok(RpcOutcome::single_log( - SttResult { - text: outcome.value.text, - provider: "whisper".to_string(), - }, - "voice-factory: whisper STT completed", - )) - } -} - -// --------------------------------------------------------------------------- -// Cloud TTS -// --------------------------------------------------------------------------- - -/// Cloud TTS — wraps [`synthesize_reply`] (backend ElevenLabs proxy). 
-pub struct CloudTtsProvider { - voice: Option, -} - -impl CloudTtsProvider { - pub fn new(voice: Option) -> Self { - Self { voice } - } -} - -#[async_trait] -impl TtsProvider for CloudTtsProvider { - fn name(&self) -> &'static str { - "cloud" - } - - async fn synthesize( - &self, - config: &Config, - text: &str, - voice: Option<&str>, - ) -> Result, String> { - let resolved_voice = voice - .map(str::to_string) - .or_else(|| self.voice.clone()) - .filter(|s| !s.trim().is_empty()); - debug!( - "{LOG_PREFIX} cloud TTS dispatch voice={} chars={}", - resolved_voice.as_deref().unwrap_or(""), - text.len() - ); - let opts = ReplySpeechOptions { - voice_id: resolved_voice, - model_id: None, - output_format: None, - voice_settings: None, - }; - synthesize_reply(config, text, &opts).await - } -} - -// --------------------------------------------------------------------------- -// Local Piper TTS -// --------------------------------------------------------------------------- - -/// Local Piper TTS — wraps [`synthesize_piper`]. 
-pub struct PiperTtsProvider { - voice: String, -} - -impl PiperTtsProvider { - pub fn new(voice: impl Into) -> Self { - Self { - voice: voice.into(), - } - } -} - -#[async_trait] -impl TtsProvider for PiperTtsProvider { - fn name(&self) -> &'static str { - "piper" - } - - async fn synthesize( - &self, - config: &Config, - text: &str, - voice: Option<&str>, - ) -> Result, String> { - let resolved_voice = voice - .map(str::to_string) - .filter(|s| !s.trim().is_empty()) - .unwrap_or_else(|| self.voice.clone()); - debug!( - "{LOG_PREFIX} piper TTS dispatch voice={} chars={}", - resolved_voice, - text.len() - ); - let opts = PiperOptions { - voice: Some(resolved_voice), - }; - synthesize_piper(config, text, &opts).await - } -} - -// --------------------------------------------------------------------------- -// External STT provider (slug-keyed, third-party API) -// --------------------------------------------------------------------------- - -/// Third-party STT provider dispatched via the voice provider registry. -/// Supports OpenAI-compatible and Deepgram API styles. 
-pub struct ExternalSttProvider { - slug: String, - model: String, - endpoint: String, - api_key: String, - api_style: SttApiStyle, -} - -impl ExternalSttProvider { - pub fn new( - slug: impl Into, - model: impl Into, - endpoint: impl Into, - api_key: impl Into, - api_style: SttApiStyle, - ) -> Self { - Self { - slug: slug.into(), - model: model.into(), - endpoint: endpoint.into(), - api_key: api_key.into(), - api_style, - } - } -} - -#[async_trait] -impl SttProvider for ExternalSttProvider { - fn name(&self) -> &'static str { - "external" - } - - async fn transcribe( - &self, - _config: &Config, - audio_base64: &str, - mime_type: Option<&str>, - file_name: Option<&str>, - language: Option<&str>, - ) -> Result, String> { - debug!( - "{LOG_PREFIX} external STT dispatch slug={} model={} style={:?} bytes_b64={}", - self.slug, - self.model, - self.api_style, - audio_base64.len() - ); - - let audio_bytes = base64_decode(audio_base64)?; - let mime = mime_type.unwrap_or("audio/wav"); - - let result = match self.api_style { - SttApiStyle::OpenaiAudio => { - self.transcribe_openai_compat(&audio_bytes, mime, file_name, language) - .await? - } - SttApiStyle::Deepgram => { - self.transcribe_deepgram(&audio_bytes, mime, language) - .await? 
- } - }; - - Ok(RpcOutcome::single_log( - SttResult { - text: result, - provider: self.slug.clone(), - }, - &format!("voice-factory: external STT completed via {}", self.slug), - )) - } -} - -impl ExternalSttProvider { - async fn transcribe_openai_compat( - &self, - audio_bytes: &[u8], - mime: &str, - file_name: Option<&str>, - language: Option<&str>, - ) -> Result { - let url = format!( - "{}/audio/transcriptions", - self.endpoint.trim_end_matches('/') - ); - let ext = extension_for_mime(mime); - let default_fname = format!("audio.{ext}"); - let fname = file_name.unwrap_or(&default_fname); - - let file_part = reqwest::multipart::Part::bytes(audio_bytes.to_vec()) - .file_name(fname.to_string()) - .mime_str(mime) - .map_err(|e| format!("[voice-stt] mime error: {e}"))?; - - let mut form = reqwest::multipart::Form::new() - .text("model", self.model.clone()) - .part("file", file_part); - - if let Some(lang) = language { - form = form.text("language", lang.to_string()); - } - - let client = reqwest::Client::new(); - let resp = client - .post(&url) - .header("Authorization", format!("Bearer {}", self.api_key)) - .multipart(form) - .send() - .await - .map_err(|e| format!("[voice-stt] external STT request failed: {e}"))?; - - if !resp.status().is_success() { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - return Err(format!("[voice-stt] external STT error {status}: {body}")); - } - - #[derive(Deserialize)] - struct TranscriptionResp { - text: String, - } - let parsed: TranscriptionResp = resp - .json() - .await - .map_err(|e| format!("[voice-stt] failed to parse response: {e}"))?; - Ok(parsed.text) - } - - async fn transcribe_deepgram( - &self, - audio_bytes: &[u8], - mime: &str, - language: Option<&str>, - ) -> Result { - let mut url = format!( - "{}/listen?model={}", - self.endpoint.trim_end_matches('/'), - self.model - ); - if let Some(lang) = language { - url.push_str(&format!("&language={lang}")); - } - - let client = 
reqwest::Client::new(); - let resp = client - .post(&url) - .header("Authorization", format!("Token {}", self.api_key)) - .header("Content-Type", mime) - .body(audio_bytes.to_vec()) - .send() - .await - .map_err(|e| format!("[voice-stt] deepgram request failed: {e}"))?; - - if !resp.status().is_success() { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - return Err(format!("[voice-stt] deepgram error {status}: {body}")); - } - - #[derive(Deserialize)] - struct DeepgramChannel { - alternatives: Vec, - } - #[derive(Deserialize)] - struct DeepgramAlt { - transcript: String, - } - #[derive(Deserialize)] - struct DeepgramResult { - channels: Vec, - } - #[derive(Deserialize)] - struct DeepgramResp { - results: DeepgramResult, - } - - let parsed: DeepgramResp = resp - .json() - .await - .map_err(|e| format!("[voice-stt] deepgram parse error: {e}"))?; - - let text = parsed - .results - .channels - .first() - .and_then(|ch| ch.alternatives.first()) - .map(|a| a.transcript.clone()) - .unwrap_or_default(); - Ok(text) - } -} - -// --------------------------------------------------------------------------- -// External TTS provider (slug-keyed, third-party API) -// --------------------------------------------------------------------------- - -/// Third-party TTS provider dispatched via the voice provider registry. -/// Supports OpenAI-compatible and ElevenLabs API styles. 
-pub struct ExternalTtsProvider { - slug: String, - default_voice: String, - endpoint: String, - api_key: String, - api_style: TtsApiStyle, -} - -impl ExternalTtsProvider { - pub fn new( - slug: impl Into, - default_voice: impl Into, - endpoint: impl Into, - api_key: impl Into, - api_style: TtsApiStyle, - ) -> Self { - Self { - slug: slug.into(), - default_voice: default_voice.into(), - endpoint: endpoint.into(), - api_key: api_key.into(), - api_style, - } - } -} - -#[async_trait] -impl TtsProvider for ExternalTtsProvider { - fn name(&self) -> &'static str { - "external" - } - - async fn synthesize( - &self, - _config: &Config, - text: &str, - voice: Option<&str>, - ) -> Result, String> { - let resolved_voice = voice - .filter(|s| !s.trim().is_empty()) - .unwrap_or(&self.default_voice); - - debug!( - "{LOG_PREFIX} external TTS dispatch slug={} voice={} style={:?} chars={}", - self.slug, - resolved_voice, - self.api_style, - text.len() - ); - - let (audio_bytes, audio_mime) = match self.api_style { - TtsApiStyle::OpenaiAudio => self.synthesize_openai_compat(text, resolved_voice).await?, - TtsApiStyle::ElevenLabs => self.synthesize_elevenlabs(text, resolved_voice).await?, - }; - - use base64::Engine; - let audio_base64 = base64::engine::general_purpose::STANDARD.encode(&audio_bytes); - - Ok(RpcOutcome::single_log( - ReplySpeechResult { - audio_base64, - audio_mime, - visemes: Vec::new(), - alignment: None, - }, - &format!("voice-factory: external TTS completed via {}", self.slug), - )) - } -} - -impl ExternalTtsProvider { - async fn synthesize_openai_compat( - &self, - text: &str, - voice: &str, - ) -> Result<(Vec, String), String> { - let url = format!("{}/audio/speech", self.endpoint.trim_end_matches('/')); - - let body = serde_json::json!({ - "model": "tts-1", - "voice": voice, - "input": text, - }); - - let client = reqwest::Client::new(); - let resp = client - .post(&url) - .header("Authorization", format!("Bearer {}", self.api_key)) - .header("Content-Type", 
"application/json") - .body(body.to_string()) - .send() - .await - .map_err(|e| format!("[voice-tts] external TTS request failed: {e}"))?; - - if !resp.status().is_success() { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - return Err(format!("[voice-tts] external TTS error {status}: {body}")); - } - - let content_type = resp - .headers() - .get("content-type") - .and_then(|v| v.to_str().ok()) - .unwrap_or("audio/mpeg") - .to_string(); - - let bytes = resp - .bytes() - .await - .map_err(|e| format!("[voice-tts] failed to read audio: {e}"))?; - - Ok((bytes.to_vec(), content_type)) - } - - async fn synthesize_elevenlabs( - &self, - text: &str, - voice_id: &str, - ) -> Result<(Vec, String), String> { - let url = format!( - "{}/text-to-speech/{}", - self.endpoint.trim_end_matches('/'), - voice_id - ); - - let body = serde_json::json!({ - "text": text, - "model_id": "eleven_multilingual_v2", - }); - - let client = reqwest::Client::new(); - let resp = client - .post(&url) - .header("xi-api-key", &self.api_key) - .header("Content-Type", "application/json") - .body(body.to_string()) - .send() - .await - .map_err(|e| format!("[voice-tts] elevenlabs request failed: {e}"))?; - - if !resp.status().is_success() { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - return Err(format!("[voice-tts] elevenlabs error {status}: {body}")); - } - - let content_type = resp - .headers() - .get("content-type") - .and_then(|v| v.to_str().ok()) - .unwrap_or("audio/mpeg") - .to_string(); - - let bytes = resp - .bytes() - .await - .map_err(|e| format!("[voice-tts] failed to read elevenlabs audio: {e}"))?; - - Ok((bytes.to_vec(), content_type)) - } -} - -// --------------------------------------------------------------------------- -// Slug:model helpers -// --------------------------------------------------------------------------- - -/// Split a provider string into `(slug, model)`. 
-/// -/// `"deepgram:nova-2"` → `("deepgram", "nova-2")` -/// `"deepgram"` → `("deepgram", "")` -fn split_slug_model(s: &str) -> (&str, &str) { - match s.find(':') { - Some(pos) => (&s[..pos], &s[pos + 1..]), - None => (s, ""), - } -} - -/// Resolve the effective STT provider string from config. -/// -/// Precedence: `config.stt_provider` → `config.local_ai.stt_provider` → `"cloud"`. -pub fn effective_stt_provider(config: &Config) -> String { - config - .stt_provider - .as_deref() - .filter(|s| !s.trim().is_empty()) - .or_else(|| { - let legacy = config.local_ai.stt_provider.as_str(); - if legacy.trim().is_empty() { - None - } else { - Some(legacy) - } - }) - .unwrap_or("cloud") - .to_string() -} - -/// Resolve the effective TTS provider string from config. -/// -/// Precedence: `config.tts_provider` → `config.local_ai.tts_provider` → `"cloud"`. -pub fn effective_tts_provider(config: &Config) -> String { - config - .tts_provider - .as_deref() - .filter(|s| !s.trim().is_empty()) - .or_else(|| { - let legacy = config.local_ai.tts_provider.as_str(); - if legacy.trim().is_empty() { - None - } else { - Some(legacy) - } - }) - .unwrap_or("cloud") - .to_string() -} - -/// Create an STT provider by looking up a slug in `config.voice_providers`. 
-fn create_stt_provider_by_slug( - slug: &str, - model: &str, - config: &Config, -) -> anyhow::Result> { - let entry = config - .voice_providers - .iter() - .find(|p| p.slug == slug) - .ok_or_else(|| { - anyhow::anyhow!( - "no voice provider with slug '{}' found in voice_providers", - slug - ) - })?; - - if !entry.capability.supports_stt() { - return Err(anyhow::anyhow!( - "voice provider '{}' does not support STT (capability: {})", - slug, - entry.capability.as_str() - )); - } - - let effective_model = if model.trim().is_empty() { - entry.default_stt_model.as_deref().unwrap_or("default") - } else { - model - }; - - let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(slug, config) - .unwrap_or_default(); - - debug!( - "{LOG_PREFIX} creating external STT provider slug={slug} model={effective_model} \ - endpoint={} key_present={}", - entry.endpoint, - !api_key.is_empty() - ); - - Ok(Box::new(ExternalSttProvider::new( - slug, - effective_model, - &entry.endpoint, - api_key, - entry.stt_api_style, - ))) -} - -/// Create a TTS provider by looking up a slug in `config.voice_providers`. 
-fn create_tts_provider_by_slug( - slug: &str, - voice: &str, - config: &Config, -) -> anyhow::Result> { - let entry = config - .voice_providers - .iter() - .find(|p| p.slug == slug) - .ok_or_else(|| { - anyhow::anyhow!( - "no voice provider with slug '{}' found in voice_providers", - slug - ) - })?; - - if !entry.capability.supports_tts() { - return Err(anyhow::anyhow!( - "voice provider '{}' does not support TTS (capability: {})", - slug, - entry.capability.as_str() - )); - } - - let effective_voice = if voice.trim().is_empty() { - entry.default_tts_voice.as_deref().unwrap_or("default") - } else { - voice - }; - - let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(slug, config) - .unwrap_or_default(); - - debug!( - "{LOG_PREFIX} creating external TTS provider slug={slug} voice={effective_voice} \ - endpoint={} key_present={}", - entry.endpoint, - !api_key.is_empty() - ); - - Ok(Box::new(ExternalTtsProvider::new( - slug, - effective_voice, - &entry.endpoint, - api_key, - entry.tts_api_style, - ))) -} - -fn base64_decode(input: &str) -> Result, String> { - use base64::Engine; - base64::engine::general_purpose::STANDARD - .decode(input) - .map_err(|e| format!("[voice-factory] base64 decode error: {e}")) -} - -fn extension_for_mime(mime: &str) -> &str { - match mime { - "audio/wav" | "audio/x-wav" => "wav", - "audio/mpeg" | "audio/mp3" => "mp3", - "audio/ogg" => "ogg", - "audio/webm" => "webm", - "audio/flac" => "flac", - "audio/mp4" | "audio/m4a" => "m4a", - _ => "wav", - } -} - -// --------------------------------------------------------------------------- -// Factory entry points (mirrors embeddings/factory.rs) -// --------------------------------------------------------------------------- - -/// Creates a speech-to-text provider based on the specified name and model. 
-/// -/// Supported provider names: -/// - `"cloud"` → backend Whisper proxy — default, preferred for laptops -/// without local models -/// - `"whisper"` → local whisper.cpp via `WHISPER_BIN` (or in-process -/// `whisper-rs` when configured) -/// -/// Returns an error for unrecognised provider names so configuration -/// mistakes surface immediately rather than silently degrading. -/// -/// The factory does not eagerly resolve the binary — `WhisperSttProvider` -/// looks up `WHISPER_BIN` lazily inside `transcribe()` so a misconfigured -/// install fails at use-time with a clear error message instead of at -/// startup. -pub fn create_stt_provider( - provider: &str, - model: &str, - config: &Config, -) -> anyhow::Result> { - debug!("{LOG_PREFIX} create_stt_provider provider={provider} model={model}"); - let model = if model.trim().is_empty() { - DEFAULT_WHISPER_MODEL - } else { - model - }; - match provider.trim() { - "cloud" | "openhuman" => Ok(Box::new(CloudSttProvider::new( - super::cloud_transcribe_default_model(), - ))), - "whisper" => Ok(Box::new(WhisperSttProvider::new(model))), - other => { - let (slug, slug_model) = split_slug_model(other); - let effective_model = if slug_model.is_empty() { - model - } else { - slug_model - }; - create_stt_provider_by_slug(slug, effective_model, config) - } - } -} - -/// Creates a text-to-speech provider based on the specified name and voice. -/// -/// Supported provider names: -/// - `"cloud"` → backend ElevenLabs proxy with viseme alignment -/// - `"piper"` → local Piper subprocess via `PIPER_BIN` -/// -/// Kokoro is **not** implemented in this cut — the integration shipped with -/// Piper because `PIPER_BIN` is already reserved in `.env.example` and the -/// runtime contract (subprocess + `.onnx` model) is simpler. Adding Kokoro -/// later is straightforward: add a new branch here and a `local_speech_kokoro` -/// sibling module. 
-pub fn create_tts_provider( - provider: &str, - voice: &str, - config: &Config, -) -> anyhow::Result> { - debug!("{LOG_PREFIX} create_tts_provider provider={provider} voice={voice}"); - let voice = if voice.trim().is_empty() { - DEFAULT_PIPER_VOICE - } else { - voice - }; - match provider.trim() { - "cloud" | "openhuman" => Ok(Box::new(CloudTtsProvider::new(if voice.is_empty() { - None - } else { - Some(voice.to_string()) - }))), - "piper" => Ok(Box::new(PiperTtsProvider::new(voice))), - other => { - let (slug, slug_voice) = split_slug_model(other); - let effective_voice = if slug_voice.is_empty() { - voice - } else { - slug_voice - }; - create_tts_provider_by_slug(slug, effective_voice, config) - } - } -} - -/// Default Whisper model. `whisper-large-v3-turbo` is the recommended ship -/// default — best accuracy-to-latency tradeoff in the Whisper family (5× -/// faster than `large-v3` with comparable WER on English). Users on lower- -/// spec hardware can drop down to `medium` / `small` / `base` / `tiny` via -/// the install presets. -pub const DEFAULT_WHISPER_MODEL: &str = "whisper-large-v3-turbo"; - -/// Default Piper voice — `en_US-lessac-medium`, matches -/// [`super::super::local_ai::model_ids::effective_tts_voice_id`]. -pub const DEFAULT_PIPER_VOICE: &str = "en_US-lessac-medium"; - -/// Whisper install presets (size tiers exposed to the installer UI). -/// Mirrors the Ollama model installer surface: each entry is `(id, label)`. -pub const WHISPER_MODEL_PRESETS: &[(&str, &str)] = &[ - ("tiny", "Tiny (39 MB, fastest)"), - ("base", "Base (74 MB)"), - ("small", "Small (244 MB)"), - ("medium", "Medium (769 MB, recommended)"), - ("large-v3-turbo", "Large v3 Turbo (1.5 GB, best accuracy)"), -]; - -/// Returns a thread-safe default STT provider (cloud). Used by callers that -/// can't easily plumb a `Config` reference but still need a sensible default. 
-pub fn default_stt_provider() -> Arc { - Arc::new(CloudSttProvider::new( - super::cloud_transcribe_default_model(), - )) -} - -/// Returns a thread-safe default TTS provider (cloud). -pub fn default_tts_provider() -> Arc { - Arc::new(CloudTtsProvider::new(None)) -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -mod tests { - use super::*; - - fn cfg() -> Config { - Config::default() - } - - #[test] - fn stt_factory_cloud_branch() { - let p = create_stt_provider("cloud", "ignored", &cfg()).unwrap(); - assert_eq!(p.name(), "cloud"); - } - - #[test] - fn stt_factory_whisper_branch() { - let p = create_stt_provider("whisper", "whisper-large-v3-turbo", &cfg()).unwrap(); - assert_eq!(p.name(), "whisper"); - } - - #[test] - fn stt_factory_whisper_empty_model_uses_default() { - // Empty model → default whisper-large-v3-turbo; constructor must not - // reject an empty string with an opaque error. 
- let p = create_stt_provider("whisper", "", &cfg()).unwrap(); - assert_eq!(p.name(), "whisper"); - } - - #[test] - fn stt_factory_openhuman_sentinel() { - let p = create_stt_provider("openhuman", "ignored", &cfg()).unwrap(); - assert_eq!(p.name(), "cloud"); - } - - #[test] - fn stt_factory_slug_without_registry_errors() { - let err = create_stt_provider("deepgram", "nova-2", &cfg()) - .err() - .expect("deepgram without registry entry must error"); - let msg = err.to_string(); - assert!(msg.contains("deepgram"), "should name the slug: {msg}"); - assert!( - msg.contains("no voice provider"), - "should explain missing: {msg}" - ); - } - - #[test] - fn stt_factory_slug_colon_model_resolves_with_registry() { - let mut config = cfg(); - config.voice_providers.push( - crate::openhuman::config::schema::voice_providers::VoiceProviderCreds { - slug: "deepgram".into(), - endpoint: "https://api.deepgram.com/v1".into(), - capability: VoiceCapability::Stt, - stt_api_style: SttApiStyle::Deepgram, - ..Default::default() - }, - ); - let p = create_stt_provider("deepgram:nova-2", "", &config).unwrap(); - assert_eq!(p.name(), "external"); - } - - #[test] - fn stt_factory_bare_slug_resolves_with_registry() { - let mut config = cfg(); - config.voice_providers.push( - crate::openhuman::config::schema::voice_providers::VoiceProviderCreds { - slug: "openai".into(), - endpoint: "https://api.openai.com/v1".into(), - capability: VoiceCapability::Both, - default_stt_model: Some("whisper-1".into()), - ..Default::default() - }, - ); - let p = create_stt_provider("openai", "", &config).unwrap(); - assert_eq!(p.name(), "external"); - } - - #[test] - fn stt_factory_tts_only_provider_rejects() { - let mut config = cfg(); - config.voice_providers.push( - crate::openhuman::config::schema::voice_providers::VoiceProviderCreds { - slug: "elevenlabs".into(), - endpoint: "https://api.elevenlabs.io/v1".into(), - capability: VoiceCapability::Tts, - ..Default::default() - }, - ); - let err = 
create_stt_provider("elevenlabs", "model", &config) - .err() - .expect("TTS-only provider must reject STT"); - assert!(err.to_string().contains("does not support STT")); - } - - #[test] - fn stt_factory_empty_string_errors() { - let err = create_stt_provider("", "model", &cfg()) - .err() - .expect("empty provider must error"); - assert!(err.to_string().contains("no voice provider")); - } - - #[test] - fn tts_factory_cloud_branch() { - let p = create_tts_provider("cloud", "Rachel", &cfg()).unwrap(); - assert_eq!(p.name(), "cloud"); - } - - #[test] - fn tts_factory_piper_branch() { - let p = create_tts_provider("piper", "en_US-lessac-medium", &cfg()).unwrap(); - assert_eq!(p.name(), "piper"); - } - - #[test] - fn tts_factory_piper_empty_voice_uses_default() { - let p = create_tts_provider("piper", "", &cfg()).unwrap(); - assert_eq!(p.name(), "piper"); - } - - #[test] - fn tts_factory_openhuman_sentinel() { - let p = create_tts_provider("openhuman", "alloy", &cfg()).unwrap(); - assert_eq!(p.name(), "cloud"); - } - - #[test] - fn tts_factory_slug_without_registry_errors() { - let err = create_tts_provider("kokoro", "af_bella", &cfg()) - .err() - .expect("kokoro without registry entry must error"); - let msg = err.to_string(); - assert!(msg.contains("kokoro"), "should name the slug: {msg}"); - assert!( - msg.contains("no voice provider"), - "should explain missing: {msg}" - ); - } - - #[test] - fn tts_factory_slug_colon_voice_resolves_with_registry() { - let mut config = cfg(); - config.voice_providers.push( - crate::openhuman::config::schema::voice_providers::VoiceProviderCreds { - slug: "openai".into(), - endpoint: "https://api.openai.com/v1".into(), - capability: VoiceCapability::Both, - default_tts_voice: Some("alloy".into()), - ..Default::default() - }, - ); - let p = create_tts_provider("openai:shimmer", "", &config).unwrap(); - assert_eq!(p.name(), "external"); - } - - #[test] - fn tts_factory_stt_only_provider_rejects() { - let mut config = cfg(); - 
config.voice_providers.push( - crate::openhuman::config::schema::voice_providers::VoiceProviderCreds { - slug: "deepgram".into(), - endpoint: "https://api.deepgram.com/v1".into(), - capability: VoiceCapability::Stt, - ..Default::default() - }, - ); - let err = create_tts_provider("deepgram", "voice", &config) - .err() - .expect("STT-only provider must reject TTS"); - assert!(err.to_string().contains("does not support TTS")); - } - - #[test] - fn whisper_presets_cover_full_size_ladder() { - // Sanity-check the installer surface: tiny→large-v3-turbo must all be - // exposed so the local-AI panel can render the size picker without - // hard-coding the list. - let ids: Vec<&str> = WHISPER_MODEL_PRESETS.iter().map(|(id, _)| *id).collect(); - for expected in ["tiny", "base", "small", "medium", "large-v3-turbo"] { - assert!( - ids.contains(&expected), - "WHISPER_MODEL_PRESETS missing {expected}" - ); - } - } - - #[tokio::test] - async fn whisper_provider_fails_clearly_when_binary_missing() { - // No WHISPER_BIN env, no model file — the provider must surface an - // actionable error rather than panic. Drive a small base64 payload - // so we never reach the actual transcription call. - let _guard = unset_env_guard("WHISPER_BIN"); - let provider = WhisperSttProvider::new("whisper-large-v3-turbo"); - let result = provider - .transcribe(&cfg(), "AAAA", Some("audio/wav"), None, None) - .await; - assert!(result.is_err(), "missing binary must error"); - let msg = result.err().unwrap(); - // Whatever the underlying message says, it must NOT be a serialize - // panic — i.e. we must have hit the binary-resolution branch. 
- assert!( - !msg.is_empty(), - "error message should be populated for diagnosis" - ); - } - - #[test] - fn default_providers_return_cloud() { - assert_eq!(default_stt_provider().name(), "cloud"); - assert_eq!(default_tts_provider().name(), "cloud"); - } - - // ── slug:model parsing ────────────────────────────────────────────── - - #[test] - fn split_slug_model_with_colon() { - assert_eq!(split_slug_model("deepgram:nova-2"), ("deepgram", "nova-2")); - } - - #[test] - fn split_slug_model_bare_slug() { - assert_eq!(split_slug_model("deepgram"), ("deepgram", "")); - } - - #[test] - fn split_slug_model_multiple_colons() { - assert_eq!(split_slug_model("custom:model:v2"), ("custom", "model:v2")); - } - - // ── effective provider resolution ─────────────────────────────────── - - #[test] - fn effective_stt_prefers_new_field() { - let mut config = cfg(); - config.stt_provider = Some("deepgram:nova-2".into()); - config.local_ai.stt_provider = "whisper".into(); - assert_eq!(effective_stt_provider(&config), "deepgram:nova-2"); - } - - #[test] - fn effective_stt_falls_back_to_legacy() { - let mut config = cfg(); - config.stt_provider = None; - config.local_ai.stt_provider = "whisper".into(); - assert_eq!(effective_stt_provider(&config), "whisper"); - } - - #[test] - fn effective_stt_defaults_to_cloud() { - let mut config = cfg(); - config.stt_provider = None; - config.local_ai.stt_provider = String::new(); - assert_eq!(effective_stt_provider(&config), "cloud"); - } - - #[test] - fn effective_tts_prefers_new_field() { - let mut config = cfg(); - config.tts_provider = Some("openai:alloy".into()); - config.local_ai.tts_provider = "piper".into(); - assert_eq!(effective_tts_provider(&config), "openai:alloy"); - } - - #[test] - fn effective_tts_falls_back_to_legacy() { - let mut config = cfg(); - config.tts_provider = None; - config.local_ai.tts_provider = "piper".into(); - assert_eq!(effective_tts_provider(&config), "piper"); - } - - #[test] - fn effective_tts_defaults_to_cloud() 
{ - let config = cfg(); - assert_eq!(effective_tts_provider(&config), "cloud"); - } - - /// Drop guard that unsets an env var on construction and restores it on - /// drop. Necessary because cargo runs tests in parallel and bare - /// `remove_var` would leak across tests. - fn unset_env_guard(key: &'static str) -> EnvUnsetGuard { - let prev = std::env::var_os(key); - std::env::remove_var(key); - EnvUnsetGuard { key, prev } - } - - struct EnvUnsetGuard { - key: &'static str, - prev: Option, - } - impl Drop for EnvUnsetGuard { - fn drop(&mut self) { - match &self.prev { - Some(v) => std::env::set_var(self.key, v), - None => std::env::remove_var(self.key), - } - } - } -} diff --git a/src/openhuman/voice/factory/entry.rs b/src/openhuman/voice/factory/entry.rs new file mode 100644 index 0000000000..076d1e4d27 --- /dev/null +++ b/src/openhuman/voice/factory/entry.rs @@ -0,0 +1,131 @@ +//! Factory entry points: `create_stt_provider`, `create_tts_provider`, defaults, and constants. + +use std::sync::Arc; + +use log::debug; + +use super::helpers::{ + create_stt_provider_by_slug, create_tts_provider_by_slug, split_slug_model, LOG_PREFIX, +}; +use super::stt_providers::{CloudSttProvider, WhisperSttProvider}; +use super::traits::{SttProvider, TtsProvider}; +use super::tts_providers::{CloudTtsProvider, PiperTtsProvider}; +use crate::openhuman::config::Config; + +/// Default Whisper model. `whisper-large-v3-turbo` is the recommended ship +/// default — best accuracy-to-latency tradeoff in the Whisper family (5× +/// faster than `large-v3` with comparable WER on English). Users on lower- +/// spec hardware can drop down to `medium` / `small` / `base` / `tiny` via +/// the install presets. +pub const DEFAULT_WHISPER_MODEL: &str = "whisper-large-v3-turbo"; + +/// Default Piper voice — `en_US-lessac-medium`, matches +/// [`super::super::local_ai::model_ids::effective_tts_voice_id`]. 
+pub const DEFAULT_PIPER_VOICE: &str = "en_US-lessac-medium"; + +/// Whisper install presets (size tiers exposed to the installer UI). +/// Mirrors the Ollama model installer surface: each entry is `(id, label)`. +pub const WHISPER_MODEL_PRESETS: &[(&str, &str)] = &[ + ("tiny", "Tiny (39 MB, fastest)"), + ("base", "Base (74 MB)"), + ("small", "Small (244 MB)"), + ("medium", "Medium (769 MB, recommended)"), + ("large-v3-turbo", "Large v3 Turbo (1.5 GB, best accuracy)"), +]; + +/// Creates a speech-to-text provider based on the specified name and model. +/// +/// Supported provider names: +/// - `"cloud"` → backend Whisper proxy — default, preferred for laptops +/// without local models +/// - `"whisper"` → local whisper.cpp via `WHISPER_BIN` (or in-process +/// `whisper-rs` when configured) +/// +/// Returns an error for unrecognised provider names so configuration +/// mistakes surface immediately rather than silently degrading. +/// +/// The factory does not eagerly resolve the binary — `WhisperSttProvider` +/// looks up `WHISPER_BIN` lazily inside `transcribe()` so a misconfigured +/// install fails at use-time with a clear error message instead of at +/// startup. +pub fn create_stt_provider( + provider: &str, + model: &str, + config: &Config, +) -> anyhow::Result> { + debug!("{LOG_PREFIX} create_stt_provider provider={provider} model={model}"); + let model = if model.trim().is_empty() { + DEFAULT_WHISPER_MODEL + } else { + model + }; + match provider.trim() { + "cloud" | "openhuman" => Ok(Box::new(CloudSttProvider::new( + super::super::cloud_transcribe_default_model(), + ))), + "whisper" => Ok(Box::new(WhisperSttProvider::new(model))), + other => { + let (slug, slug_model) = split_slug_model(other); + let effective_model = if slug_model.is_empty() { + model + } else { + slug_model + }; + create_stt_provider_by_slug(slug, effective_model, config) + } + } +} + +/// Creates a text-to-speech provider based on the specified name and voice. 
+/// +/// Supported provider names: +/// - `"cloud"` → backend ElevenLabs proxy with viseme alignment +/// - `"piper"` → local Piper subprocess via `PIPER_BIN` +/// +/// Kokoro is **not** implemented in this cut — the integration shipped with +/// Piper because `PIPER_BIN` is already reserved in `.env.example` and the +/// runtime contract (subprocess + `.onnx` model) is simpler. Adding Kokoro +/// later is straightforward: add a new branch here and a `local_speech_kokoro` +/// sibling module. +pub fn create_tts_provider( + provider: &str, + voice: &str, + config: &Config, +) -> anyhow::Result> { + debug!("{LOG_PREFIX} create_tts_provider provider={provider} voice={voice}"); + let voice = if voice.trim().is_empty() { + DEFAULT_PIPER_VOICE + } else { + voice + }; + match provider.trim() { + "cloud" | "openhuman" => Ok(Box::new(CloudTtsProvider::new(if voice.is_empty() { + None + } else { + Some(voice.to_string()) + }))), + "piper" => Ok(Box::new(PiperTtsProvider::new(voice))), + other => { + let (slug, slug_voice) = split_slug_model(other); + let effective_voice = if slug_voice.is_empty() { + voice + } else { + slug_voice + }; + create_tts_provider_by_slug(slug, effective_voice, config) + } + } +} + +/// Returns a thread-safe default STT provider (cloud). Used by callers that +/// can't easily plumb a `Config` reference but still need a sensible default. +pub fn default_stt_provider() -> Arc { + Arc::new(CloudSttProvider::new( + super::super::cloud_transcribe_default_model(), + )) +} + +/// Returns a thread-safe default TTS provider (cloud). +pub fn default_tts_provider() -> Arc { + Arc::new(CloudTtsProvider::new(None)) +} diff --git a/src/openhuman/voice/factory/helpers.rs b/src/openhuman/voice/factory/helpers.rs new file mode 100644 index 0000000000..500fe7c890 --- /dev/null +++ b/src/openhuman/voice/factory/helpers.rs @@ -0,0 +1,196 @@ +//! Helper functions: slug parsing, config resolution, slug-keyed provider lookup. 
+ +use log::debug; + +use super::stt_providers::ExternalSttProvider; +use super::traits::{SttProvider, TtsProvider}; +use super::tts_providers::ExternalTtsProvider; +use crate::openhuman::config::Config; + +pub(super) const LOG_PREFIX: &str = "[voice-factory]"; + +// --------------------------------------------------------------------------- +// Slug / model parsing +// --------------------------------------------------------------------------- + +/// Split a provider string into `(slug, model)`. +/// +/// `"deepgram:nova-2"` → `("deepgram", "nova-2")` +/// `"deepgram"` → `("deepgram", "")` +pub(super) fn split_slug_model(s: &str) -> (&str, &str) { + match s.find(':') { + Some(pos) => (&s[..pos], &s[pos + 1..]), + None => (s, ""), + } +} + +// --------------------------------------------------------------------------- +// Effective provider resolution +// --------------------------------------------------------------------------- + +/// Resolve the effective STT provider string from config. +/// +/// Precedence: `config.stt_provider` → `config.local_ai.stt_provider` → `"cloud"`. +pub fn effective_stt_provider(config: &Config) -> String { + config + .stt_provider + .as_deref() + .filter(|s| !s.trim().is_empty()) + .or_else(|| { + let legacy = config.local_ai.stt_provider.as_str(); + if legacy.trim().is_empty() { + None + } else { + Some(legacy) + } + }) + .unwrap_or("cloud") + .to_string() +} + +/// Resolve the effective TTS provider string from config. +/// +/// Precedence: `config.tts_provider` → `config.local_ai.tts_provider` → `"cloud"`. 
+pub fn effective_tts_provider(config: &Config) -> String { + config + .tts_provider + .as_deref() + .filter(|s| !s.trim().is_empty()) + .or_else(|| { + let legacy = config.local_ai.tts_provider.as_str(); + if legacy.trim().is_empty() { + None + } else { + Some(legacy) + } + }) + .unwrap_or("cloud") + .to_string() +} + +// --------------------------------------------------------------------------- +// Slug-keyed provider creation +// --------------------------------------------------------------------------- + +/// Create an STT provider by looking up a slug in `config.voice_providers`. +pub(super) fn create_stt_provider_by_slug( + slug: &str, + model: &str, + config: &Config, +) -> anyhow::Result> { + let entry = config + .voice_providers + .iter() + .find(|p| p.slug == slug) + .ok_or_else(|| { + anyhow::anyhow!( + "no voice provider with slug '{}' found in voice_providers", + slug + ) + })?; + + if !entry.capability.supports_stt() { + return Err(anyhow::anyhow!( + "voice provider '{}' does not support STT (capability: {})", + slug, + entry.capability.as_str() + )); + } + + let effective_model = if model.trim().is_empty() { + entry.default_stt_model.as_deref().unwrap_or("default") + } else { + model + }; + + let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(slug, config) + .unwrap_or_default(); + + debug!( + "{LOG_PREFIX} creating external STT provider slug={slug} model={effective_model} \ + endpoint={} key_present={}", + entry.endpoint, + !api_key.is_empty() + ); + + Ok(Box::new(ExternalSttProvider::new( + slug, + effective_model, + &entry.endpoint, + api_key, + entry.stt_api_style, + ))) +} + +/// Create a TTS provider by looking up a slug in `config.voice_providers`. 
+pub(super) fn create_tts_provider_by_slug( + slug: &str, + voice: &str, + config: &Config, +) -> anyhow::Result> { + let entry = config + .voice_providers + .iter() + .find(|p| p.slug == slug) + .ok_or_else(|| { + anyhow::anyhow!( + "no voice provider with slug '{}' found in voice_providers", + slug + ) + })?; + + if !entry.capability.supports_tts() { + return Err(anyhow::anyhow!( + "voice provider '{}' does not support TTS (capability: {})", + slug, + entry.capability.as_str() + )); + } + + let effective_voice = if voice.trim().is_empty() { + entry.default_tts_voice.as_deref().unwrap_or("default") + } else { + voice + }; + + let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(slug, config) + .unwrap_or_default(); + + debug!( + "{LOG_PREFIX} creating external TTS provider slug={slug} voice={effective_voice} \ + endpoint={} key_present={}", + entry.endpoint, + !api_key.is_empty() + ); + + Ok(Box::new(ExternalTtsProvider::new( + slug, + effective_voice, + &entry.endpoint, + api_key, + entry.tts_api_style, + ))) +} + +// --------------------------------------------------------------------------- +// Low-level utilities +// --------------------------------------------------------------------------- + +pub(super) fn base64_decode(input: &str) -> Result, String> { + use base64::Engine; + base64::engine::general_purpose::STANDARD + .decode(input) + .map_err(|e| format!("[voice-factory] base64 decode error: {e}")) +} + +pub(super) fn extension_for_mime(mime: &str) -> &str { + match mime { + "audio/wav" | "audio/x-wav" => "wav", + "audio/mpeg" | "audio/mp3" => "mp3", + "audio/ogg" => "ogg", + "audio/webm" => "webm", + "audio/flac" => "flac", + "audio/mp4" | "audio/m4a" => "m4a", + _ => "wav", + } +} diff --git a/src/openhuman/voice/factory/mod.rs b/src/openhuman/voice/factory/mod.rs new file mode 100644 index 0000000000..83392fda2a --- /dev/null +++ b/src/openhuman/voice/factory/mod.rs @@ -0,0 +1,60 @@ +//! 
Factory functions for creating voice (STT / TTS) providers. +//! +//! Mirrors the shape of [`crate::openhuman::embeddings::factory`]: a single +//! entry point that takes a provider name + parameters and returns a boxed +//! trait object. Production paths pick the provider based on the user's +//! config (`stt_provider`, `tts_provider`); unit tests use the factory +//! directly to verify dispatch branches. +//! +//! ## Provider-string grammar +//! +//! Mirrors the LLM inference factory pattern in +//! [`crate::openhuman::inference::provider::factory`]: +//! +//! | String | Resolves to | +//! |-----------------------|------------------------------------------------| +//! | `"cloud"` / `"openhuman"` | OpenHuman backend proxy | +//! | `"whisper"` | Local Whisper (STT) | +//! | `"piper"` | Local Piper (TTS) | +//! | `":"` | Voice provider entry matched by slug | +//! | `""` | Bare slug — uses provider's default model/voice| +//! +//! ## STT providers +//! +//! - `"cloud"` → backend Whisper proxy (POST `/openai/v1/audio/transcriptions`). +//! - `"whisper"` → local Whisper via `WHISPER_BIN` (or in-process `whisper-rs`). +//! - `":"` → third-party STT API via the voice provider registry +//! (e.g. `"deepgram:nova-2"`, `"openai:whisper-1"`). +//! +//! ## TTS providers +//! +//! - `"cloud"` → backend ElevenLabs proxy (POST `/openai/v1/audio/speech`) +//! which also returns Oculus-15 visemes for the mascot lip-sync. +//! - `"piper"` → local Piper subprocess via `PIPER_BIN`. +//! - `":"` → third-party TTS API via the voice provider registry +//! (e.g. `"openai:alloy"`, `"elevenlabs:"`). +//! +//! ## Logging prefixes +//! +//! All factory branches log against `[voice-factory]`; the wrapped provider +//! implementations log under `[voice-stt]` / `[voice-tts]` so end-to-end +//! traces grep cleanly. 
+ +mod entry; +mod helpers; +mod stt_providers; +mod traits; +mod tts_providers; + +#[cfg(test)] +mod tests; + +// Re-export the public API — exact visibility preserved from the original file. +pub use entry::{ + create_stt_provider, create_tts_provider, default_stt_provider, default_tts_provider, + DEFAULT_PIPER_VOICE, DEFAULT_WHISPER_MODEL, WHISPER_MODEL_PRESETS, +}; +pub use helpers::{effective_stt_provider, effective_tts_provider}; +pub use stt_providers::{CloudSttProvider, ExternalSttProvider, WhisperSttProvider}; +pub use traits::{SttProvider, SttResult, TtsProvider}; +pub use tts_providers::{CloudTtsProvider, ExternalTtsProvider, PiperTtsProvider}; diff --git a/src/openhuman/voice/factory/stt_providers.rs b/src/openhuman/voice/factory/stt_providers.rs new file mode 100644 index 0000000000..9e6d9bae51 --- /dev/null +++ b/src/openhuman/voice/factory/stt_providers.rs @@ -0,0 +1,320 @@ +//! STT provider implementations: cloud, local Whisper, and external (slug-keyed). + +use async_trait::async_trait; +use log::debug; +use serde::Deserialize; + +use super::super::cloud_transcribe::{ + transcribe_cloud, CloudTranscribeOptions, CloudTranscribeResult, +}; +use super::super::local_transcribe::{transcribe_whisper, WhisperTranscribeOptions}; +use super::helpers::{base64_decode, extension_for_mime}; +use super::traits::{SttProvider, SttResult}; +use crate::openhuman::config::schema::voice_providers::SttApiStyle; +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +const LOG_PREFIX: &str = "[voice-factory]"; + +// --------------------------------------------------------------------------- +// Cloud STT +// --------------------------------------------------------------------------- + +/// Cloud STT — wraps [`transcribe_cloud`]. Stateless; cheap to construct. 
+pub struct CloudSttProvider { + model: String, +} + +impl CloudSttProvider { + pub fn new(model: impl Into) -> Self { + Self { + model: model.into(), + } + } +} + +#[async_trait] +impl SttProvider for CloudSttProvider { + fn name(&self) -> &'static str { + "cloud" + } + + async fn transcribe( + &self, + config: &Config, + audio_base64: &str, + mime_type: Option<&str>, + file_name: Option<&str>, + language: Option<&str>, + ) -> Result, String> { + debug!( + "{LOG_PREFIX} cloud STT dispatch model={} bytes_b64={}", + self.model, + audio_base64.len() + ); + let opts = CloudTranscribeOptions { + model: Some(self.model.clone()), + language: language.map(str::to_string), + mime_type: mime_type.map(str::to_string), + file_name: file_name.map(str::to_string), + }; + let outcome = transcribe_cloud(config, audio_base64, &opts).await?; + let CloudTranscribeResult { text } = outcome.value; + Ok(RpcOutcome::single_log( + SttResult { + text, + provider: "cloud".to_string(), + }, + "voice-factory: cloud STT completed", + )) + } +} + +// --------------------------------------------------------------------------- +// Local Whisper STT +// --------------------------------------------------------------------------- + +/// Local Whisper STT — wraps [`transcribe_whisper`]. Resolves `WHISPER_BIN` +/// lazily on each call. 
+pub struct WhisperSttProvider {
+    /// Whisper model id passed to the local transcription call.
+    model: String,
+}
+
+impl WhisperSttProvider {
+    /// Creates a local Whisper provider bound to `model`.
+    pub fn new(model: impl Into<String>) -> Self {
+        Self {
+            model: model.into(),
+        }
+    }
+}
+
+#[async_trait]
+impl SttProvider for WhisperSttProvider {
+    fn name(&self) -> &'static str {
+        "whisper"
+    }
+
+    /// Hands the base64 audio to [`transcribe_whisper`] and re-wraps the
+    /// returned text as an [`SttResult`] tagged with this provider's name.
+    /// The `file_name` hint is not used by this provider.
+    ///
+    /// # Errors
+    /// Propagates the error string returned by [`transcribe_whisper`].
+    async fn transcribe(
+        &self,
+        config: &Config,
+        audio_base64: &str,
+        mime_type: Option<&str>,
+        _file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<RpcOutcome<SttResult>, String> {
+        debug!(
+            "{LOG_PREFIX} whisper STT dispatch model={} mime={:?} lang={:?}",
+            self.model, mime_type, language
+        );
+        let opts = WhisperTranscribeOptions {
+            model: Some(self.model.clone()),
+            mime_type: mime_type.map(str::to_string),
+            language: language.map(str::to_string),
+        };
+        let outcome = transcribe_whisper(config, audio_base64, &opts).await?;
+        Ok(RpcOutcome::single_log(
+            SttResult {
+                text: outcome.value.text,
+                provider: self.name().to_string(),
+            },
+            "voice-factory: whisper STT completed",
+        ))
+    }
+}
+
+// ---------------------------------------------------------------------------
+// External STT provider (slug-keyed, third-party API)
+// ---------------------------------------------------------------------------
+
+/// Third-party STT provider dispatched via the voice provider registry.
+/// Supports OpenAI-compatible and Deepgram API styles.
+pub struct ExternalSttProvider {
+    /// Registry slug; reported back as `SttResult::provider`.
+    slug: String,
+    /// Model id sent to the third-party API.
+    model: String,
+    /// Base URL of the API; trailing slashes are trimmed before use.
+    endpoint: String,
+    /// Credential sent in the `Authorization` header.
+    api_key: String,
+    /// Selects which wire protocol `transcribe` speaks.
+    api_style: SttApiStyle,
+}
+
+impl ExternalSttProvider {
+    /// Creates an external STT provider from resolved registry values.
+    pub fn new(
+        slug: impl Into<String>,
+        model: impl Into<String>,
+        endpoint: impl Into<String>,
+        api_key: impl Into<String>,
+        api_style: SttApiStyle,
+    ) -> Self {
+        Self {
+            slug: slug.into(),
+            model: model.into(),
+            endpoint: endpoint.into(),
+            api_key: api_key.into(),
+            api_style,
+        }
+    }
+}
+
+#[async_trait]
+impl SttProvider for ExternalSttProvider {
+    fn name(&self) -> &'static str {
+        "external"
+    }
+
+    /// Decodes the base64 audio and dispatches it to the request builder
+    /// matching `api_style`. A missing `mime_type` is treated as `audio/wav`.
+    ///
+    /// # Errors
+    /// Returns the error string from base64 decoding or from the selected
+    /// API call (transport failure, non-success status, unparsable body).
+    async fn transcribe(
+        &self,
+        _config: &Config,
+        audio_base64: &str,
+        mime_type: Option<&str>,
+        file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<RpcOutcome<SttResult>, String> {
+        debug!(
+            "{LOG_PREFIX} external STT dispatch slug={} model={} style={:?} bytes_b64={}",
+            self.slug,
+            self.model,
+            self.api_style,
+            audio_base64.len()
+        );
+
+        let audio_bytes = base64_decode(audio_base64)?;
+        let mime = mime_type.unwrap_or("audio/wav");
+
+        let result = match self.api_style {
+            SttApiStyle::OpenaiAudio => {
+                self.transcribe_openai_compat(&audio_bytes, mime, file_name, language)
+                    .await?
+            }
+            SttApiStyle::Deepgram => {
+                self.transcribe_deepgram(&audio_bytes, mime, language)
+                    .await?
+            }
+        };
+
+        Ok(RpcOutcome::single_log(
+            SttResult {
+                text: result,
+                provider: self.slug.clone(),
+            },
+            &format!("voice-factory: external STT completed via {}", self.slug),
+        ))
+    }
+}
+
+impl ExternalSttProvider {
+    /// POSTs the audio as multipart form data to `{endpoint}/audio/transcriptions`
+    /// with Bearer auth and returns the `text` field of the JSON response.
+    async fn transcribe_openai_compat(
+        &self,
+        audio_bytes: &[u8],
+        mime: &str,
+        file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<String, String> {
+        let url = format!(
+            "{}/audio/transcriptions",
+            self.endpoint.trim_end_matches('/')
+        );
+        // Without a caller-supplied name, derive one from the MIME type.
+        let ext = extension_for_mime(mime);
+        let default_fname = format!("audio.{ext}");
+        let fname = file_name.unwrap_or(&default_fname);
+
+        let file_part = reqwest::multipart::Part::bytes(audio_bytes.to_vec())
+            .file_name(fname.to_string())
+            .mime_str(mime)
+            .map_err(|e| format!("[voice-stt] mime error: {e}"))?;
+
+        let mut form = reqwest::multipart::Form::new()
+            .text("model", self.model.clone())
+            .part("file", file_part);
+
+        if let Some(lang) = language {
+            form = form.text("language", lang.to_string());
+        }
+
+        let client = reqwest::Client::new();
+        let resp = client
+            .post(&url)
+            .header("Authorization", format!("Bearer {}", self.api_key))
+            .multipart(form)
+            .send()
+            .await
+            .map_err(|e| format!("[voice-stt] external STT request failed: {e}"))?;
+
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("[voice-stt] external STT error {status}: {body}"));
+        }
+
+        #[derive(Deserialize)]
+        struct TranscriptionResp {
+            text: String,
+        }
+        let parsed: TranscriptionResp = resp
+            .json()
+            .await
+            .map_err(|e| format!("[voice-stt] failed to parse response: {e}"))?;
+        Ok(parsed.text)
+    }
+
+    /// POSTs the raw audio to `{endpoint}/listen` with Token auth and returns
+    /// the first alternative of the first channel, or an empty string when
+    /// the response carries none.
+    async fn transcribe_deepgram(
+        &self,
+        audio_bytes: &[u8],
+        mime: &str,
+        language: Option<&str>,
+    ) -> Result<String, String> {
+        // `model` and the caller-supplied `language` go through the URL
+        // serializer so reserved characters (`&`, `#`, spaces, ...) cannot
+        // corrupt or extend the query string.
+        let base = format!("{}/listen", self.endpoint.trim_end_matches('/'));
+        let mut params = vec![("model", self.model.as_str())];
+        if let Some(lang) = language {
+            params.push(("language", lang));
+        }
+        let url = reqwest::Url::parse_with_params(&base, &params)
+            .map_err(|e| format!("[voice-stt] invalid deepgram endpoint: {e}"))?;
+
+        let client = reqwest::Client::new();
+        let resp = client
+            .post(url)
+            .header("Authorization", format!("Token {}", self.api_key))
+            .header("Content-Type", mime)
+            .body(audio_bytes.to_vec())
+            .send()
+            .await
+            .map_err(|e| format!("[voice-stt] deepgram request failed: {e}"))?;
+
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("[voice-stt] deepgram error {status}: {body}"));
+        }
+
+        #[derive(Deserialize)]
+        struct DeepgramChannel {
+            alternatives: Vec<DeepgramAlt>,
+        }
+        #[derive(Deserialize)]
+        struct DeepgramAlt {
+            transcript: String,
+        }
+        #[derive(Deserialize)]
+        struct DeepgramResult {
+            channels: Vec<DeepgramChannel>,
+        }
+        #[derive(Deserialize)]
+        struct DeepgramResp {
+            results: DeepgramResult,
+        }
+
+        let parsed: DeepgramResp = resp
+            .json()
+            .await
+            .map_err(|e| format!("[voice-stt] deepgram parse error: {e}"))?;
+
+        // The response is owned, so take the transcript instead of cloning it.
+        let text = parsed
+            .results
+            .channels
+            .into_iter()
+            .next()
+            .and_then(|ch| ch.alternatives.into_iter().next())
+            .map(|alt| alt.transcript)
+            .unwrap_or_default();
+        Ok(text)
+    }
+}
diff --git a/src/openhuman/voice/factory/tests.rs b/src/openhuman/voice/factory/tests.rs
new file mode 100644
index 0000000000..a5884ff09c
--- /dev/null
+++ b/src/openhuman/voice/factory/tests.rs
@@ -0,0 +1,310 @@
+//! Unit tests for the voice factory.
+
+use super::entry::{
+    create_stt_provider, create_tts_provider, default_stt_provider, default_tts_provider,
+    DEFAULT_WHISPER_MODEL, WHISPER_MODEL_PRESETS,
+};
+use super::helpers::{effective_stt_provider, effective_tts_provider, split_slug_model};
+use super::stt_providers::WhisperSttProvider;
+use super::traits::SttProvider;
+use crate::openhuman::config::schema::voice_providers::{
+    SttApiStyle, TtsApiStyle, VoiceCapability,
+};
+use crate::openhuman::config::Config;
+
+fn cfg() -> Config {
+    Config::default()
+}
+
+#[test]
+fn stt_factory_cloud_branch() {
+    let p = create_stt_provider("cloud", "ignored", &cfg()).unwrap();
+    assert_eq!(p.name(), "cloud");
+}
+
+#[test]
+fn stt_factory_whisper_branch() {
+    let p = create_stt_provider("whisper", "whisper-large-v3-turbo", &cfg()).unwrap();
+    assert_eq!(p.name(), "whisper");
+}
+
+#[test]
+fn stt_factory_whisper_empty_model_uses_default() {
+    // Empty model → default whisper-large-v3-turbo; constructor must not
+    // reject an empty string with an opaque error.
+    let p = create_stt_provider("whisper", "", &cfg()).unwrap();
+    assert_eq!(p.name(), "whisper");
+}
+
+#[test]
+fn stt_factory_openhuman_sentinel() {
+    let p = create_stt_provider("openhuman", "ignored", &cfg()).unwrap();
+    assert_eq!(p.name(), "cloud");
+}
+
+#[test]
+fn stt_factory_slug_without_registry_errors() {
+    let err = create_stt_provider("deepgram", "nova-2", &cfg())
+        .err()
+        .expect("deepgram without registry entry must error");
+    let msg = err.to_string();
+    assert!(msg.contains("deepgram"), "should name the slug: {msg}");
+    assert!(
+        msg.contains("no voice provider"),
+        "should explain missing: {msg}"
+    );
+}
+
+#[test]
+fn stt_factory_slug_colon_model_resolves_with_registry() {
+    let mut config = cfg();
+    config.voice_providers.push(
+        crate::openhuman::config::schema::voice_providers::VoiceProviderCreds {
+            slug: "deepgram".into(),
+            endpoint: "https://api.deepgram.com/v1".into(),
+            capability: VoiceCapability::Stt,
+            stt_api_style: SttApiStyle::Deepgram,
+            ..Default::default()
+        },
+    );
+    let p = create_stt_provider("deepgram:nova-2", "", &config).unwrap();
+    assert_eq!(p.name(), "external");
+}
+
+#[test]
+fn stt_factory_bare_slug_resolves_with_registry() {
+    let mut config = cfg();
+    config.voice_providers.push(
+        crate::openhuman::config::schema::voice_providers::VoiceProviderCreds {
+            slug: "openai".into(),
+            endpoint: "https://api.openai.com/v1".into(),
+            capability: VoiceCapability::Both,
+            default_stt_model: Some("whisper-1".into()),
+            ..Default::default()
+        },
+    );
+    let p = create_stt_provider("openai", "", &config).unwrap();
+    assert_eq!(p.name(), "external");
+}
+
+#[test]
+fn stt_factory_tts_only_provider_rejects() {
+    let mut config = cfg();
+    config.voice_providers.push(
+        crate::openhuman::config::schema::voice_providers::VoiceProviderCreds {
+            slug: "elevenlabs".into(),
+            endpoint: "https://api.elevenlabs.io/v1".into(),
+            capability: VoiceCapability::Tts,
+            ..Default::default()
+        },
+    );
+    let err = create_stt_provider("elevenlabs", "model", &config)
+        .err()
+        .expect("TTS-only provider must reject STT");
+    assert!(err.to_string().contains("does not support STT"));
+}
+
+#[test]
+fn stt_factory_empty_string_errors() {
+    let err = create_stt_provider("", "model", &cfg())
+        .err()
+        .expect("empty provider must error");
+    assert!(err.to_string().contains("no voice provider"));
+}
+
+#[test]
+fn tts_factory_cloud_branch() {
+    let p = create_tts_provider("cloud", "Rachel", &cfg()).unwrap();
+    assert_eq!(p.name(), "cloud");
+}
+
+#[test]
+fn tts_factory_piper_branch() {
+    let p = create_tts_provider("piper", "en_US-lessac-medium", &cfg()).unwrap();
+    assert_eq!(p.name(), "piper");
+}
+
+#[test]
+fn tts_factory_piper_empty_voice_uses_default() {
+    let p = create_tts_provider("piper", "", &cfg()).unwrap();
+    assert_eq!(p.name(), "piper");
+}
+
+#[test]
+fn tts_factory_openhuman_sentinel() {
+    let p = create_tts_provider("openhuman", "alloy", &cfg()).unwrap();
+    assert_eq!(p.name(), "cloud");
+}
+
+#[test]
+fn tts_factory_slug_without_registry_errors() {
+    let err = create_tts_provider("kokoro", "af_bella", &cfg())
+        .err()
+        .expect("kokoro without registry entry must error");
+    let msg = err.to_string();
+    assert!(msg.contains("kokoro"), "should name the slug: {msg}");
+    assert!(
+        msg.contains("no voice provider"),
+        "should explain missing: {msg}"
+    );
+}
+
+#[test]
+fn tts_factory_slug_colon_voice_resolves_with_registry() {
+    let mut config = cfg();
+    config.voice_providers.push(
+        crate::openhuman::config::schema::voice_providers::VoiceProviderCreds {
+            slug: "openai".into(),
+            endpoint: "https://api.openai.com/v1".into(),
+            capability: VoiceCapability::Both,
+            default_tts_voice: Some("alloy".into()),
+            ..Default::default()
+        },
+    );
+    let p = create_tts_provider("openai:shimmer", "", &config).unwrap();
+    assert_eq!(p.name(), "external");
+}
+
+#[test]
+fn tts_factory_stt_only_provider_rejects() {
+    let mut config = cfg();
+    config.voice_providers.push(
+        crate::openhuman::config::schema::voice_providers::VoiceProviderCreds {
+            slug: "deepgram".into(),
+            endpoint: "https://api.deepgram.com/v1".into(),
+            capability: VoiceCapability::Stt,
+            ..Default::default()
+        },
+    );
+    let err = create_tts_provider("deepgram", "voice", &config)
+        .err()
+        .expect("STT-only provider must reject TTS");
+    assert!(err.to_string().contains("does not support TTS"));
+}
+
+#[test]
+fn whisper_presets_cover_full_size_ladder() {
+    // Sanity-check the installer surface: tiny→large-v3-turbo must all be
+    // exposed so the local-AI panel can render the size picker without
+    // hard-coding the list.
+    let ids: Vec<&str> = WHISPER_MODEL_PRESETS.iter().map(|(id, _)| *id).collect();
+    for expected in ["tiny", "base", "small", "medium", "large-v3-turbo"] {
+        assert!(
+            ids.contains(&expected),
+            "WHISPER_MODEL_PRESETS missing {expected}"
+        );
+    }
+}
+
+#[tokio::test]
+async fn whisper_provider_fails_clearly_when_binary_missing() {
+    // No WHISPER_BIN env, no model file — the provider must surface an
+    // actionable error rather than panic. Drive a small base64 payload
+    // so we never reach the actual transcription call.
+    let _guard = unset_env_guard("WHISPER_BIN");
+    let provider = WhisperSttProvider::new("whisper-large-v3-turbo");
+    let result = provider
+        .transcribe(&cfg(), "AAAA", Some("audio/wav"), None, None)
+        .await;
+    assert!(result.is_err(), "missing binary must error");
+    let msg = result.err().unwrap();
+    // Whatever the underlying message says, it must NOT be a serialize
+    // panic — i.e. we must have hit the binary-resolution branch.
+    assert!(
+        !msg.is_empty(),
+        "error message should be populated for diagnosis"
+    );
+}
+
+#[test]
+fn default_providers_return_cloud() {
+    assert_eq!(default_stt_provider().name(), "cloud");
+    assert_eq!(default_tts_provider().name(), "cloud");
+}
+
+// ── slug:model parsing ──────────────────────────────────────────────
+
+#[test]
+fn split_slug_model_with_colon() {
+    assert_eq!(split_slug_model("deepgram:nova-2"), ("deepgram", "nova-2"));
+}
+
+#[test]
+fn split_slug_model_bare_slug() {
+    assert_eq!(split_slug_model("deepgram"), ("deepgram", ""));
+}
+
+#[test]
+fn split_slug_model_multiple_colons() {
+    assert_eq!(split_slug_model("custom:model:v2"), ("custom", "model:v2"));
+}
+
+// ── effective provider resolution ───────────────────────────────────
+
+#[test]
+fn effective_stt_prefers_new_field() {
+    let mut config = cfg();
+    config.stt_provider = Some("deepgram:nova-2".into());
+    config.local_ai.stt_provider = "whisper".into();
+    assert_eq!(effective_stt_provider(&config), "deepgram:nova-2");
+}
+
+#[test]
+fn effective_stt_falls_back_to_legacy() {
+    let mut config = cfg();
+    config.stt_provider = None;
+    config.local_ai.stt_provider = "whisper".into();
+    assert_eq!(effective_stt_provider(&config), "whisper");
+}
+
+#[test]
+fn effective_stt_defaults_to_cloud() {
+    let mut config = cfg();
+    config.stt_provider = None;
+    config.local_ai.stt_provider = String::new();
+    assert_eq!(effective_stt_provider(&config), "cloud");
+}
+
+#[test]
+fn effective_tts_prefers_new_field() {
+    let mut config = cfg();
+    config.tts_provider = Some("openai:alloy".into());
+    config.local_ai.tts_provider = "piper".into();
+    assert_eq!(effective_tts_provider(&config), "openai:alloy");
+}
+
+#[test]
+fn effective_tts_falls_back_to_legacy() {
+    let mut config = cfg();
+    config.tts_provider = None;
+    config.local_ai.tts_provider = "piper".into();
+    assert_eq!(effective_tts_provider(&config), "piper");
+}
+
+#[test]
+fn effective_tts_defaults_to_cloud() {
+    let config = cfg();
+    assert_eq!(effective_tts_provider(&config), "cloud");
+}
+
+/// Drop guard that unsets an env var on construction and restores it on
+/// drop, so the change does not outlive the test. NOTE(review): it takes no
+/// lock, so parallel tests touching the same variable can still race.
+fn unset_env_guard(key: &'static str) -> EnvUnsetGuard {
+    let prev = std::env::var_os(key);
+    std::env::remove_var(key);
+    EnvUnsetGuard { key, prev }
+}
+
+struct EnvUnsetGuard {
+    key: &'static str,
+    prev: Option<std::ffi::OsString>,
+}
+impl Drop for EnvUnsetGuard {
+    fn drop(&mut self) {
+        match &self.prev {
+            Some(v) => std::env::set_var(self.key, v),
+            None => std::env::remove_var(self.key),
+        }
+    }
+}
diff --git a/src/openhuman/voice/factory/traits.rs b/src/openhuman/voice/factory/traits.rs
new file mode 100644
index 0000000000..78c1323b4c
--- /dev/null
+++ b/src/openhuman/voice/factory/traits.rs
@@ -0,0 +1,67 @@
+//! Provider traits and shared result types for STT / TTS dispatch.
+
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+
+use super::super::reply_speech::ReplySpeechResult;
+use crate::openhuman::config::Config;
+use crate::rpc::RpcOutcome;
+
+// ---------------------------------------------------------------------------
+// Shared result type
+// ---------------------------------------------------------------------------
+
+/// Common shape every STT branch returns after dispatch. Keeps the wire
+/// contract identical regardless of provider — the UI only sees `text`.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SttResult {
+    pub text: String,
+    /// Provider id (`"cloud"`, `"whisper"`, or an external provider's slug) —
+    /// on the wire so the renderer can show which path actually ran.
+    pub provider: String,
+}
+
+// ---------------------------------------------------------------------------
+// Provider traits
+// ---------------------------------------------------------------------------
+
+/// Speech-to-text provider abstraction. Cloud (backend proxy), Whisper (local)
+/// and external (slug-keyed third-party API) providers implement this; the
+/// factory hands the caller a boxed trait object.
+#[async_trait]
+pub trait SttProvider: Send + Sync {
+    /// Stable identifier used in logs (`"cloud"`, `"whisper"`, `"external"`).
+    fn name(&self) -> &'static str;
+
+    /// Transcribe a single base64-encoded audio blob.
+    ///
+    /// `mime_type` and `file_name` are hints; providers that don't care
+    /// may ignore them. `language` is BCP-47 (`"en"`, `"es"`); pass `None`
+    /// to let the provider auto-detect.
+    async fn transcribe(
+        &self,
+        config: &Config,
+        audio_base64: &str,
+        mime_type: Option<&str>,
+        file_name: Option<&str>,
+        language: Option<&str>,
+    ) -> Result<RpcOutcome<SttResult>, String>;
+}
+
+/// Text-to-speech provider abstraction. Cloud returns rich viseme alignment
+/// (used by the mascot lip-sync); Piper returns audio only and the caller
+/// derives a flat viseme timeline downstream.
+#[async_trait]
+pub trait TtsProvider: Send + Sync {
+    fn name(&self) -> &'static str;
+
+    /// Synthesize speech for `text`. Returns the same envelope shape as
+    /// `voice.reply_synthesize` so the renderer can swap providers without
+    /// branching on the response.
+    async fn synthesize(
+        &self,
+        config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String>;
+}
diff --git a/src/openhuman/voice/factory/tts_providers.rs b/src/openhuman/voice/factory/tts_providers.rs
new file mode 100644
index 0000000000..b6bc0cec19
--- /dev/null
+++ b/src/openhuman/voice/factory/tts_providers.rs
@@ -0,0 +1,273 @@
+//! TTS provider implementations: cloud, local Piper, and external (slug-keyed).
+
+use async_trait::async_trait;
+use log::debug;
+
+use super::super::local_speech::{synthesize_piper, PiperOptions};
+use super::super::reply_speech::{synthesize_reply, ReplySpeechOptions, ReplySpeechResult};
+use super::traits::TtsProvider;
+use crate::openhuman::config::schema::voice_providers::TtsApiStyle;
+use crate::openhuman::config::Config;
+use crate::rpc::RpcOutcome;
+
+const LOG_PREFIX: &str = "[voice-factory]";
+
+// ---------------------------------------------------------------------------
+// Cloud TTS
+// ---------------------------------------------------------------------------
+
+/// Cloud TTS — wraps [`synthesize_reply`] (backend ElevenLabs proxy).
+pub struct CloudTtsProvider {
+    /// Voice used when a call supplies no usable override; `None` sends no
+    /// `voice_id` at all.
+    voice: Option<String>,
+}
+
+impl CloudTtsProvider {
+    /// Creates a cloud TTS provider with an optional configured voice.
+    pub fn new(voice: Option<String>) -> Self {
+        Self { voice }
+    }
+}
+
+#[async_trait]
+impl TtsProvider for CloudTtsProvider {
+    fn name(&self) -> &'static str {
+        "cloud"
+    }
+
+    /// Synthesizes `text` through [`synthesize_reply`].
+    ///
+    /// Voice resolution: a non-blank per-call `voice` wins; otherwise the
+    /// non-blank voice given at construction; otherwise `voice_id` is `None`.
+    ///
+    /// # Errors
+    /// Propagates the error string returned by [`synthesize_reply`].
+    async fn synthesize(
+        &self,
+        config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String> {
+        // Drop a blank override BEFORE falling back, so `Some("")` from the
+        // caller selects the configured voice instead of discarding it —
+        // the same rule the Piper and external providers apply.
+        let resolved_voice = voice
+            .filter(|s| !s.trim().is_empty())
+            .map(str::to_string)
+            .or_else(|| self.voice.clone().filter(|s| !s.trim().is_empty()));
+        debug!(
+            "{LOG_PREFIX} cloud TTS dispatch voice={} chars={}",
+            resolved_voice.as_deref().unwrap_or(""),
+            text.len()
+        );
+        let opts = ReplySpeechOptions {
+            voice_id: resolved_voice,
+            model_id: None,
+            output_format: None,
+            voice_settings: None,
+        };
+        synthesize_reply(config, text, &opts).await
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Local Piper TTS
+// ---------------------------------------------------------------------------
+
+/// Local Piper TTS — wraps [`synthesize_piper`].
+pub struct PiperTtsProvider {
+    /// Voice used when a call supplies no usable override.
+    voice: String,
+}
+
+impl PiperTtsProvider {
+    /// Creates a local Piper provider with `voice` as its default voice.
+    pub fn new(voice: impl Into<String>) -> Self {
+        Self {
+            voice: voice.into(),
+        }
+    }
+}
+
+#[async_trait]
+impl TtsProvider for PiperTtsProvider {
+    fn name(&self) -> &'static str {
+        "piper"
+    }
+
+    /// Synthesizes `text` through [`synthesize_piper`]. A non-blank per-call
+    /// `voice` overrides the voice given at construction.
+    ///
+    /// # Errors
+    /// Propagates the error string returned by [`synthesize_piper`].
+    async fn synthesize(
+        &self,
+        config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String> {
+        // Filter on the borrowed `&str` first so a blank override costs no
+        // allocation before it is discarded.
+        let resolved_voice = voice
+            .filter(|s| !s.trim().is_empty())
+            .map_or_else(|| self.voice.clone(), str::to_string);
+        debug!(
+            "{LOG_PREFIX} piper TTS dispatch voice={} chars={}",
+            resolved_voice,
+            text.len()
+        );
+        let opts = PiperOptions {
+            voice: Some(resolved_voice),
+        };
+        synthesize_piper(config, text, &opts).await
+    }
+}
+
+// ---------------------------------------------------------------------------
+// External TTS provider (slug-keyed, third-party API)
+// ---------------------------------------------------------------------------
+
+/// Third-party TTS provider dispatched via the voice provider registry.
+/// Supports OpenAI-compatible and ElevenLabs API styles.
+pub struct ExternalTtsProvider {
+    /// Registry slug; used in logs.
+    slug: String,
+    /// Voice used when a call supplies no usable override.
+    default_voice: String,
+    /// Base URL of the API.
+    endpoint: String,
+    /// Credential sent with every request.
+    api_key: String,
+    /// Selects which wire protocol `synthesize` speaks.
+    api_style: TtsApiStyle,
+}
+
+impl ExternalTtsProvider {
+    /// Creates an external TTS provider from resolved registry values.
+    pub fn new(
+        slug: impl Into<String>,
+        default_voice: impl Into<String>,
+        endpoint: impl Into<String>,
+        api_key: impl Into<String>,
+        api_style: TtsApiStyle,
+    ) -> Self {
+        Self {
+            slug: slug.into(),
+            default_voice: default_voice.into(),
+            endpoint: endpoint.into(),
+            api_key: api_key.into(),
+            api_style,
+        }
+    }
+}
+
+#[async_trait]
+impl TtsProvider for ExternalTtsProvider {
+    fn name(&self) -> &'static str {
+        "external"
+    }
+
+    /// Synthesizes `text` via the API selected by `api_style` and returns the
+    /// audio base64-encoded, with no visemes and no alignment. A non-blank
+    /// per-call `voice` overrides `default_voice`.
+    ///
+    /// # Errors
+    /// Returns the error string from the selected API call (invalid endpoint,
+    /// transport failure, non-success status, unreadable body).
+    async fn synthesize(
+        &self,
+        _config: &Config,
+        text: &str,
+        voice: Option<&str>,
+    ) -> Result<RpcOutcome<ReplySpeechResult>, String> {
+        let resolved_voice = voice
+            .filter(|s| !s.trim().is_empty())
+            .unwrap_or(&self.default_voice);
+
+        debug!(
+            "{LOG_PREFIX} external TTS dispatch slug={} voice={} style={:?} chars={}",
+            self.slug,
+            resolved_voice,
+            self.api_style,
+            text.len()
+        );
+
+        let (audio_bytes, audio_mime) = match self.api_style {
+            TtsApiStyle::OpenaiAudio => self.synthesize_openai_compat(text, resolved_voice).await?,
+            TtsApiStyle::ElevenLabs => self.synthesize_elevenlabs(text, resolved_voice).await?,
+        };
+
+        use base64::Engine;
+        let audio_base64 = base64::engine::general_purpose::STANDARD.encode(&audio_bytes);
+
+        Ok(RpcOutcome::single_log(
+            ReplySpeechResult {
+                audio_base64,
+                audio_mime,
+                visemes: Vec::new(),
+                alignment: None,
+            },
+            &format!("voice-factory: external TTS completed via {}", self.slug),
+        ))
+    }
+}
+
+impl ExternalTtsProvider {
+    /// POSTs a JSON speech request to `{endpoint}/audio/speech` with Bearer
+    /// auth and returns `(audio bytes, mime type)`.
+    async fn synthesize_openai_compat(
+        &self,
+        text: &str,
+        voice: &str,
+    ) -> Result<(Vec<u8>, String), String> {
+        let url = format!("{}/audio/speech", self.endpoint.trim_end_matches('/'));
+
+        // NOTE(review): the model id is fixed here; the provider has no
+        // field to carry a configured TTS model.
+        let body = serde_json::json!({
+            "model": "tts-1",
+            "voice": voice,
+            "input": text,
+        });
+
+        let client = reqwest::Client::new();
+        let resp = client
+            .post(&url)
+            .header("Authorization", format!("Bearer {}", self.api_key))
+            .header("Content-Type", "application/json")
+            .body(body.to_string())
+            .send()
+            .await
+            .map_err(|e| format!("[voice-tts] external TTS request failed: {e}"))?;
+
+        Self::read_audio_response(resp, "external TTS error", "failed to read audio").await
+    }
+
+    /// POSTs a JSON request to `{endpoint}/text-to-speech/{voice_id}` with the
+    /// `xi-api-key` header and returns `(audio bytes, mime type)`.
+    async fn synthesize_elevenlabs(
+        &self,
+        text: &str,
+        voice_id: &str,
+    ) -> Result<(Vec<u8>, String), String> {
+        // Append the voice id as a path segment through the URL API so that
+        // `/`, `?` or `#` in a caller-supplied id is escaped rather than
+        // changing which resource the request addresses.
+        let mut url = reqwest::Url::parse(&self.endpoint)
+            .map_err(|e| format!("[voice-tts] invalid elevenlabs endpoint: {e}"))?;
+        url.path_segments_mut()
+            .map_err(|()| {
+                "[voice-tts] invalid elevenlabs endpoint: URL cannot be a base".to_string()
+            })?
+            .pop_if_empty()
+            .push("text-to-speech")
+            .push(voice_id);
+
+        let body = serde_json::json!({
+            "text": text,
+            "model_id": "eleven_multilingual_v2",
+        });
+
+        let client = reqwest::Client::new();
+        let resp = client
+            .post(url)
+            .header("xi-api-key", &self.api_key)
+            .header("Content-Type", "application/json")
+            .body(body.to_string())
+            .send()
+            .await
+            .map_err(|e| format!("[voice-tts] elevenlabs request failed: {e}"))?;
+
+        Self::read_audio_response(resp, "elevenlabs error", "failed to read elevenlabs audio")
+            .await
+    }
+
+    /// Turns an HTTP response into `(audio bytes, mime type)`.
+    ///
+    /// A non-success status becomes `"[voice-tts] {status_label} {status}: {body}"`.
+    /// The mime type is the `content-type` header, or `audio/mpeg` when the
+    /// header is absent or not valid text. A body read failure becomes
+    /// `"[voice-tts] {read_label}: {error}"`.
+    async fn read_audio_response(
+        resp: reqwest::Response,
+        status_label: &str,
+        read_label: &str,
+    ) -> Result<(Vec<u8>, String), String> {
+        if !resp.status().is_success() {
+            let status = resp.status();
+            let body = resp.text().await.unwrap_or_default();
+            return Err(format!("[voice-tts] {status_label} {status}: {body}"));
+        }
+
+        let content_type = resp
+            .headers()
+            .get("content-type")
+            .and_then(|v| v.to_str().ok())
+            .unwrap_or("audio/mpeg")
+            .to_string();
+
+        let bytes = resp
+            .bytes()
+            .await
+            .map_err(|e| format!("[voice-tts] {read_label}: {e}"))?;
+
+        Ok((bytes.to_vec(), content_type))
+    }
+}
diff --git a/src/openhuman/voice/schemas.rs b/src/openhuman/voice/schemas.rs
deleted file mode 100644
index 274853c739..0000000000
--- a/src/openhuman/voice/schemas.rs
+++ /dev/null
@@ -1,1501 +0,0 @@
-//! Controller schemas and RPC handler dispatch for the voice domain.
- -use serde::de::DeserializeOwned; -use serde::Deserialize; -use serde_json::{Map, Value}; - -use crate::core::all::{ControllerFuture, RegisteredController}; -use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::openhuman::config::rpc as config_rpc; -use crate::rpc::RpcOutcome; - -// --------------------------------------------------------------------------- -// Param structs -// --------------------------------------------------------------------------- - -#[derive(Debug, Deserialize)] -struct TranscribeParams { - audio_path: String, - /// Optional conversation context for LLM post-processing. - #[serde(default)] - context: Option, - /// Skip LLM cleanup and return raw whisper output. - #[serde(default)] - skip_cleanup: bool, -} - -#[derive(Debug, Deserialize)] -struct TranscribeBytesParams { - audio_bytes: Vec, - #[serde(default)] - extension: Option, - /// Optional conversation context for LLM post-processing. - #[serde(default)] - context: Option, - /// Skip LLM cleanup and return raw whisper output. - #[serde(default)] - skip_cleanup: bool, -} - -#[derive(Debug, Deserialize)] -struct TtsParams { - text: String, - #[serde(default)] - output_path: Option, -} - -#[derive(Debug, Deserialize)] -struct CloudTranscribeParams { - audio_base64: String, - #[serde(default)] - mime_type: Option, - #[serde(default)] - file_name: Option, - #[serde(default)] - model: Option, - #[serde(default)] - language: Option, -} - -/// Factory-dispatched STT request. The caller can either pin a provider -/// explicitly (`"cloud"` / `"whisper"`) or let the controller resolve the -/// effective provider from `config.local_ai.stt_provider`. Keeps the -/// existing `voice_cloud_transcribe` RPC intact for back-compat — older -/// renderers still pin the cloud path directly. -#[derive(Debug, Deserialize)] -struct SttDispatchParams { - audio_base64: String, - /// Provider override; falls back to `config.local_ai.stt_provider`. 
- #[serde(default)] - provider: Option, - /// Model override (cloud branch ignores it). - #[serde(default)] - model: Option, - #[serde(default)] - mime_type: Option, - #[serde(default)] - file_name: Option, - #[serde(default)] - language: Option, -} - -/// Factory-dispatched TTS request. Same provider-resolution rule as -/// [`SttDispatchParams`]. -#[derive(Debug, Deserialize)] -struct TtsDispatchParams { - text: String, - #[serde(default)] - provider: Option, - #[serde(default)] - voice: Option, -} - -/// Settings-panel update for the STT/TTS provider selectors. Both are -/// optional; omitted fields are left at their current value. -#[derive(Debug, Deserialize)] -struct SetProvidersParams { - #[serde(default)] - stt_provider: Option, - #[serde(default)] - tts_provider: Option, - #[serde(default)] - stt_model: Option, - #[serde(default)] - tts_voice: Option, -} - -#[derive(Debug, Deserialize)] -struct ReplySynthesizeParams { - text: String, - #[serde(default)] - voice_id: Option, - #[serde(default)] - model_id: Option, - #[serde(default)] - output_format: Option, -} - -/// Voice provider registry update. Mirrors `InferenceUpdateModelSettingsParams`. -#[derive(Debug, Deserialize)] -struct VoiceUpdateProviderSettingsParams { - #[serde(default)] - voice_providers: Option>, - #[serde(default)] - stt_provider: Option, - #[serde(default)] - tts_provider: Option, -} - -/// Wire format for a single voice provider entry in the update call. -#[derive(Debug, Deserialize)] -struct VoiceProviderCredUpdate { - #[serde(default)] - id: Option, - slug: String, - #[serde(default)] - label: Option, - #[serde(default)] - endpoint: Option, - #[serde(default)] - auth_style: Option, - #[serde(default)] - capability: Option, - #[serde(default)] - stt_api_style: Option, - #[serde(default)] - tts_api_style: Option, - #[serde(default)] - default_stt_model: Option, - #[serde(default)] - default_tts_voice: Option, -} - -/// List models/voices for a voice provider. 
-#[derive(Debug, Deserialize)] -struct VoiceListModelsParams { - provider_id: String, - #[serde(default)] - capability: Option, -} - -/// Test a voice provider endpoint. -#[derive(Debug, Deserialize)] -struct VoiceTestProviderParams { - workload: String, - provider: String, - /// When true, only validate the API key (lightweight GET) without - /// synthesizing or transcribing. Used by the provider-enable modal. - #[serde(default)] - validate_only: bool, -} - -#[derive(Debug, Deserialize)] -#[serde(rename_all = "snake_case")] -enum OverlaySttState { - RecordingStarted, - TranscriptionDone, - Cancelled, - Error, -} - -#[derive(Debug, Deserialize)] -struct OverlaySttNotifyParams { - /// Voice state transition. - state: OverlaySttState, - /// Transcribed text (required when state is "transcription_done"). - #[serde(default)] - text: Option, -} - -// --------------------------------------------------------------------------- -// Schema + registry exports -// --------------------------------------------------------------------------- - -pub fn all_voice_controller_schemas() -> Vec { - vec![ - voice_schemas("voice_status"), - voice_schemas("voice_transcribe"), - voice_schemas("voice_transcribe_bytes"), - voice_schemas("voice_tts"), - voice_schemas("voice_reply_synthesize"), - voice_schemas("voice_cloud_transcribe"), - voice_schemas("voice_stt_dispatch"), - voice_schemas("voice_tts_dispatch"), - voice_schemas("voice_set_providers"), - voice_schemas("voice_update_provider_settings"), - voice_schemas("voice_list_models"), - voice_schemas("voice_test_provider"), - voice_schemas("voice_server_start"), - voice_schemas("voice_server_stop"), - voice_schemas("voice_server_status"), - voice_schemas("overlay_stt_notify"), - ] -} - -pub fn all_voice_registered_controllers() -> Vec { - vec![ - RegisteredController { - schema: voice_schemas("voice_status"), - handler: handle_voice_status, - }, - RegisteredController { - schema: voice_schemas("voice_transcribe"), - handler: 
handle_voice_transcribe, - }, - RegisteredController { - schema: voice_schemas("voice_transcribe_bytes"), - handler: handle_voice_transcribe_bytes, - }, - RegisteredController { - schema: voice_schemas("voice_tts"), - handler: handle_voice_tts, - }, - RegisteredController { - schema: voice_schemas("voice_reply_synthesize"), - handler: handle_voice_reply_synthesize, - }, - RegisteredController { - schema: voice_schemas("voice_cloud_transcribe"), - handler: handle_voice_cloud_transcribe, - }, - RegisteredController { - schema: voice_schemas("voice_stt_dispatch"), - handler: handle_voice_stt_dispatch, - }, - RegisteredController { - schema: voice_schemas("voice_tts_dispatch"), - handler: handle_voice_tts_dispatch, - }, - RegisteredController { - schema: voice_schemas("voice_set_providers"), - handler: handle_voice_set_providers, - }, - RegisteredController { - schema: voice_schemas("voice_update_provider_settings"), - handler: handle_voice_update_provider_settings, - }, - RegisteredController { - schema: voice_schemas("voice_list_models"), - handler: handle_voice_list_models, - }, - RegisteredController { - schema: voice_schemas("voice_test_provider"), - handler: handle_voice_test_provider, - }, - RegisteredController { - schema: voice_schemas("voice_server_start"), - handler: handle_voice_server_start, - }, - RegisteredController { - schema: voice_schemas("voice_server_stop"), - handler: handle_voice_server_stop, - }, - RegisteredController { - schema: voice_schemas("voice_server_status"), - handler: handle_voice_server_status, - }, - RegisteredController { - schema: voice_schemas("overlay_stt_notify"), - handler: handle_overlay_stt_notify, - }, - ] -} - -pub fn voice_schemas(function: &str) -> ControllerSchema { - match function { - "voice_status" => ControllerSchema { - namespace: "voice", - function: "status", - description: "Check availability of STT/TTS binaries and models.", - inputs: vec![], - outputs: vec![json_output("status", "Voice availability status.")], 
- }, - "voice_transcribe" => ControllerSchema { - namespace: "voice", - function: "transcribe", - description: - "Transcribe audio from a file path using whisper.cpp, with optional LLM cleanup.", - inputs: vec![ - required_string("audio_path", "Path to the audio file."), - optional_string("context", "Conversation context for LLM post-processing."), - optional_bool( - "skip_cleanup", - "Skip LLM cleanup, return raw whisper output.", - ), - ], - outputs: vec![json_output( - "speech", - "Transcription result with text and raw_text.", - )], - }, - "voice_transcribe_bytes" => ControllerSchema { - namespace: "voice", - function: "transcribe_bytes", - description: - "Transcribe audio from raw bytes using whisper.cpp, with optional LLM cleanup.", - inputs: vec![ - FieldSchema { - name: "audio_bytes", - ty: TypeSchema::Bytes, - comment: "Raw audio bytes.", - required: true, - }, - optional_string("extension", "Audio file extension (default: webm)."), - optional_string("context", "Conversation context for LLM post-processing."), - optional_bool( - "skip_cleanup", - "Skip LLM cleanup, return raw whisper output.", - ), - ], - outputs: vec![json_output( - "speech", - "Transcription result with text and raw_text.", - )], - }, - "voice_tts" => ControllerSchema { - namespace: "voice", - function: "tts", - description: "Synthesize speech from text using piper.", - inputs: vec![ - required_string("text", "Text to synthesize."), - optional_string("output_path", "Optional output file path."), - ], - outputs: vec![json_output("tts", "TTS result with output path.")], - }, - "voice_reply_synthesize" => ControllerSchema { - namespace: "voice", - function: "reply_synthesize", - description: - "Synthesize an agent reply via the hosted backend (ElevenLabs) and return \ - base64 audio plus an Oculus-15 viseme alignment for mascot lip-sync.", - inputs: vec![ - required_string("text", "Text to synthesize."), - optional_string( - "voice_id", - "Override voice id (defaults to backend 
selection).", - ), - optional_string("model_id", "Override model id."), - optional_string("output_format", "Override audio format (e.g. mp3_44100)."), - ], - outputs: vec![json_output( - "reply", - "ReplySpeechResult: { audio_base64, audio_mime, visemes, alignment? }.", - )], - }, - "voice_stt_dispatch" => ControllerSchema { - namespace: "voice", - function: "stt_dispatch", - description: - "Factory-dispatched speech-to-text. Routes to the cloud Whisper proxy or the \ - local whisper.cpp binary based on `provider` (or `config.local_ai.stt_provider` \ - when unspecified). Returns the same `{ text }` payload either way.", - inputs: vec![ - required_string( - "audio_base64", - "Base64-encoded audio bytes (e.g. webm/opus from MediaRecorder).", - ), - optional_string( - "provider", - "Override provider: 'cloud' or 'whisper'. Defaults to config.local_ai.stt_provider.", - ), - optional_string("model", "Whisper model id (whisper branch only)."), - optional_string("mime_type", "Audio MIME type (default: audio/webm)."), - optional_string("file_name", "Filename hint (default: audio.webm)."), - optional_string("language", "BCP-47 language hint, e.g. 'en'."), - ], - outputs: vec![json_output( - "result", - "SttResult: { text, provider }.", - )], - }, - "voice_tts_dispatch" => ControllerSchema { - namespace: "voice", - function: "tts_dispatch", - description: - "Factory-dispatched text-to-speech. Routes to the cloud ElevenLabs proxy \ - (returns rich viseme alignment) or local Piper (returns audio + a synthetic \ - viseme timeline) based on `provider` (or `config.local_ai.tts_provider`).", - inputs: vec![ - required_string("text", "Text to synthesize."), - optional_string( - "provider", - "Override provider: 'cloud' or 'piper'. Defaults to config.local_ai.tts_provider.", - ), - optional_string( - "voice", - "Voice id (provider-specific). 
Piper expects an id like 'en_US-lessac-medium'.", - ), - ], - outputs: vec![json_output( - "reply", - "ReplySpeechResult: { audio_base64, audio_mime, visemes, alignment? }.", - )], - }, - "voice_set_providers" => ControllerSchema { - namespace: "voice", - function: "set_providers", - description: - "Persist the STT / TTS provider selection (and optional model/voice id) into \ - `config.local_ai.{stt,tts}_provider` so subsequent voice_stt_dispatch / \ - voice_tts_dispatch calls resolve without an explicit provider param.", - inputs: vec![ - optional_string( - "stt_provider", - "STT provider id ('cloud' or 'whisper'). Omitted = unchanged.", - ), - optional_string( - "tts_provider", - "TTS provider id ('cloud' or 'piper'). Omitted = unchanged.", - ), - optional_string("stt_model", "Whisper model id (e.g. 'whisper-large-v3-turbo')."), - optional_string("tts_voice", "Piper voice id (e.g. 'en_US-lessac-medium')."), - ], - outputs: vec![json_output( - "providers", - "Updated provider selectors: { stt_provider, tts_provider, stt_model_id, tts_voice_id }.", - )], - }, - "voice_update_provider_settings" => ControllerSchema { - namespace: "voice", - function: "update_provider_settings", - description: - "Persist the voice provider registry and STT/TTS routing strings. 
\ - Mirrors openhuman.inference_update_model_settings for the voice domain.", - inputs: vec![ - FieldSchema { - name: "voice_providers", - ty: TypeSchema::Option(Box::new(TypeSchema::Json)), - comment: "Array of voice provider entries (VoiceProviderCreds shape).", - required: false, - }, - optional_string( - "stt_provider", - "STT routing string ('cloud', 'whisper', or ':').", - ), - optional_string( - "tts_provider", - "TTS routing string ('cloud', 'piper', or ':').", - ), - ], - outputs: vec![json_output( - "settings", - "Updated voice_providers + routing strings snapshot.", - )], - }, - "voice_list_models" => ControllerSchema { - namespace: "voice", - function: "list_models", - description: - "List available models or voices for a voice provider. Returns static \ - presets for built-in slugs; probes /models for custom providers.", - inputs: vec![ - required_string("provider_id", "Provider id or slug."), - optional_string( - "capability", - "Filter by capability: 'stt' or 'tts'. Defaults to both.", - ), - ], - outputs: vec![json_output( - "models", - "{ models: [{ id, label? }] }", - )], - }, - "voice_test_provider" => ControllerSchema { - namespace: "voice", - function: "test_provider", - description: - "Test a voice provider endpoint without saving. STT transcribes a \ - silent audio clip; TTS synthesizes 'Hello' and discards.", - inputs: vec![ - required_string("workload", "Workload to test: 'stt' or 'tts'."), - required_string( - "provider", - "Provider string to test (e.g. 'deepgram:nova-2').", - ), - optional_bool( - "validate_only", - "When true, only validate the API key without synthesizing/transcribing.", - ), - ], - outputs: vec![json_output( - "result", - "{ ok: bool, detail: string, latency_ms?: number }", - )], - }, - "voice_cloud_transcribe" => ControllerSchema { - namespace: "voice", - function: "cloud_transcribe", - description: - "Transcribe audio bytes via the hosted backend's STT endpoint. 
Used by the \ - mascot's mic-only composer so we don't ship a provider API key in the desktop app.", - inputs: vec![ - required_string( - "audio_base64", - "Base64-encoded audio bytes (e.g. webm/opus from MediaRecorder).", - ), - optional_string("mime_type", "Audio MIME type (default: audio/webm)."), - optional_string("file_name", "Original filename hint (default: audio.webm)."), - optional_string("model", "Backend STT model id (default: whisper-v1)."), - optional_string("language", "BCP-47 language hint, e.g. 'en'."), - ], - outputs: vec![json_output("result", "CloudTranscribeResult: { text }.")], - }, - "voice_server_start" => ControllerSchema { - namespace: "voice", - function: "server_start", - description: - "Start the voice dictation server (hotkey → record → transcribe → insert text).", - inputs: vec![ - optional_string("hotkey", "Hotkey combination (default: Fn)."), - optional_string( - "activation_mode", - "Activation mode: tap or push (default: push).", - ), - optional_bool("skip_cleanup", "Skip LLM post-processing."), - ], - outputs: vec![json_output("status", "Voice server status after start.")], - }, - "voice_server_stop" => ControllerSchema { - namespace: "voice", - function: "server_stop", - description: "Stop the voice dictation server.", - inputs: vec![], - outputs: vec![json_output("status", "Voice server status after stop.")], - }, - "voice_server_status" => ControllerSchema { - namespace: "voice", - function: "server_status", - description: "Get the current voice dictation server status.", - inputs: vec![], - outputs: vec![json_output("status", "Current voice server status.")], - }, - "overlay_stt_notify" => ControllerSchema { - namespace: "voice", - function: "overlay_stt_notify", - description: - "Notify the overlay of a voice/STT state change from the chat prompt button.", - inputs: vec![ - required_string( - "state", - "State transition: recording_started, transcription_done, cancelled, error.", - ), - optional_string( - "text", - 
"Transcribed text (when state is transcription_done).", - ), - ], - outputs: vec![json_output("result", "Notification acknowledgement.")], - }, - _ => ControllerSchema { - namespace: "voice", - function: "unknown", - description: "Unknown voice controller.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "error", - ty: TypeSchema::String, - comment: "Lookup error details.", - required: true, - }], - }, - } -} - -// --------------------------------------------------------------------------- -// Handlers -// --------------------------------------------------------------------------- - -fn handle_voice_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - to_json(crate::openhuman::voice::voice_status(&config).await?) - }) -} - -fn handle_voice_transcribe(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let p = deserialize_params::(params)?; - to_json( - crate::openhuman::voice::voice_transcribe( - &config, - &p.audio_path, - p.context.as_deref(), - p.skip_cleanup, - ) - .await?, - ) - }) -} - -fn handle_voice_transcribe_bytes(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let p = deserialize_params::(params)?; - to_json( - crate::openhuman::voice::voice_transcribe_bytes( - &config, - &p.audio_bytes, - p.extension, - p.context.as_deref(), - p.skip_cleanup, - ) - .await?, - ) - }) -} - -fn handle_voice_tts(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let p = deserialize_params::(params)?; - to_json( - crate::openhuman::voice::voice_tts(&config, &p.text, p.output_path.as_deref()).await?, - ) - }) -} - -fn handle_voice_reply_synthesize(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let p = 
deserialize_params::(params)?; - // Dispatch through the TTS factory so the user's `tts_provider` - // setting (cloud / piper / …) is honored on the spoken-reply path, - // not just the dedicated `voice_tts_dispatch` RPC. Without this - // routing, the settings dropdown was effectively decorative — - // selecting "piper" persisted to config but conversation replies - // still hit the cloud TTS proxy. - let provider_name = effective_tts_provider(&config); - // Only default to the Piper voice id when the active provider is - // actually Piper. Passing a Piper voice id to a cloud TTS provider - // would send an invalid voice to the upstream API. - let voice = p - .voice_id - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string) - .unwrap_or_else(|| { - if provider_name == "piper" { - crate::openhuman::voice::DEFAULT_PIPER_VOICE.to_string() - } else { - String::new() - } - }); - let effective_voice = if voice.is_empty() { - None - } else { - Some(voice.as_str()) - }; - log::debug!( - "[voice-factory] voice_reply_synthesize dispatch provider={provider_name} voice={voice}" - ); - let provider = - crate::openhuman::voice::create_tts_provider(&provider_name, &voice, &config) - .map_err(|e| e.to_string())?; - to_json( - provider - .synthesize(&config, &p.text, effective_voice) - .await?, - ) - }) -} - -fn handle_voice_cloud_transcribe(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let p = deserialize_params::(params)?; - let opts = crate::openhuman::voice::cloud_transcribe::CloudTranscribeOptions { - model: p.model, - language: p.language, - mime_type: p.mime_type, - file_name: p.file_name, - }; - to_json( - crate::openhuman::voice::cloud_transcribe::transcribe_cloud( - &config, - &p.audio_base64, - &opts, - ) - .await?, - ) - }) -} - -fn handle_voice_stt_dispatch(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = 
config_rpc::load_config_with_timeout().await?; - let p = deserialize_params::(params)?; - let provider_name = p - .provider - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string) - .unwrap_or_else(|| effective_stt_provider(&config)); - let model = p - .model - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string) - .unwrap_or_else(|| crate::openhuman::voice::DEFAULT_WHISPER_MODEL.to_string()); - - log::debug!( - "[voice-factory] RPC voice_stt_dispatch provider={provider_name} model={model}" - ); - let provider = - crate::openhuman::voice::create_stt_provider(&provider_name, &model, &config) - .map_err(|e| e.to_string())?; - let outcome = provider - .transcribe( - &config, - &p.audio_base64, - p.mime_type.as_deref(), - p.file_name.as_deref(), - p.language.as_deref(), - ) - .await?; - let value = serde_json::json!({ - "text": outcome.value.text, - "provider": outcome.value.provider, - }); - Ok(value) - }) -} - -fn handle_voice_tts_dispatch(params: Map) -> ControllerFuture { - Box::pin(async move { - let config = config_rpc::load_config_with_timeout().await?; - let p = deserialize_params::(params)?; - let provider_name = p - .provider - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string) - .unwrap_or_else(|| effective_tts_provider(&config)); - // Only fall back to the Piper default voice id when the provider is - // Piper; sending a Piper voice id to a cloud TTS endpoint is invalid. 
- let voice = p - .voice - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - .map(str::to_string) - .unwrap_or_else(|| { - if provider_name == "piper" { - crate::openhuman::voice::DEFAULT_PIPER_VOICE.to_string() - } else { - String::new() - } - }); - let effective_voice = if voice.is_empty() { - None - } else { - Some(voice.as_str()) - }; - - log::debug!( - "[voice-factory] RPC voice_tts_dispatch provider={provider_name} voice={voice}" - ); - let provider = - crate::openhuman::voice::create_tts_provider(&provider_name, &voice, &config) - .map_err(|e| e.to_string())?; - let outcome = provider - .synthesize(&config, &p.text, effective_voice) - .await?; - to_json(outcome) - }) -} - -fn handle_voice_set_providers(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let mut config = config_rpc::load_config_with_timeout().await?; - - if let Some(stt) = p - .stt_provider - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - { - validate_stt_provider(stt)?; - config.local_ai.stt_provider = stt.to_string(); - config.stt_provider = Some(stt.to_string()); - } - if let Some(tts) = p - .tts_provider - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - { - validate_tts_provider(tts)?; - config.local_ai.tts_provider = tts.to_string(); - config.tts_provider = Some(tts.to_string()); - } - if let Some(model) = p - .stt_model - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - { - config.local_ai.stt_model_id = model.to_string(); - } - if let Some(voice) = p - .tts_voice - .as_deref() - .map(str::trim) - .filter(|s| !s.is_empty()) - { - config.local_ai.tts_voice_id = voice.to_string(); - } - - config.save().await.map_err(|e| e.to_string())?; - log::debug!( - "[voice-factory] persisted providers stt={} tts={} stt_model={} tts_voice={}", - config.local_ai.stt_provider, - config.local_ai.tts_provider, - config.local_ai.stt_model_id, - config.local_ai.tts_voice_id - ); - - Ok(serde_json::json!({ - 
"stt_provider": config.local_ai.stt_provider, - "tts_provider": config.local_ai.tts_provider, - "stt_model_id": config.local_ai.stt_model_id, - "tts_voice_id": config.local_ai.tts_voice_id, - })) - }) -} - -fn handle_voice_update_provider_settings(params: Map) -> ControllerFuture { - Box::pin(async move { - use crate::openhuman::config::schema::voice_providers::{ - generate_voice_provider_id, is_voice_slug_reserved, SttApiStyle, TtsApiStyle, - VoiceCapability, VoiceProviderCreds, - }; - - let p = deserialize_params::(params)?; - let mut config = config_rpc::load_config_with_timeout().await?; - - if let Some(providers) = p.voice_providers { - let mut new_entries = Vec::with_capacity(providers.len()); - for update in providers { - let slug = update.slug.trim().to_lowercase(); - if is_voice_slug_reserved(&slug) { - return Err(format!( - "slug '{}' is reserved and cannot be used for a voice provider", - slug - )); - } - - let capability = match update.capability.as_deref() { - Some("stt") => VoiceCapability::Stt, - Some("tts") => VoiceCapability::Tts, - Some("both") | None => VoiceCapability::Both, - Some(other) => { - return Err(format!( - "invalid capability '{other}' (valid: 'stt', 'tts', 'both')" - )) - } - }; - - let auth_style = match update.auth_style.as_deref() { - Some("bearer") | None => crate::openhuman::config::schema::AuthStyle::Bearer, - Some("none") => crate::openhuman::config::schema::AuthStyle::None, - Some(other) => { - return Err(format!( - "invalid auth_style '{other}' for voice provider (valid: 'bearer', 'none')" - )) - } - }; - - let stt_api_style = match update.stt_api_style.as_deref() { - Some("deepgram") => SttApiStyle::Deepgram, - Some("openai_audio") | None => SttApiStyle::OpenaiAudio, - Some(other) => { - return Err(format!( - "invalid stt_api_style '{other}' (valid: 'openai_audio', 'deepgram')" - )) - } - }; - - let tts_api_style = match update.tts_api_style.as_deref() { - Some("elevenlabs") => TtsApiStyle::ElevenLabs, - 
Some("openai_audio") | None => TtsApiStyle::OpenaiAudio, - Some(other) => { - return Err(format!( - "invalid tts_api_style '{other}' (valid: 'openai_audio', 'elevenlabs')" - )) - } - }; - - let id = update - .id - .filter(|id| !id.trim().is_empty()) - .or_else(|| { - config - .voice_providers - .iter() - .find(|e| e.slug == slug) - .map(|e| e.id.clone()) - }) - .unwrap_or_else(|| generate_voice_provider_id(&slug)); - - let label = update.label.unwrap_or_else(|| slug.clone()); - - let endpoint = update.endpoint.unwrap_or_default(); - - new_entries.push(VoiceProviderCreds { - id, - slug, - label, - endpoint, - auth_style, - capability, - stt_api_style, - tts_api_style, - default_stt_model: update.default_stt_model, - default_tts_voice: update.default_tts_voice, - }); - } - config.voice_providers = new_entries; - } - - if let Some(stt) = p.stt_provider { - let trimmed = stt.trim(); - if !trimmed.is_empty() { - validate_stt_provider(trimmed)?; - config.stt_provider = Some(trimmed.to_string()); - // Sync to legacy field so voice_status / voice_stt_dispatch - // pick up the change without waiting for a restart. 
- config.local_ai.stt_provider = trimmed.to_string(); - } - } - - if let Some(tts) = p.tts_provider { - let trimmed = tts.trim(); - if !trimmed.is_empty() { - validate_tts_provider(trimmed)?; - config.tts_provider = Some(trimmed.to_string()); - config.local_ai.tts_provider = trimmed.to_string(); - } - } - - config.save().await.map_err(|e| e.to_string())?; - - log::debug!( - "[voice-factory] persisted voice provider settings: {} providers, stt={:?}, tts={:?}", - config.voice_providers.len(), - config.stt_provider, - config.tts_provider - ); - - let providers_json: Vec = config - .voice_providers - .iter() - .map(|p| { - serde_json::json!({ - "id": p.id, - "slug": p.slug, - "label": p.label, - "endpoint": p.endpoint, - "auth_style": p.auth_style.as_str(), - "capability": p.capability.as_str(), - "stt_api_style": serde_json::to_value(&p.stt_api_style).unwrap_or_default(), - "tts_api_style": serde_json::to_value(&p.tts_api_style).unwrap_or_default(), - "default_stt_model": p.default_stt_model, - "default_tts_voice": p.default_tts_voice, - }) - }) - .collect(); - - Ok(serde_json::json!({ - "voice_providers": providers_json, - "stt_provider": config.stt_provider, - "tts_provider": config.tts_provider, - })) - }) -} - -fn handle_voice_list_models(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - let provider_id = p.provider_id.trim(); - let capability = p.capability.as_deref().unwrap_or("both"); - - log::debug!( - "[voice-factory] voice_list_models provider_id={provider_id} capability={capability}" - ); - - let entry = config - .voice_providers - .iter() - .find(|e| e.id == provider_id || e.slug == provider_id); - - let models: Vec = match entry.map(|e| e.slug.as_str()) { - Some("deepgram") if capability != "tts" => { - vec![ - serde_json::json!({"id": "nova-2", "label": "Nova-2 (recommended)"}), - serde_json::json!({"id": "nova-2-general", "label": "Nova-2 
General"}), - serde_json::json!({"id": "nova-2-meeting", "label": "Nova-2 Meeting"}), - serde_json::json!({"id": "nova-2-phonecall", "label": "Nova-2 Phone Call"}), - serde_json::json!({"id": "enhanced", "label": "Enhanced"}), - serde_json::json!({"id": "base", "label": "Base"}), - ] - } - Some("openai") if capability == "stt" => { - vec![serde_json::json!({"id": "whisper-1", "label": "Whisper v1"})] - } - Some("openai") if capability == "tts" => { - vec![ - serde_json::json!({"id": "alloy", "label": "Alloy"}), - serde_json::json!({"id": "echo", "label": "Echo"}), - serde_json::json!({"id": "fable", "label": "Fable"}), - serde_json::json!({"id": "onyx", "label": "Onyx"}), - serde_json::json!({"id": "nova", "label": "Nova"}), - serde_json::json!({"id": "shimmer", "label": "Shimmer"}), - ] - } - Some("openai") => { - let mut models = - vec![serde_json::json!({"id": "whisper-1", "label": "Whisper v1 (STT)"})]; - models.extend([ - serde_json::json!({"id": "alloy", "label": "Alloy (TTS)"}), - serde_json::json!({"id": "echo", "label": "Echo (TTS)"}), - serde_json::json!({"id": "fable", "label": "Fable (TTS)"}), - serde_json::json!({"id": "onyx", "label": "Onyx (TTS)"}), - serde_json::json!({"id": "nova", "label": "Nova (TTS)"}), - serde_json::json!({"id": "shimmer", "label": "Shimmer (TTS)"}), - ]); - models - } - Some("elevenlabs") if capability != "stt" => { - // ElevenLabs voices require an API call; return empty and let - // the frontend fetch from /voices if a key is configured. 
- vec![] - } - _ => vec![], - }; - - Ok(serde_json::json!({ "models": models })) - }) -} - -fn handle_voice_test_provider(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - let config = config_rpc::load_config_with_timeout().await?; - let start = std::time::Instant::now(); - - log::debug!( - "[voice-factory] voice_test_provider workload={} provider={}", - p.workload, - p.provider - ); - - match p.workload.as_str() { - "stt" => { - let provider = - crate::openhuman::voice::create_stt_provider(&p.provider, "", &config) - .map_err(|e| e.to_string())?; - - // 0.1s of silence as WAV (8kHz mono 16-bit PCM). - let silent_wav = generate_silent_wav(); - let audio_b64 = { - use base64::Engine; - base64::engine::general_purpose::STANDARD.encode(&silent_wav) - }; - - match provider - .transcribe(&config, &audio_b64, Some("audio/wav"), None, Some("en")) - .await - { - Ok(_outcome) => { - let elapsed = start.elapsed().as_millis(); - Ok(serde_json::json!({ - "ok": true, - "detail": format!("STT test passed ({elapsed}ms)"), - "latency_ms": elapsed, - })) - } - Err(e) => Ok(serde_json::json!({ - "ok": false, - "detail": format!("STT test failed: {e}"), - })), - } - } - "tts" => { - let trimmed = p.provider.trim(); - if p.validate_only && !matches!(trimmed, "cloud" | "openhuman" | "piper" | "") { - match validate_tts_provider_key(trimmed, &config).await { - Ok(detail) => { - let elapsed = start.elapsed().as_millis(); - Ok(serde_json::json!({ - "ok": true, - "detail": format!("{detail} ({elapsed}ms)"), - "latency_ms": elapsed, - })) - } - Err(e) => Ok(serde_json::json!({ - "ok": false, - "detail": format!("TTS test failed: {e}"), - })), - } - } else { - let provider = - crate::openhuman::voice::create_tts_provider(trimmed, "", &config) - .map_err(|e| e.to_string())?; - match provider.synthesize(&config, "Hello", None).await { - Ok(_outcome) => { - let elapsed = start.elapsed().as_millis(); - Ok(serde_json::json!({ - "ok": true, - 
"detail": format!("TTS test passed ({elapsed}ms)"), - "latency_ms": elapsed, - })) - } - Err(e) => Ok(serde_json::json!({ - "ok": false, - "detail": format!("TTS test failed: {e}"), - })), - } - } - } - other => Err(format!("invalid workload '{other}' (valid: 'stt', 'tts')")), - } - }) -} - -/// Validate a TTS provider's API key by hitting a lightweight read-only endpoint -/// rather than synthesizing audio (which requires a valid voice ID). -async fn validate_tts_provider_key( - provider: &str, - config: &crate::openhuman::config::Config, -) -> Result { - let (slug, _model) = if let Some(pos) = provider.find(':') { - (&provider[..pos], &provider[pos + 1..]) - } else { - (provider, "") - }; - - let entry = config - .voice_providers - .iter() - .find(|p| p.slug == slug) - .ok_or_else(|| format!("no voice provider with slug '{slug}'"))?; - - let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(slug, config) - .unwrap_or_default(); - - if api_key.is_empty() { - return Err("no API key configured for this provider".to_string()); - } - - let endpoint = entry.endpoint.trim_end_matches('/'); - let client = reqwest::Client::new(); - - // ElevenLabs: GET /user/subscription requires only basic auth (no - // extra scopes like voices_read). OpenAI / generic: GET /models. 
- let url = if slug == "elevenlabs" { - format!("{endpoint}/user/subscription") - } else { - format!("{endpoint}/models") - }; - - let mut req = client.get(&url); - if slug == "elevenlabs" { - req = req.header("xi-api-key", &api_key); - } else { - req = req.header("Authorization", format!("Bearer {api_key}")); - } - - let resp = req - .send() - .await - .map_err(|e| format!("request failed: {e}"))?; - - if resp.status().is_success() { - Ok("TTS provider key is valid".to_string()) - } else { - let status = resp.status(); - let body = resp.text().await.unwrap_or_default(); - Err(format!("API returned {status}: {body}")) - } -} - -/// Generate a minimal WAV file with ~0.1s of silence (8kHz mono 16-bit PCM). -fn generate_silent_wav() -> Vec { - let sample_rate: u32 = 8000; - let num_samples: u32 = 800; // 0.1s - let bits_per_sample: u16 = 16; - let num_channels: u16 = 1; - let byte_rate = sample_rate * u32::from(num_channels) * u32::from(bits_per_sample) / 8; - let block_align = num_channels * bits_per_sample / 8; - let data_size = num_samples * u32::from(block_align); - let file_size = 36 + data_size; - - let mut wav = Vec::with_capacity(44 + data_size as usize); - wav.extend_from_slice(b"RIFF"); - wav.extend_from_slice(&file_size.to_le_bytes()); - wav.extend_from_slice(b"WAVE"); - wav.extend_from_slice(b"fmt "); - wav.extend_from_slice(&16u32.to_le_bytes()); // subchunk1 size - wav.extend_from_slice(&1u16.to_le_bytes()); // PCM - wav.extend_from_slice(&num_channels.to_le_bytes()); - wav.extend_from_slice(&sample_rate.to_le_bytes()); - wav.extend_from_slice(&byte_rate.to_le_bytes()); - wav.extend_from_slice(&block_align.to_le_bytes()); - wav.extend_from_slice(&bits_per_sample.to_le_bytes()); - wav.extend_from_slice(b"data"); - wav.extend_from_slice(&data_size.to_le_bytes()); - wav.extend(std::iter::repeat(0u8).take(data_size as usize)); - wav -} - -fn validate_stt_provider(provider: &str) -> Result<(), String> { - match provider { - "cloud" | "openhuman" | "whisper" 
=> Ok(()), - other => { - // Accept slug:model grammar or bare slug — the factory will - // validate against the voice_providers registry at dispatch time. - if other.contains(':') || !other.is_empty() { - Ok(()) - } else { - Err(format!( - "invalid stt_provider '{other}' (valid: 'cloud', 'whisper', or ':')" - )) - } - } - } -} - -fn validate_tts_provider(provider: &str) -> Result<(), String> { - match provider { - "cloud" | "openhuman" | "piper" => Ok(()), - other => { - if other.contains(':') || !other.is_empty() { - Ok(()) - } else { - Err(format!( - "invalid tts_provider '{other}' (valid: 'cloud', 'piper', or ':')" - )) - } - } - } -} - -fn effective_stt_provider(config: &crate::openhuman::config::Config) -> String { - crate::openhuman::voice::effective_stt_provider(config) -} - -fn effective_tts_provider(config: &crate::openhuman::config::Config) -> String { - crate::openhuman::voice::effective_tts_provider(config) -} - -fn handle_voice_server_start(params: Map) -> ControllerFuture { - Box::pin(async move { - use crate::openhuman::voice::hotkey::ActivationMode; - use crate::openhuman::voice::server::{global_server, VoiceServerConfig}; - - let config = config_rpc::load_config_with_timeout().await?; - - let hotkey = params - .get("hotkey") - .and_then(|v| v.as_str()) - .unwrap_or(&config.voice_server.hotkey) - .to_string(); - - let activation_mode = match params.get("activation_mode").and_then(|v| v.as_str()) { - Some("push") => ActivationMode::Push, - Some("tap") => ActivationMode::Tap, - Some(other) => { - log::warn!( - "[voice_server] unrecognized activation_mode '{}', defaulting to Push", - other - ); - ActivationMode::Push - } - None => match config.voice_server.activation_mode { - crate::openhuman::config::VoiceActivationMode::Push => ActivationMode::Push, - crate::openhuman::config::VoiceActivationMode::Tap => ActivationMode::Tap, - }, - }; - - let skip_cleanup = params - .get("skip_cleanup") - .and_then(|v| v.as_bool()) - 
.unwrap_or(config.voice_server.skip_cleanup); - - let server_config = VoiceServerConfig { - hotkey, - activation_mode, - skip_cleanup, - context: None, - min_duration_secs: config.voice_server.min_duration_secs, - silence_threshold: config.voice_server.silence_threshold, - custom_dictionary: config.voice_server.custom_dictionary.clone(), - }; - - // Check if a server is already running with a different config. - if let Some(existing) = crate::openhuman::voice::server::try_global_server() { - let existing_status = existing.status().await; - if existing_status.state != crate::openhuman::voice::server::ServerState::Stopped { - if existing_status.hotkey != server_config.hotkey - || existing_status.activation_mode != server_config.activation_mode - { - return Err(format!( - "voice server already running (hotkey={}, mode={:?}); \ - stop it first before starting with different config", - existing_status.hotkey, existing_status.activation_mode - )); - } - // Same config, already running — return current status. - return serde_json::to_value(existing_status) - .map_err(|e| format!("serialize error: {e}")); - } - } - - let server = global_server(server_config); - let config_clone = config.clone(); - let server_for_err = server.clone(); - - tokio::spawn(async move { - if let Err(e) = server.run(&config_clone).await { - log::error!("[voice_server] server exited with error: {e}"); - server_for_err.set_last_error(&e).await; - } - }); - - // Give the server a moment to start. 
- tokio::time::sleep(std::time::Duration::from_millis(200)).await; - - if let Some(s) = crate::openhuman::voice::server::try_global_server() { - let status = s.status().await; - serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) - } else { - Err("voice server failed to initialize".to_string()) - } - }) -} - -fn handle_voice_server_stop(_params: Map) -> ControllerFuture { - Box::pin(async move { - if let Some(server) = crate::openhuman::voice::server::try_global_server() { - server.stop().await; - tokio::time::sleep(std::time::Duration::from_millis(200)).await; - let status = server.status().await; - serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) - } else { - // Not running — return a stopped status rather than an error. - let status = crate::openhuman::voice::server::VoiceServerStatus { - state: crate::openhuman::voice::server::ServerState::Stopped, - hotkey: String::new(), - activation_mode: crate::openhuman::voice::hotkey::ActivationMode::Push, - transcription_count: 0, - last_error: None, - }; - serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) - } - }) -} - -fn handle_voice_server_status(_params: Map) -> ControllerFuture { - Box::pin(async move { - if let Some(server) = crate::openhuman::voice::server::try_global_server() { - let status = server.status().await; - serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) - } else { - let status = crate::openhuman::voice::server::VoiceServerStatus { - state: crate::openhuman::voice::server::ServerState::Stopped, - hotkey: String::new(), - activation_mode: crate::openhuman::voice::hotkey::ActivationMode::Push, - transcription_count: 0, - last_error: None, - }; - serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) - } - }) -} - -fn handle_overlay_stt_notify(params: Map) -> ControllerFuture { - Box::pin(async move { - let p = deserialize_params::(params)?; - log::debug!( - "[overlay_stt_notify] state={:?}, 
has_text={}, text_len={}", - p.state, - p.text.is_some(), - p.text.as_deref().map_or(0, |t| t.len()) - ); - - use crate::openhuman::voice::dictation_listener::{ - publish_dictation_event, publish_transcription, DictationEvent, - }; - - match p.state { - OverlaySttState::RecordingStarted => { - publish_dictation_event(DictationEvent { - event_type: "pressed".to_string(), - hotkey: "chat_button".to_string(), - activation_mode: "toggle".to_string(), - }); - } - OverlaySttState::TranscriptionDone => { - let text = p.text.ok_or_else(|| { - "invalid params: `text` is required for transcription_done".to_string() - })?; - publish_transcription(text); - publish_dictation_event(DictationEvent { - event_type: "released".to_string(), - hotkey: "chat_button".to_string(), - activation_mode: "toggle".to_string(), - }); - } - OverlaySttState::Cancelled | OverlaySttState::Error => { - publish_dictation_event(DictationEvent { - event_type: "released".to_string(), - hotkey: "chat_button".to_string(), - activation_mode: "toggle".to_string(), - }); - } - } - - Ok(serde_json::json!({ "ok": true })) - }) -} - -// --------------------------------------------------------------------------- -// Helpers -// --------------------------------------------------------------------------- - -fn to_json(outcome: RpcOutcome) -> Result { - let json_val = - serde_json::to_value(outcome.value).map_err(|e| format!("serialize error: {e}"))?; - Ok(json_val) -} - -fn deserialize_params(params: Map) -> Result { - serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) -} - -fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::String, - comment, - required: true, - } -} - -fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::String)), - comment, - required: false, - } -} - -fn optional_bool(name: &'static str, 
comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), - comment, - required: false, - } -} - -fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { - FieldSchema { - name, - ty: TypeSchema::Json, - comment, - required: true, - } -} - -#[cfg(test)] -#[path = "schemas_tests.rs"] -mod tests; diff --git a/src/openhuman/voice/schemas/handlers.rs b/src/openhuman/voice/schemas/handlers.rs new file mode 100644 index 0000000000..d43fd63473 --- /dev/null +++ b/src/openhuman/voice/schemas/handlers.rs @@ -0,0 +1,19 @@ +//! RPC handler implementations for the voice domain. +//! +//! Handlers are split into two sub-modules by concern: +//! - `transcribe_tts`: transcription, synthesis, and factory-dispatch handlers +//! - `provider_server`: provider settings, model listing, testing, and server lifecycle + +mod provider_server; +mod transcribe_tts; + +pub(super) use provider_server::{ + handle_overlay_stt_notify, handle_voice_list_models, handle_voice_server_start, + handle_voice_server_status, handle_voice_server_stop, handle_voice_set_providers, + handle_voice_test_provider, handle_voice_update_provider_settings, +}; +pub(super) use transcribe_tts::{ + handle_voice_cloud_transcribe, handle_voice_reply_synthesize, handle_voice_status, + handle_voice_stt_dispatch, handle_voice_transcribe, handle_voice_transcribe_bytes, + handle_voice_tts, handle_voice_tts_dispatch, +}; diff --git a/src/openhuman/voice/schemas/handlers/provider_server.rs b/src/openhuman/voice/schemas/handlers/provider_server.rs new file mode 100644 index 0000000000..3d643b4669 --- /dev/null +++ b/src/openhuman/voice/schemas/handlers/provider_server.rs @@ -0,0 +1,560 @@ +//! Handlers for provider settings, model listing, provider testing, and server lifecycle. 
+ +use serde_json::{Map, Value}; + +use crate::core::all::ControllerFuture; +use crate::openhuman::config::rpc as config_rpc; + +use crate::openhuman::voice::schemas::helpers::{ + deserialize_params, generate_silent_wav, validate_stt_provider, validate_tts_provider, + validate_tts_provider_key, +}; +use crate::openhuman::voice::schemas::params::{ + OverlaySttNotifyParams, OverlaySttState, SetProvidersParams, VoiceListModelsParams, + VoiceTestProviderParams, VoiceUpdateProviderSettingsParams, +}; + +// --------------------------------------------------------------------------- +// Provider configuration handlers +// --------------------------------------------------------------------------- + +pub(crate) fn handle_voice_set_providers(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let mut config = config_rpc::load_config_with_timeout().await?; + + if let Some(stt) = p + .stt_provider + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + validate_stt_provider(stt)?; + config.local_ai.stt_provider = stt.to_string(); + config.stt_provider = Some(stt.to_string()); + } + if let Some(tts) = p + .tts_provider + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + validate_tts_provider(tts)?; + config.local_ai.tts_provider = tts.to_string(); + config.tts_provider = Some(tts.to_string()); + } + if let Some(model) = p + .stt_model + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + config.local_ai.stt_model_id = model.to_string(); + } + if let Some(voice) = p + .tts_voice + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + { + config.local_ai.tts_voice_id = voice.to_string(); + } + + config.save().await.map_err(|e| e.to_string())?; + log::debug!( + "[voice-factory] persisted providers stt={} tts={} stt_model={} tts_voice={}", + config.local_ai.stt_provider, + config.local_ai.tts_provider, + config.local_ai.stt_model_id, + config.local_ai.tts_voice_id + ); + + 
Ok(serde_json::json!({ + "stt_provider": config.local_ai.stt_provider, + "tts_provider": config.local_ai.tts_provider, + "stt_model_id": config.local_ai.stt_model_id, + "tts_voice_id": config.local_ai.tts_voice_id, + })) + }) +} + +pub(crate) fn handle_voice_update_provider_settings( + params: Map, +) -> ControllerFuture { + Box::pin(async move { + use crate::openhuman::config::schema::voice_providers::{ + generate_voice_provider_id, is_voice_slug_reserved, SttApiStyle, TtsApiStyle, + VoiceCapability, VoiceProviderCreds, + }; + + let p = deserialize_params::(params)?; + let mut config = config_rpc::load_config_with_timeout().await?; + + if let Some(providers) = p.voice_providers { + let mut new_entries = Vec::with_capacity(providers.len()); + for update in providers { + let slug = update.slug.trim().to_lowercase(); + if is_voice_slug_reserved(&slug) { + return Err(format!( + "slug '{}' is reserved and cannot be used for a voice provider", + slug + )); + } + + let capability = match update.capability.as_deref() { + Some("stt") => VoiceCapability::Stt, + Some("tts") => VoiceCapability::Tts, + Some("both") | None => VoiceCapability::Both, + Some(other) => { + return Err(format!( + "invalid capability '{other}' (valid: 'stt', 'tts', 'both')" + )) + } + }; + + let auth_style = match update.auth_style.as_deref() { + Some("bearer") | None => crate::openhuman::config::schema::AuthStyle::Bearer, + Some("none") => crate::openhuman::config::schema::AuthStyle::None, + Some(other) => { + return Err(format!( + "invalid auth_style '{other}' for voice provider (valid: 'bearer', 'none')" + )) + } + }; + + let stt_api_style = match update.stt_api_style.as_deref() { + Some("deepgram") => SttApiStyle::Deepgram, + Some("openai_audio") | None => SttApiStyle::OpenaiAudio, + Some(other) => { + return Err(format!( + "invalid stt_api_style '{other}' (valid: 'openai_audio', 'deepgram')" + )) + } + }; + + let tts_api_style = match update.tts_api_style.as_deref() { + Some("elevenlabs") => 
TtsApiStyle::ElevenLabs, + Some("openai_audio") | None => TtsApiStyle::OpenaiAudio, + Some(other) => { + return Err(format!( + "invalid tts_api_style '{other}' (valid: 'openai_audio', 'elevenlabs')" + )) + } + }; + + let id = update + .id + .filter(|id| !id.trim().is_empty()) + .or_else(|| { + config + .voice_providers + .iter() + .find(|e| e.slug == slug) + .map(|e| e.id.clone()) + }) + .unwrap_or_else(|| generate_voice_provider_id(&slug)); + + let label = update.label.unwrap_or_else(|| slug.clone()); + + let endpoint = update.endpoint.unwrap_or_default(); + + new_entries.push(VoiceProviderCreds { + id, + slug, + label, + endpoint, + auth_style, + capability, + stt_api_style, + tts_api_style, + default_stt_model: update.default_stt_model, + default_tts_voice: update.default_tts_voice, + }); + } + config.voice_providers = new_entries; + } + + if let Some(stt) = p.stt_provider { + let trimmed = stt.trim(); + if !trimmed.is_empty() { + validate_stt_provider(trimmed)?; + config.stt_provider = Some(trimmed.to_string()); + // Sync to legacy field so voice_status / voice_stt_dispatch + // pick up the change without waiting for a restart. 
+ config.local_ai.stt_provider = trimmed.to_string(); + } + } + + if let Some(tts) = p.tts_provider { + let trimmed = tts.trim(); + if !trimmed.is_empty() { + validate_tts_provider(trimmed)?; + config.tts_provider = Some(trimmed.to_string()); + config.local_ai.tts_provider = trimmed.to_string(); + } + } + + config.save().await.map_err(|e| e.to_string())?; + + log::debug!( + "[voice-factory] persisted voice provider settings: {} providers, stt={:?}, tts={:?}", + config.voice_providers.len(), + config.stt_provider, + config.tts_provider + ); + + let providers_json: Vec = config + .voice_providers + .iter() + .map(|p| { + serde_json::json!({ + "id": p.id, + "slug": p.slug, + "label": p.label, + "endpoint": p.endpoint, + "auth_style": p.auth_style.as_str(), + "capability": p.capability.as_str(), + "stt_api_style": serde_json::to_value(&p.stt_api_style).unwrap_or_default(), + "tts_api_style": serde_json::to_value(&p.tts_api_style).unwrap_or_default(), + "default_stt_model": p.default_stt_model, + "default_tts_voice": p.default_tts_voice, + }) + }) + .collect(); + + Ok(serde_json::json!({ + "voice_providers": providers_json, + "stt_provider": config.stt_provider, + "tts_provider": config.tts_provider, + })) + }) +} + +pub(crate) fn handle_voice_list_models(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + let provider_id = p.provider_id.trim(); + let capability = p.capability.as_deref().unwrap_or("both"); + + log::debug!( + "[voice-factory] voice_list_models provider_id={provider_id} capability={capability}" + ); + + let entry = config + .voice_providers + .iter() + .find(|e| e.id == provider_id || e.slug == provider_id); + + let models: Vec = match entry.map(|e| e.slug.as_str()) { + Some("deepgram") if capability != "tts" => { + vec![ + serde_json::json!({"id": "nova-2", "label": "Nova-2 (recommended)"}), + serde_json::json!({"id": "nova-2-general", "label": 
"Nova-2 General"}), + serde_json::json!({"id": "nova-2-meeting", "label": "Nova-2 Meeting"}), + serde_json::json!({"id": "nova-2-phonecall", "label": "Nova-2 Phone Call"}), + serde_json::json!({"id": "enhanced", "label": "Enhanced"}), + serde_json::json!({"id": "base", "label": "Base"}), + ] + } + Some("openai") if capability == "stt" => { + vec![serde_json::json!({"id": "whisper-1", "label": "Whisper v1"})] + } + Some("openai") if capability == "tts" => { + vec![ + serde_json::json!({"id": "alloy", "label": "Alloy"}), + serde_json::json!({"id": "echo", "label": "Echo"}), + serde_json::json!({"id": "fable", "label": "Fable"}), + serde_json::json!({"id": "onyx", "label": "Onyx"}), + serde_json::json!({"id": "nova", "label": "Nova"}), + serde_json::json!({"id": "shimmer", "label": "Shimmer"}), + ] + } + Some("openai") => { + let mut models = + vec![serde_json::json!({"id": "whisper-1", "label": "Whisper v1 (STT)"})]; + models.extend([ + serde_json::json!({"id": "alloy", "label": "Alloy (TTS)"}), + serde_json::json!({"id": "echo", "label": "Echo (TTS)"}), + serde_json::json!({"id": "fable", "label": "Fable (TTS)"}), + serde_json::json!({"id": "onyx", "label": "Onyx (TTS)"}), + serde_json::json!({"id": "nova", "label": "Nova (TTS)"}), + serde_json::json!({"id": "shimmer", "label": "Shimmer (TTS)"}), + ]); + models + } + Some("elevenlabs") if capability != "stt" => { + // ElevenLabs voices require an API call; return empty and let + // the frontend fetch from /voices if a key is configured. 
+ vec![] + } + _ => vec![], + }; + + Ok(serde_json::json!({ "models": models })) + }) +} + +pub(crate) fn handle_voice_test_provider(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + let config = config_rpc::load_config_with_timeout().await?; + let start = std::time::Instant::now(); + + log::debug!( + "[voice-factory] voice_test_provider workload={} provider={}", + p.workload, + p.provider + ); + + match p.workload.as_str() { + "stt" => { + let provider = + crate::openhuman::voice::create_stt_provider(&p.provider, "", &config) + .map_err(|e| e.to_string())?; + + // 0.1s of silence as WAV (8kHz mono 16-bit PCM). + let silent_wav = generate_silent_wav(); + let audio_b64 = { + use base64::Engine; + base64::engine::general_purpose::STANDARD.encode(&silent_wav) + }; + + match provider + .transcribe(&config, &audio_b64, Some("audio/wav"), None, Some("en")) + .await + { + Ok(_outcome) => { + let elapsed = start.elapsed().as_millis(); + Ok(serde_json::json!({ + "ok": true, + "detail": format!("STT test passed ({elapsed}ms)"), + "latency_ms": elapsed, + })) + } + Err(e) => Ok(serde_json::json!({ + "ok": false, + "detail": format!("STT test failed: {e}"), + })), + } + } + "tts" => { + let trimmed = p.provider.trim(); + if p.validate_only && !matches!(trimmed, "cloud" | "openhuman" | "piper" | "") { + match validate_tts_provider_key(trimmed, &config).await { + Ok(detail) => { + let elapsed = start.elapsed().as_millis(); + Ok(serde_json::json!({ + "ok": true, + "detail": format!("{detail} ({elapsed}ms)"), + "latency_ms": elapsed, + })) + } + Err(e) => Ok(serde_json::json!({ + "ok": false, + "detail": format!("TTS test failed: {e}"), + })), + } + } else { + let provider = + crate::openhuman::voice::create_tts_provider(trimmed, "", &config) + .map_err(|e| e.to_string())?; + match provider.synthesize(&config, "Hello", None).await { + Ok(_outcome) => { + let elapsed = start.elapsed().as_millis(); + Ok(serde_json::json!({ + "ok": 
true, + "detail": format!("TTS test passed ({elapsed}ms)"), + "latency_ms": elapsed, + })) + } + Err(e) => Ok(serde_json::json!({ + "ok": false, + "detail": format!("TTS test failed: {e}"), + })), + } + } + } + other => Err(format!("invalid workload '{other}' (valid: 'stt', 'tts')")), + } + }) +} + +// --------------------------------------------------------------------------- +// Voice server lifecycle handlers +// --------------------------------------------------------------------------- + +pub(crate) fn handle_voice_server_start(params: Map) -> ControllerFuture { + Box::pin(async move { + use crate::openhuman::voice::hotkey::ActivationMode; + use crate::openhuman::voice::server::{global_server, VoiceServerConfig}; + + let config = config_rpc::load_config_with_timeout().await?; + + let hotkey = params + .get("hotkey") + .and_then(|v| v.as_str()) + .unwrap_or(&config.voice_server.hotkey) + .to_string(); + + let activation_mode = match params.get("activation_mode").and_then(|v| v.as_str()) { + Some("push") => ActivationMode::Push, + Some("tap") => ActivationMode::Tap, + Some(other) => { + log::warn!( + "[voice_server] unrecognized activation_mode '{}', defaulting to Push", + other + ); + ActivationMode::Push + } + None => match config.voice_server.activation_mode { + crate::openhuman::config::VoiceActivationMode::Push => ActivationMode::Push, + crate::openhuman::config::VoiceActivationMode::Tap => ActivationMode::Tap, + }, + }; + + let skip_cleanup = params + .get("skip_cleanup") + .and_then(|v| v.as_bool()) + .unwrap_or(config.voice_server.skip_cleanup); + + let server_config = VoiceServerConfig { + hotkey, + activation_mode, + skip_cleanup, + context: None, + min_duration_secs: config.voice_server.min_duration_secs, + silence_threshold: config.voice_server.silence_threshold, + custom_dictionary: config.voice_server.custom_dictionary.clone(), + }; + + // Check if a server is already running with a different config. 
+ if let Some(existing) = crate::openhuman::voice::server::try_global_server() { + let existing_status = existing.status().await; + if existing_status.state != crate::openhuman::voice::server::ServerState::Stopped { + if existing_status.hotkey != server_config.hotkey + || existing_status.activation_mode != server_config.activation_mode + { + return Err(format!( + "voice server already running (hotkey={}, mode={:?}); \ + stop it first before starting with different config", + existing_status.hotkey, existing_status.activation_mode + )); + } + // Same config, already running — return current status. + return serde_json::to_value(existing_status) + .map_err(|e| format!("serialize error: {e}")); + } + } + + let server = global_server(server_config); + let config_clone = config.clone(); + let server_for_err = server.clone(); + + tokio::spawn(async move { + if let Err(e) = server.run(&config_clone).await { + log::error!("[voice_server] server exited with error: {e}"); + server_for_err.set_last_error(&e).await; + } + }); + + // Give the server a moment to start. + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + + if let Some(s) = crate::openhuman::voice::server::try_global_server() { + let status = s.status().await; + serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) + } else { + Err("voice server failed to initialize".to_string()) + } + }) +} + +pub(crate) fn handle_voice_server_stop(_params: Map) -> ControllerFuture { + Box::pin(async move { + if let Some(server) = crate::openhuman::voice::server::try_global_server() { + server.stop().await; + tokio::time::sleep(std::time::Duration::from_millis(200)).await; + let status = server.status().await; + serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) + } else { + // Not running — return a stopped status rather than an error. 
+ let status = crate::openhuman::voice::server::VoiceServerStatus { + state: crate::openhuman::voice::server::ServerState::Stopped, + hotkey: String::new(), + activation_mode: crate::openhuman::voice::hotkey::ActivationMode::Push, + transcription_count: 0, + last_error: None, + }; + serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) + } + }) +} + +pub(crate) fn handle_voice_server_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + if let Some(server) = crate::openhuman::voice::server::try_global_server() { + let status = server.status().await; + serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) + } else { + let status = crate::openhuman::voice::server::VoiceServerStatus { + state: crate::openhuman::voice::server::ServerState::Stopped, + hotkey: String::new(), + activation_mode: crate::openhuman::voice::hotkey::ActivationMode::Push, + transcription_count: 0, + last_error: None, + }; + serde_json::to_value(status).map_err(|e| format!("serialize error: {e}")) + } + }) +} + +// --------------------------------------------------------------------------- +// Overlay STT notify handler +// --------------------------------------------------------------------------- + +pub(crate) fn handle_overlay_stt_notify(params: Map) -> ControllerFuture { + Box::pin(async move { + let p = deserialize_params::(params)?; + log::debug!( + "[overlay_stt_notify] state={:?}, has_text={}, text_len={}", + p.state, + p.text.is_some(), + p.text.as_deref().map_or(0, |t| t.len()) + ); + + use crate::openhuman::voice::dictation_listener::{ + publish_dictation_event, publish_transcription, DictationEvent, + }; + + match p.state { + OverlaySttState::RecordingStarted => { + publish_dictation_event(DictationEvent { + event_type: "pressed".to_string(), + hotkey: "chat_button".to_string(), + activation_mode: "toggle".to_string(), + }); + } + OverlaySttState::TranscriptionDone => { + let text = p.text.ok_or_else(|| { + "invalid params: `text` 
is required for transcription_done".to_string() + })?; + publish_transcription(text); + publish_dictation_event(DictationEvent { + event_type: "released".to_string(), + hotkey: "chat_button".to_string(), + activation_mode: "toggle".to_string(), + }); + } + OverlaySttState::Cancelled | OverlaySttState::Error => { + publish_dictation_event(DictationEvent { + event_type: "released".to_string(), + hotkey: "chat_button".to_string(), + activation_mode: "toggle".to_string(), + }); + } + } + + Ok(serde_json::json!({ "ok": true })) + }) +} diff --git a/src/openhuman/voice/schemas/handlers/transcribe_tts.rs b/src/openhuman/voice/schemas/handlers/transcribe_tts.rs new file mode 100644 index 0000000000..4b06f10b98 --- /dev/null +++ b/src/openhuman/voice/schemas/handlers/transcribe_tts.rs @@ -0,0 +1,218 @@ +//! Handlers for transcription, TTS synthesis, and factory-dispatch RPCs. + +use serde_json::{Map, Value}; + +use crate::core::all::ControllerFuture; +use crate::openhuman::config::rpc as config_rpc; + +use crate::openhuman::voice::schemas::helpers::{ + deserialize_params, effective_stt_provider, effective_tts_provider, to_json, +}; +use crate::openhuman::voice::schemas::params::{ + CloudTranscribeParams, ReplySynthesizeParams, SttDispatchParams, TranscribeBytesParams, + TranscribeParams, TtsDispatchParams, TtsParams, +}; + +pub(crate) fn handle_voice_status(_params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + to_json(crate::openhuman::voice::voice_status(&config).await?) 
+ }) +} + +pub(crate) fn handle_voice_transcribe(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + to_json( + crate::openhuman::voice::voice_transcribe( + &config, + &p.audio_path, + p.context.as_deref(), + p.skip_cleanup, + ) + .await?, + ) + }) +} + +pub(crate) fn handle_voice_transcribe_bytes(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + to_json( + crate::openhuman::voice::voice_transcribe_bytes( + &config, + &p.audio_bytes, + p.extension, + p.context.as_deref(), + p.skip_cleanup, + ) + .await?, + ) + }) +} + +pub(crate) fn handle_voice_tts(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + to_json( + crate::openhuman::voice::voice_tts(&config, &p.text, p.output_path.as_deref()).await?, + ) + }) +} + +pub(crate) fn handle_voice_reply_synthesize(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + // Dispatch through the TTS factory so the user's `tts_provider` + // setting (cloud / piper / …) is honored on the spoken-reply path, + // not just the dedicated `voice_tts_dispatch` RPC. Without this + // routing, the settings dropdown was effectively decorative — + // selecting "piper" persisted to config but conversation replies + // still hit the cloud TTS proxy. + let provider_name = effective_tts_provider(&config); + // Only default to the Piper voice id when the active provider is + // actually Piper. Passing a Piper voice id to a cloud TTS provider + // would send an invalid voice to the upstream API. 
+ let voice = p + .voice_id + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| { + if provider_name == "piper" { + crate::openhuman::voice::DEFAULT_PIPER_VOICE.to_string() + } else { + String::new() + } + }); + let effective_voice = if voice.is_empty() { + None + } else { + Some(voice.as_str()) + }; + log::debug!( + "[voice-factory] voice_reply_synthesize dispatch provider={provider_name} voice={voice}" + ); + let provider = + crate::openhuman::voice::create_tts_provider(&provider_name, &voice, &config) + .map_err(|e| e.to_string())?; + to_json( + provider + .synthesize(&config, &p.text, effective_voice) + .await?, + ) + }) +} + +pub(crate) fn handle_voice_cloud_transcribe(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + let opts = crate::openhuman::voice::cloud_transcribe::CloudTranscribeOptions { + model: p.model, + language: p.language, + mime_type: p.mime_type, + file_name: p.file_name, + }; + to_json( + crate::openhuman::voice::cloud_transcribe::transcribe_cloud( + &config, + &p.audio_base64, + &opts, + ) + .await?, + ) + }) +} + +pub(crate) fn handle_voice_stt_dispatch(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + let provider_name = p + .provider + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| effective_stt_provider(&config)); + let model = p + .model + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| crate::openhuman::voice::DEFAULT_WHISPER_MODEL.to_string()); + + log::debug!( + "[voice-factory] RPC voice_stt_dispatch provider={provider_name} model={model}" + ); + let provider = + crate::openhuman::voice::create_stt_provider(&provider_name, &model, &config) + 
.map_err(|e| e.to_string())?; + let outcome = provider + .transcribe( + &config, + &p.audio_base64, + p.mime_type.as_deref(), + p.file_name.as_deref(), + p.language.as_deref(), + ) + .await?; + let value = serde_json::json!({ + "text": outcome.value.text, + "provider": outcome.value.provider, + }); + Ok(value) + }) +} + +pub(crate) fn handle_voice_tts_dispatch(params: Map) -> ControllerFuture { + Box::pin(async move { + let config = config_rpc::load_config_with_timeout().await?; + let p = deserialize_params::(params)?; + let provider_name = p + .provider + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| effective_tts_provider(&config)); + // Only fall back to the Piper default voice id when the provider is + // Piper; sending a Piper voice id to a cloud TTS endpoint is invalid. + let voice = p + .voice + .as_deref() + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(str::to_string) + .unwrap_or_else(|| { + if provider_name == "piper" { + crate::openhuman::voice::DEFAULT_PIPER_VOICE.to_string() + } else { + String::new() + } + }); + let effective_voice = if voice.is_empty() { + None + } else { + Some(voice.as_str()) + }; + + log::debug!( + "[voice-factory] RPC voice_tts_dispatch provider={provider_name} voice={voice}" + ); + let provider = + crate::openhuman::voice::create_tts_provider(&provider_name, &voice, &config) + .map_err(|e| e.to_string())?; + let outcome = provider + .synthesize(&config, &p.text, effective_voice) + .await?; + to_json(outcome) + }) +} diff --git a/src/openhuman/voice/schemas/helpers.rs b/src/openhuman/voice/schemas/helpers.rs new file mode 100644 index 0000000000..1b79c4904e --- /dev/null +++ b/src/openhuman/voice/schemas/helpers.rs @@ -0,0 +1,181 @@ +//! Shared helper utilities for voice controller schemas. 
+ +use serde::de::DeserializeOwned; +use serde_json::{Map, Value}; + +use crate::core::{FieldSchema, TypeSchema}; +use crate::rpc::RpcOutcome; + +pub(super) fn to_json(outcome: RpcOutcome) -> Result { + let json_val = + serde_json::to_value(outcome.value).map_err(|e| format!("serialize error: {e}"))?; + Ok(json_val) +} + +pub(super) fn deserialize_params( + params: Map, +) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +pub(super) fn required_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::String, + comment, + required: true, + } +} + +pub(super) fn optional_string(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::String)), + comment, + required: false, + } +} + +pub(super) fn optional_bool(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Option(Box::new(TypeSchema::Bool)), + comment, + required: false, + } +} + +pub(super) fn json_output(name: &'static str, comment: &'static str) -> FieldSchema { + FieldSchema { + name, + ty: TypeSchema::Json, + comment, + required: true, + } +} + +pub(super) fn validate_stt_provider(provider: &str) -> Result<(), String> { + match provider { + "cloud" | "openhuman" | "whisper" => Ok(()), + other => { + // Accept slug:model grammar or bare slug — the factory will + // validate against the voice_providers registry at dispatch time. 
+ if other.contains(':') || !other.is_empty() { + Ok(()) + } else { + Err(format!( + "invalid stt_provider '{other}' (valid: 'cloud', 'whisper', or ':')" + )) + } + } + } +} + +pub(super) fn validate_tts_provider(provider: &str) -> Result<(), String> { + match provider { + "cloud" | "openhuman" | "piper" => Ok(()), + other => { + if other.contains(':') || !other.is_empty() { + Ok(()) + } else { + Err(format!( + "invalid tts_provider '{other}' (valid: 'cloud', 'piper', or ':')" + )) + } + } + } +} + +pub(super) fn effective_stt_provider(config: &crate::openhuman::config::Config) -> String { + crate::openhuman::voice::effective_stt_provider(config) +} + +pub(super) fn effective_tts_provider(config: &crate::openhuman::config::Config) -> String { + crate::openhuman::voice::effective_tts_provider(config) +} + +/// Validate a TTS provider's API key by hitting a lightweight read-only endpoint +/// rather than synthesizing audio (which requires a valid voice ID). +pub(super) async fn validate_tts_provider_key( + provider: &str, + config: &crate::openhuman::config::Config, +) -> Result { + let (slug, _model) = if let Some(pos) = provider.find(':') { + (&provider[..pos], &provider[pos + 1..]) + } else { + (provider, "") + }; + + let entry = config + .voice_providers + .iter() + .find(|p| p.slug == slug) + .ok_or_else(|| format!("no voice provider with slug '{slug}'"))?; + + let api_key = crate::openhuman::inference::provider::factory::lookup_key_for_slug(slug, config) + .unwrap_or_default(); + + if api_key.is_empty() { + return Err("no API key configured for this provider".to_string()); + } + + let endpoint = entry.endpoint.trim_end_matches('/'); + let client = reqwest::Client::new(); + + // ElevenLabs: GET /user/subscription requires only basic auth (no + // extra scopes like voices_read). OpenAI / generic: GET /models. 
+ let url = if slug == "elevenlabs" { + format!("{endpoint}/user/subscription") + } else { + format!("{endpoint}/models") + }; + + let mut req = client.get(&url); + if slug == "elevenlabs" { + req = req.header("xi-api-key", &api_key); + } else { + req = req.header("Authorization", format!("Bearer {api_key}")); + } + + let resp = req + .send() + .await + .map_err(|e| format!("request failed: {e}"))?; + + if resp.status().is_success() { + Ok("TTS provider key is valid".to_string()) + } else { + let status = resp.status(); + let body = resp.text().await.unwrap_or_default(); + Err(format!("API returned {status}: {body}")) + } +} + +/// Generate a minimal WAV file with ~0.1s of silence (8kHz mono 16-bit PCM). +pub(super) fn generate_silent_wav() -> Vec { + let sample_rate: u32 = 8000; + let num_samples: u32 = 800; // 0.1s + let bits_per_sample: u16 = 16; + let num_channels: u16 = 1; + let byte_rate = sample_rate * u32::from(num_channels) * u32::from(bits_per_sample) / 8; + let block_align = num_channels * bits_per_sample / 8; + let data_size = num_samples * u32::from(block_align); + let file_size = 36 + data_size; + + let mut wav = Vec::with_capacity(44 + data_size as usize); + wav.extend_from_slice(b"RIFF"); + wav.extend_from_slice(&file_size.to_le_bytes()); + wav.extend_from_slice(b"WAVE"); + wav.extend_from_slice(b"fmt "); + wav.extend_from_slice(&16u32.to_le_bytes()); // subchunk1 size + wav.extend_from_slice(&1u16.to_le_bytes()); // PCM + wav.extend_from_slice(&num_channels.to_le_bytes()); + wav.extend_from_slice(&sample_rate.to_le_bytes()); + wav.extend_from_slice(&byte_rate.to_le_bytes()); + wav.extend_from_slice(&block_align.to_le_bytes()); + wav.extend_from_slice(&bits_per_sample.to_le_bytes()); + wav.extend_from_slice(b"data"); + wav.extend_from_slice(&data_size.to_le_bytes()); + wav.extend(std::iter::repeat(0u8).take(data_size as usize)); + wav +} diff --git a/src/openhuman/voice/schemas/mod.rs b/src/openhuman/voice/schemas/mod.rs new file mode 100644 index 
0000000000..e4900a2bdc --- /dev/null +++ b/src/openhuman/voice/schemas/mod.rs @@ -0,0 +1,45 @@ +//! Controller schemas and RPC handler dispatch for the voice domain. + +mod handlers; +mod helpers; +mod params; +mod registry; + +// Re-export the public API that callers outside this module use. +pub use registry::{all_voice_controller_schemas, all_voice_registered_controllers, voice_schemas}; + +// --------------------------------------------------------------------------- +// Internal re-exports used by the test companion file. +// --------------------------------------------------------------------------- + +#[cfg(test)] +use handlers::{ + handle_overlay_stt_notify, handle_voice_server_start, handle_voice_server_status, + handle_voice_server_stop, +}; +#[cfg(test)] +use helpers::{ + deserialize_params, generate_silent_wav, to_json, validate_stt_provider, validate_tts_provider, +}; +#[cfg(test)] +use params::{ + OverlaySttNotifyParams, OverlaySttState, ReplySynthesizeParams, SetProvidersParams, + SttDispatchParams, TranscribeBytesParams, TranscribeParams, TtsDispatchParams, TtsParams, + VoiceListModelsParams, VoiceProviderCredUpdate, VoiceTestProviderParams, + VoiceUpdateProviderSettingsParams, +}; + +#[cfg(test)] +use crate::rpc::RpcOutcome; +#[cfg(test)] +use serde_json::Map; +#[cfg(test)] +use serde_json::Value; + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +#[path = "../schemas_tests.rs"] +mod tests; diff --git a/src/openhuman/voice/schemas/params.rs b/src/openhuman/voice/schemas/params.rs new file mode 100644 index 0000000000..9b917bc508 --- /dev/null +++ b/src/openhuman/voice/schemas/params.rs @@ -0,0 +1,177 @@ +//! Param structs for voice controller RPC handlers. 
+ +use serde::Deserialize; + +#[derive(Debug, Deserialize)] +pub(super) struct TranscribeParams { + pub(super) audio_path: String, + /// Optional conversation context for LLM post-processing. + #[serde(default)] + pub(super) context: Option, + /// Skip LLM cleanup and return raw whisper output. + #[serde(default)] + pub(super) skip_cleanup: bool, +} + +#[derive(Debug, Deserialize)] +pub(super) struct TranscribeBytesParams { + pub(super) audio_bytes: Vec, + #[serde(default)] + pub(super) extension: Option, + /// Optional conversation context for LLM post-processing. + #[serde(default)] + pub(super) context: Option, + /// Skip LLM cleanup and return raw whisper output. + #[serde(default)] + pub(super) skip_cleanup: bool, +} + +#[derive(Debug, Deserialize)] +pub(super) struct TtsParams { + pub(super) text: String, + #[serde(default)] + pub(super) output_path: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct CloudTranscribeParams { + pub(super) audio_base64: String, + #[serde(default)] + pub(super) mime_type: Option, + #[serde(default)] + pub(super) file_name: Option, + #[serde(default)] + pub(super) model: Option, + #[serde(default)] + pub(super) language: Option, +} + +/// Factory-dispatched STT request. The caller can either pin a provider +/// explicitly (`"cloud"` / `"whisper"`) or let the controller resolve the +/// effective provider from `config.local_ai.stt_provider`. Keeps the +/// existing `voice_cloud_transcribe` RPC intact for back-compat — older +/// renderers still pin the cloud path directly. +#[derive(Debug, Deserialize)] +pub(super) struct SttDispatchParams { + pub(super) audio_base64: String, + /// Provider override; falls back to `config.local_ai.stt_provider`. + #[serde(default)] + pub(super) provider: Option, + /// Model override (cloud branch ignores it). 
+ #[serde(default)] + pub(super) model: Option, + #[serde(default)] + pub(super) mime_type: Option, + #[serde(default)] + pub(super) file_name: Option, + #[serde(default)] + pub(super) language: Option, +} + +/// Factory-dispatched TTS request. Same provider-resolution rule as +/// [`SttDispatchParams`]. +#[derive(Debug, Deserialize)] +pub(super) struct TtsDispatchParams { + pub(super) text: String, + #[serde(default)] + pub(super) provider: Option, + #[serde(default)] + pub(super) voice: Option, +} + +/// Settings-panel update for the STT/TTS provider selectors. Both are +/// optional; omitted fields are left at their current value. +#[derive(Debug, Deserialize)] +pub(super) struct SetProvidersParams { + #[serde(default)] + pub(super) stt_provider: Option, + #[serde(default)] + pub(super) tts_provider: Option, + #[serde(default)] + pub(super) stt_model: Option, + #[serde(default)] + pub(super) tts_voice: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct ReplySynthesizeParams { + pub(super) text: String, + #[serde(default)] + pub(super) voice_id: Option, + #[serde(default)] + pub(super) model_id: Option, + #[serde(default)] + pub(super) output_format: Option, +} + +/// Voice provider registry update. Mirrors `InferenceUpdateModelSettingsParams`. +#[derive(Debug, Deserialize)] +pub(super) struct VoiceUpdateProviderSettingsParams { + #[serde(default)] + pub(super) voice_providers: Option>, + #[serde(default)] + pub(super) stt_provider: Option, + #[serde(default)] + pub(super) tts_provider: Option, +} + +/// Wire format for a single voice provider entry in the update call. 
+#[derive(Debug, Deserialize)] +pub(super) struct VoiceProviderCredUpdate { + #[serde(default)] + pub(super) id: Option, + pub(super) slug: String, + #[serde(default)] + pub(super) label: Option, + #[serde(default)] + pub(super) endpoint: Option, + #[serde(default)] + pub(super) auth_style: Option, + #[serde(default)] + pub(super) capability: Option, + #[serde(default)] + pub(super) stt_api_style: Option, + #[serde(default)] + pub(super) tts_api_style: Option, + #[serde(default)] + pub(super) default_stt_model: Option, + #[serde(default)] + pub(super) default_tts_voice: Option, +} + +/// List models/voices for a voice provider. +#[derive(Debug, Deserialize)] +pub(super) struct VoiceListModelsParams { + pub(super) provider_id: String, + #[serde(default)] + pub(super) capability: Option, +} + +/// Test a voice provider endpoint. +#[derive(Debug, Deserialize)] +pub(super) struct VoiceTestProviderParams { + pub(super) workload: String, + pub(super) provider: String, + /// When true, only validate the API key (lightweight GET) without + /// synthesizing or transcribing. Used by the provider-enable modal. + #[serde(default)] + pub(super) validate_only: bool, +} + +#[derive(Debug, Deserialize)] +#[serde(rename_all = "snake_case")] +pub(super) enum OverlaySttState { + RecordingStarted, + TranscriptionDone, + Cancelled, + Error, +} + +#[derive(Debug, Deserialize)] +pub(super) struct OverlaySttNotifyParams { + /// Voice state transition. + pub(super) state: OverlaySttState, + /// Transcribed text (required when state is "transcription_done"). + #[serde(default)] + pub(super) text: Option, +} diff --git a/src/openhuman/voice/schemas/registry.rs b/src/openhuman/voice/schemas/registry.rs new file mode 100644 index 0000000000..2cf941951e --- /dev/null +++ b/src/openhuman/voice/schemas/registry.rs @@ -0,0 +1,404 @@ +//! Schema definitions and controller registry for the voice domain. 
+ +use crate::core::all::RegisteredController; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; + +use super::handlers::{ + handle_overlay_stt_notify, handle_voice_cloud_transcribe, handle_voice_list_models, + handle_voice_reply_synthesize, handle_voice_server_start, handle_voice_server_status, + handle_voice_server_stop, handle_voice_set_providers, handle_voice_status, + handle_voice_stt_dispatch, handle_voice_test_provider, handle_voice_transcribe, + handle_voice_transcribe_bytes, handle_voice_tts, handle_voice_tts_dispatch, + handle_voice_update_provider_settings, +}; +use super::helpers::{json_output, optional_bool, optional_string, required_string}; + +pub fn all_voice_controller_schemas() -> Vec { + vec![ + voice_schemas("voice_status"), + voice_schemas("voice_transcribe"), + voice_schemas("voice_transcribe_bytes"), + voice_schemas("voice_tts"), + voice_schemas("voice_reply_synthesize"), + voice_schemas("voice_cloud_transcribe"), + voice_schemas("voice_stt_dispatch"), + voice_schemas("voice_tts_dispatch"), + voice_schemas("voice_set_providers"), + voice_schemas("voice_update_provider_settings"), + voice_schemas("voice_list_models"), + voice_schemas("voice_test_provider"), + voice_schemas("voice_server_start"), + voice_schemas("voice_server_stop"), + voice_schemas("voice_server_status"), + voice_schemas("overlay_stt_notify"), + ] +} + +pub fn all_voice_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: voice_schemas("voice_status"), + handler: handle_voice_status, + }, + RegisteredController { + schema: voice_schemas("voice_transcribe"), + handler: handle_voice_transcribe, + }, + RegisteredController { + schema: voice_schemas("voice_transcribe_bytes"), + handler: handle_voice_transcribe_bytes, + }, + RegisteredController { + schema: voice_schemas("voice_tts"), + handler: handle_voice_tts, + }, + RegisteredController { + schema: voice_schemas("voice_reply_synthesize"), + handler: handle_voice_reply_synthesize, + }, + 
RegisteredController { + schema: voice_schemas("voice_cloud_transcribe"), + handler: handle_voice_cloud_transcribe, + }, + RegisteredController { + schema: voice_schemas("voice_stt_dispatch"), + handler: handle_voice_stt_dispatch, + }, + RegisteredController { + schema: voice_schemas("voice_tts_dispatch"), + handler: handle_voice_tts_dispatch, + }, + RegisteredController { + schema: voice_schemas("voice_set_providers"), + handler: handle_voice_set_providers, + }, + RegisteredController { + schema: voice_schemas("voice_update_provider_settings"), + handler: handle_voice_update_provider_settings, + }, + RegisteredController { + schema: voice_schemas("voice_list_models"), + handler: handle_voice_list_models, + }, + RegisteredController { + schema: voice_schemas("voice_test_provider"), + handler: handle_voice_test_provider, + }, + RegisteredController { + schema: voice_schemas("voice_server_start"), + handler: handle_voice_server_start, + }, + RegisteredController { + schema: voice_schemas("voice_server_stop"), + handler: handle_voice_server_stop, + }, + RegisteredController { + schema: voice_schemas("voice_server_status"), + handler: handle_voice_server_status, + }, + RegisteredController { + schema: voice_schemas("overlay_stt_notify"), + handler: handle_overlay_stt_notify, + }, + ] +} + +pub fn voice_schemas(function: &str) -> ControllerSchema { + match function { + "voice_status" => ControllerSchema { + namespace: "voice", + function: "status", + description: "Check availability of STT/TTS binaries and models.", + inputs: vec![], + outputs: vec![json_output("status", "Voice availability status.")], + }, + "voice_transcribe" => ControllerSchema { + namespace: "voice", + function: "transcribe", + description: + "Transcribe audio from a file path using whisper.cpp, with optional LLM cleanup.", + inputs: vec![ + required_string("audio_path", "Path to the audio file."), + optional_string("context", "Conversation context for LLM post-processing."), + optional_bool( + 
"skip_cleanup", + "Skip LLM cleanup, return raw whisper output.", + ), + ], + outputs: vec![json_output( + "speech", + "Transcription result with text and raw_text.", + )], + }, + "voice_transcribe_bytes" => ControllerSchema { + namespace: "voice", + function: "transcribe_bytes", + description: + "Transcribe audio from raw bytes using whisper.cpp, with optional LLM cleanup.", + inputs: vec![ + FieldSchema { + name: "audio_bytes", + ty: TypeSchema::Bytes, + comment: "Raw audio bytes.", + required: true, + }, + optional_string("extension", "Audio file extension (default: webm)."), + optional_string("context", "Conversation context for LLM post-processing."), + optional_bool( + "skip_cleanup", + "Skip LLM cleanup, return raw whisper output.", + ), + ], + outputs: vec![json_output( + "speech", + "Transcription result with text and raw_text.", + )], + }, + "voice_tts" => ControllerSchema { + namespace: "voice", + function: "tts", + description: "Synthesize speech from text using piper.", + inputs: vec![ + required_string("text", "Text to synthesize."), + optional_string("output_path", "Optional output file path."), + ], + outputs: vec![json_output("tts", "TTS result with output path.")], + }, + "voice_reply_synthesize" => ControllerSchema { + namespace: "voice", + function: "reply_synthesize", + description: + "Synthesize an agent reply via the hosted backend (ElevenLabs) and return \ + base64 audio plus an Oculus-15 viseme alignment for mascot lip-sync.", + inputs: vec![ + required_string("text", "Text to synthesize."), + optional_string( + "voice_id", + "Override voice id (defaults to backend selection).", + ), + optional_string("model_id", "Override model id."), + optional_string("output_format", "Override audio format (e.g. mp3_44100)."), + ], + outputs: vec![json_output( + "reply", + "ReplySpeechResult: { audio_base64, audio_mime, visemes, alignment? 
}.", + )], + }, + "voice_stt_dispatch" => ControllerSchema { + namespace: "voice", + function: "stt_dispatch", + description: + "Factory-dispatched speech-to-text. Routes to the cloud Whisper proxy or the \ + local whisper.cpp binary based on `provider` (or `config.local_ai.stt_provider` \ + when unspecified). Returns the same `{ text }` payload either way.", + inputs: vec![ + required_string( + "audio_base64", + "Base64-encoded audio bytes (e.g. webm/opus from MediaRecorder).", + ), + optional_string( + "provider", + "Override provider: 'cloud' or 'whisper'. Defaults to config.local_ai.stt_provider.", + ), + optional_string("model", "Whisper model id (whisper branch only)."), + optional_string("mime_type", "Audio MIME type (default: audio/webm)."), + optional_string("file_name", "Filename hint (default: audio.webm)."), + optional_string("language", "BCP-47 language hint, e.g. 'en'."), + ], + outputs: vec![json_output( + "result", + "SttResult: { text, provider }.", + )], + }, + "voice_tts_dispatch" => ControllerSchema { + namespace: "voice", + function: "tts_dispatch", + description: + "Factory-dispatched text-to-speech. Routes to the cloud ElevenLabs proxy \ + (returns rich viseme alignment) or local Piper (returns audio + a synthetic \ + viseme timeline) based on `provider` (or `config.local_ai.tts_provider`).", + inputs: vec![ + required_string("text", "Text to synthesize."), + optional_string( + "provider", + "Override provider: 'cloud' or 'piper'. Defaults to config.local_ai.tts_provider.", + ), + optional_string( + "voice", + "Voice id (provider-specific). Piper expects an id like 'en_US-lessac-medium'.", + ), + ], + outputs: vec![json_output( + "reply", + "ReplySpeechResult: { audio_base64, audio_mime, visemes, alignment? 
}.", + )], + }, + "voice_set_providers" => ControllerSchema { + namespace: "voice", + function: "set_providers", + description: + "Persist the STT / TTS provider selection (and optional model/voice id) into \ + `config.local_ai.{stt,tts}_provider` so subsequent voice_stt_dispatch / \ + voice_tts_dispatch calls resolve without an explicit provider param.", + inputs: vec![ + optional_string( + "stt_provider", + "STT provider id ('cloud' or 'whisper'). Omitted = unchanged.", + ), + optional_string( + "tts_provider", + "TTS provider id ('cloud' or 'piper'). Omitted = unchanged.", + ), + optional_string("stt_model", "Whisper model id (e.g. 'whisper-large-v3-turbo')."), + optional_string("tts_voice", "Piper voice id (e.g. 'en_US-lessac-medium')."), + ], + outputs: vec![json_output( + "providers", + "Updated provider selectors: { stt_provider, tts_provider, stt_model_id, tts_voice_id }.", + )], + }, + "voice_update_provider_settings" => ControllerSchema { + namespace: "voice", + function: "update_provider_settings", + description: + "Persist the voice provider registry and STT/TTS routing strings. \ + Mirrors openhuman.inference_update_model_settings for the voice domain.", + inputs: vec![ + FieldSchema { + name: "voice_providers", + ty: TypeSchema::Option(Box::new(TypeSchema::Json)), + comment: "Array of voice provider entries (VoiceProviderCreds shape).", + required: false, + }, + optional_string( + "stt_provider", + "STT routing string ('cloud', 'whisper', or ':').", + ), + optional_string( + "tts_provider", + "TTS routing string ('cloud', 'piper', or ':').", + ), + ], + outputs: vec![json_output( + "settings", + "Updated voice_providers + routing strings snapshot.", + )], + }, + "voice_list_models" => ControllerSchema { + namespace: "voice", + function: "list_models", + description: + "List available models or voices for a voice provider. 
Returns static \ + presets for built-in slugs; probes /models for custom providers.", + inputs: vec![ + required_string("provider_id", "Provider id or slug."), + optional_string( + "capability", + "Filter by capability: 'stt' or 'tts'. Defaults to both.", + ), + ], + outputs: vec![json_output( + "models", + "{ models: [{ id, label? }] }", + )], + }, + "voice_test_provider" => ControllerSchema { + namespace: "voice", + function: "test_provider", + description: + "Test a voice provider endpoint without saving. STT transcribes a \ + silent audio clip; TTS synthesizes 'Hello' and discards.", + inputs: vec![ + required_string("workload", "Workload to test: 'stt' or 'tts'."), + required_string( + "provider", + "Provider string to test (e.g. 'deepgram:nova-2').", + ), + optional_bool( + "validate_only", + "When true, only validate the API key without synthesizing/transcribing.", + ), + ], + outputs: vec![json_output( + "result", + "{ ok: bool, detail: string, latency_ms?: number }", + )], + }, + "voice_cloud_transcribe" => ControllerSchema { + namespace: "voice", + function: "cloud_transcribe", + description: + "Transcribe audio bytes via the hosted backend's STT endpoint. Used by the \ + mascot's mic-only composer so we don't ship a provider API key in the desktop app.", + inputs: vec![ + required_string( + "audio_base64", + "Base64-encoded audio bytes (e.g. webm/opus from MediaRecorder).", + ), + optional_string("mime_type", "Audio MIME type (default: audio/webm)."), + optional_string("file_name", "Original filename hint (default: audio.webm)."), + optional_string("model", "Backend STT model id (default: whisper-v1)."), + optional_string("language", "BCP-47 language hint, e.g. 
'en'."), + ], + outputs: vec![json_output("result", "CloudTranscribeResult: { text }.")], + }, + "voice_server_start" => ControllerSchema { + namespace: "voice", + function: "server_start", + description: + "Start the voice dictation server (hotkey → record → transcribe → insert text).", + inputs: vec![ + optional_string("hotkey", "Hotkey combination (default: Fn)."), + optional_string( + "activation_mode", + "Activation mode: tap or push (default: push).", + ), + optional_bool("skip_cleanup", "Skip LLM post-processing."), + ], + outputs: vec![json_output("status", "Voice server status after start.")], + }, + "voice_server_stop" => ControllerSchema { + namespace: "voice", + function: "server_stop", + description: "Stop the voice dictation server.", + inputs: vec![], + outputs: vec![json_output("status", "Voice server status after stop.")], + }, + "voice_server_status" => ControllerSchema { + namespace: "voice", + function: "server_status", + description: "Get the current voice dictation server status.", + inputs: vec![], + outputs: vec![json_output("status", "Current voice server status.")], + }, + "overlay_stt_notify" => ControllerSchema { + namespace: "voice", + function: "overlay_stt_notify", + description: + "Notify the overlay of a voice/STT state change from the chat prompt button.", + inputs: vec![ + required_string( + "state", + "State transition: recording_started, transcription_done, cancelled, error.", + ), + optional_string( + "text", + "Transcribed text (when state is transcription_done).", + ), + ], + outputs: vec![json_output("result", "Notification acknowledgement.")], + }, + _ => ControllerSchema { + namespace: "voice", + function: "unknown", + description: "Unknown voice controller.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "error", + ty: TypeSchema::String, + comment: "Lookup error details.", + required: true, + }], + }, + } +} diff --git a/src/openhuman/workflows/schemas.rs b/src/openhuman/workflows/schemas.rs deleted file mode 
100644 index 3796887be0..0000000000 --- a/src/openhuman/workflows/schemas.rs +++ /dev/null @@ -1,1429 +0,0 @@ -//! JSON-RPC / CLI controller surface for the skills domain. -//! -//! Exposes: -//! * `skills.list` — enumerate SKILL.md / legacy skills discovered in the -//! current user home and workspace. -//! * `skills.read_resource` — read a single bundled resource file, with path -//! traversal, symlink, size and UTF-8 guards. -//! * `skills.create` — scaffold a new SKILL.md skill under the user or -//! workspace scope. -//! * `skills.install_from_url` — install a remote skill by fetching its -//! `SKILL.md` over HTTPS (size-capped, timeout-clamped) and writing it into -//! the user-scope skills directory. Rejects non-https, private-IP, and -//! non-SKILL.md URLs; normalises `github.com/.../blob/...` → raw. -//! -//! All controllers resolve the active workspace via the persisted config -//! layer (`config::load_config_with_timeout`) so the CLI and UI see the same -//! skills catalog without the caller having to thread a workspace path. - -use std::path::{Path, PathBuf}; - -use serde::de::DeserializeOwned; -use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value}; - -use crate::core::all::{ControllerFuture, RegisteredController}; -use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; -use crate::openhuman::config::Config; -use crate::openhuman::workflows::ops::{ - create_workflow, discover_workflows, install_workflow_from_url, is_workspace_trusted, - read_workflow_resource, uninstall_workflow, CreateWorkflowParams, InstallWorkflowFromUrlParams, - UninstallWorkflowParams, Workflow, WorkflowCreateInputDef, WorkflowScope, -}; -use crate::rpc::RpcOutcome; - -use crate::openhuman::agent::harness::session::Agent; -use crate::openhuman::agent::harness::subagent_runner::with_autonomous_iter_cap; -use crate::openhuman::workflows::{preflight, registry, run_log}; - -/// Iteration cap for an autonomous skill run (orchestrator + sub-agents). 
High -/// enough to "run until done", while the repeated-failure circuit breaker still -/// stops dead-end grinding — deliberately bounded (not infinite) to cap spend. -const WORKFLOW_RUN_MAX_ITERATIONS: usize = 200; - -#[derive(Debug, Deserialize, Default)] -struct WorkflowsListParams { - // No params today. Kept as an empty struct so future filters (scope, - // search, etc.) can slot in without breaking older clients. -} - -#[derive(Debug, Deserialize)] -struct WorkflowsReadResourceParams { - workflow_id: String, - relative_path: String, -} - -#[derive(Debug, Deserialize)] -struct WorkflowsCreateParams { - name: String, - description: String, - /// Optional trigger/goal — *when* an agent should reach for this workflow. - /// Merges the old agent-workflow's `when_to_use` into the unified create - /// form; written to `skill.toml`. Falls back to `description` when omitted. - #[serde(default)] - when_to_use: Option, - #[serde(default)] - scope: WorkflowScope, - #[serde(default)] - license: Option, - #[serde(default)] - author: Option, - #[serde(default)] - tags: Vec, - #[serde(default, rename = "allowed-tools", alias = "allowed_tools")] - allowed_tools: Vec, - /// Declared `[[inputs]]` entries supplied by the Create-a-Workflow form. - /// Empty when the user added no rows; otherwise written into a sibling - /// `skill.toml` alongside `SKILL.md` so the Skills Runner can render - /// dynamic form controls at run time. Wire-shape per row: - /// `{ name, description?, required, type? }` — see - /// [`WorkflowCreateInputDef`] in `ops_create.rs`. 
- #[serde(default)] - inputs: Vec, -} - -impl From for CreateWorkflowParams { - fn from(p: WorkflowsCreateParams) -> Self { - CreateWorkflowParams { - name: p.name, - description: p.description, - when_to_use: p.when_to_use, - scope: p.scope, - license: p.license, - author: p.author, - tags: p.tags, - allowed_tools: p.allowed_tools, - inputs: p.inputs, - overwrite: false, - } - } -} - -/// Wire-format representation of a discovered skill. Mirrors the fields in -/// [`Workflow`] that are useful to the UI while hiding the -/// `frontmatter` blob (which includes a flatten'd forward-compat hatch and -/// can balloon with arbitrary YAML). -#[derive(Debug, Serialize)] -struct WorkflowSummary { - id: String, - name: String, - description: String, - version: String, - author: Option, - tags: Vec, - platforms: Vec, - related_skills: Vec, - source_format: String, - tools: Vec, - prompts: Vec, - location: Option, - resources: Vec, - scope: WorkflowScope, - legacy: bool, - warnings: Vec, -} - -impl From for WorkflowSummary { - fn from(s: Workflow) -> Self { - // `id` is the on-disk slug the uninstall RPC resolves against. - // Prefer `dir_name`, but fall back to `name` for back-compat on - // deserialised `Workflow` values written before `dir_name` existed - // (default empty string). 
- let id = if s.dir_name.is_empty() { - s.name.clone() - } else { - s.dir_name.clone() - }; - WorkflowSummary { - id, - name: s.name, - description: s.description, - version: s.version, - author: s.author, - tags: s.tags, - platforms: s.platforms, - related_skills: s.related_skills, - source_format: if s.source_format.is_empty() { - if s.legacy { - "legacy".to_string() - } else { - "openhuman".to_string() - } - } else { - s.source_format - }, - tools: s.tools, - prompts: s.prompts, - location: s.location.as_ref().map(|p| p.display().to_string()), - resources: s - .resources - .into_iter() - .map(|p| p.display().to_string()) - .collect(), - scope: s.scope, - legacy: s.legacy, - warnings: s.warnings, - } - } -} - -#[derive(Debug, Serialize)] -struct WorkflowsListResult { - skills: Vec, -} - -#[derive(Debug, Serialize)] -struct WorkflowsReadResourceResult { - workflow_id: String, - relative_path: String, - content: String, - bytes: usize, -} - -#[derive(Debug, Serialize)] -struct WorkflowsCreateResult { - skill: WorkflowSummary, -} - -#[derive(Debug, Deserialize)] -struct WorkflowsInstallFromUrlParamsWire { - url: String, - #[serde(default)] - timeout_secs: Option, -} - -impl From for InstallWorkflowFromUrlParams { - fn from(p: WorkflowsInstallFromUrlParamsWire) -> Self { - InstallWorkflowFromUrlParams { - url: p.url, - timeout_secs: p.timeout_secs, - } - } -} - -#[derive(Debug, Serialize)] -struct WorkflowsInstallFromUrlResult { - url: String, - stdout: String, - stderr: String, - new_skills: Vec, -} - -#[derive(Debug, Serialize)] -struct WorkflowsUninstallResult { - name: String, - removed_path: String, - scope: WorkflowScope, -} - -pub fn all_workflows_controller_schemas() -> Vec { - vec![ - workflows_schemas("workflows_list"), - workflows_schemas("workflows_describe"), - workflows_schemas("workflows_recent_runs"), - workflows_schemas("workflows_read_run_log"), - workflows_schemas("workflows_read_resource"), - workflows_schemas("workflows_create"), - 
workflows_schemas("workflows_update"), - workflows_schemas("workflows_install_from_url"), - workflows_schemas("workflows_uninstall"), - workflows_schemas("workflows_run"), - workflows_schemas("workflows_cancel"), - ] -} - -pub fn all_workflows_registered_controllers() -> Vec { - vec![ - RegisteredController { - schema: workflows_schemas("workflows_list"), - handler: handle_workflows_list, - }, - RegisteredController { - schema: workflows_schemas("workflows_describe"), - handler: handle_workflows_describe, - }, - RegisteredController { - schema: workflows_schemas("workflows_recent_runs"), - handler: handle_workflows_recent_runs, - }, - RegisteredController { - schema: workflows_schemas("workflows_read_run_log"), - handler: handle_workflows_read_run_log, - }, - RegisteredController { - schema: workflows_schemas("workflows_read_resource"), - handler: handle_workflows_read_resource, - }, - RegisteredController { - schema: workflows_schemas("workflows_create"), - handler: handle_workflows_create, - }, - RegisteredController { - schema: workflows_schemas("workflows_update"), - handler: handle_workflows_update, - }, - RegisteredController { - schema: workflows_schemas("workflows_install_from_url"), - handler: handle_workflows_install_from_url, - }, - RegisteredController { - schema: workflows_schemas("workflows_uninstall"), - handler: handle_workflows_uninstall, - }, - RegisteredController { - schema: workflows_schemas("workflows_run"), - handler: handle_workflows_run, - }, - RegisteredController { - schema: workflows_schemas("workflows_cancel"), - handler: handle_workflows_cancel, - }, - ] -} - -pub fn workflows_schemas(function: &str) -> ControllerSchema { - match function { - "workflows_list" => ControllerSchema { - namespace: "workflows", - function: "list", - description: "List SKILL.md and legacy skills discovered in the user home and workspace.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "skills", - ty: 
TypeSchema::Array(Box::new(TypeSchema::Ref("WorkflowSummary"))), - comment: "Discovered skills (sorted by name, project-scope shadows user-scope).", - required: true, - }], - }, - "workflows_run" => ControllerSchema { - namespace: "workflows", - function: "run", - description: "Start a skill in the background: run the orchestrator agent focused by the skill's SKILL.md + the given inputs, streaming every step to a per-run log file. Validates required inputs and returns immediately with a run id and the log path.", - inputs: vec![ - FieldSchema { - name: "workflow_id", - ty: TypeSchema::String, - comment: "Id of the skill to run (matches WorkflowDefinition.id).", - required: true, - }, - FieldSchema { - name: "inputs", - ty: TypeSchema::Json, - comment: "Object of input values keyed by the skill's declared input names.", - required: false, - }, - ], - outputs: vec![ - FieldSchema { - name: "run_id", - ty: TypeSchema::String, - comment: "Id for this background run.", - required: true, - }, - FieldSchema { - name: "status", - ty: TypeSchema::String, - comment: "Always \"started\" — the orchestrator runs in the background.", - required: true, - }, - FieldSchema { - name: "workflow_id", - ty: TypeSchema::String, - comment: "Echo of the requested skill id.", - required: true, - }, - FieldSchema { - name: "log", - ty: TypeSchema::String, - comment: "Path to the per-run streaming log (/skills/.runs/_.log).", - required: true, - }, - ], - }, - "workflows_cancel" => ControllerSchema { - namespace: "workflows", - function: "cancel", - description: "Request cancellation of an in-flight workflow run by run_id. The run stops at its next await point and records a CANCELLED footer. 
Returns cancelled=false if the run id is unknown (already finished or never existed).", - inputs: vec![FieldSchema { - name: "run_id", - ty: TypeSchema::String, - comment: "Id of the running workflow run to cancel (from workflows_run).", - required: true, - }], - outputs: vec![ - FieldSchema { - name: "run_id", - ty: TypeSchema::String, - comment: "Echo of the requested run id.", - required: true, - }, - FieldSchema { - name: "cancelled", - ty: TypeSchema::Bool, - comment: "True if a live run was found and signalled; false if unknown.", - required: true, - }, - ], - }, - "workflows_read_resource" => ControllerSchema { - namespace: "workflows", - function: "read_resource", - description: "Read a single bundled SKILL resource file, hardened against traversal, symlink escape, and oversized payloads.", - inputs: vec![ - FieldSchema { - name: "workflow_id", - ty: TypeSchema::String, - comment: "Name of the skill (matches WorkflowSummary.id / Workflow.name).", - required: true, - }, - FieldSchema { - name: "relative_path", - ty: TypeSchema::String, - comment: "Path to the resource file, relative to the skill root (e.g. 
'scripts/foo.sh').", - required: true, - }, - ], - outputs: vec![ - FieldSchema { - name: "workflow_id", - ty: TypeSchema::String, - comment: "Echo of the requested skill id.", - required: true, - }, - FieldSchema { - name: "relative_path", - ty: TypeSchema::String, - comment: "Echo of the requested relative path.", - required: true, - }, - FieldSchema { - name: "content", - ty: TypeSchema::String, - comment: "File contents (UTF-8, <= 128 KB).", - required: true, - }, - FieldSchema { - name: "bytes", - ty: TypeSchema::U64, - comment: "Size of the file on disk, in bytes.", - required: true, - }, - ], - }, - "workflows_create" => ControllerSchema { - namespace: "workflows", - function: "create", - description: "Scaffold a new SKILL.md skill under the user or workspace scope.", - inputs: vec![ - FieldSchema { - name: "name", - ty: TypeSchema::String, - comment: "Human-readable name (slugified into the on-disk directory).", - required: true, - }, - FieldSchema { - name: "description", - ty: TypeSchema::String, - comment: "One-line description written into SKILL.md frontmatter.", - required: true, - }, - FieldSchema { - name: "when_to_use", - ty: TypeSchema::String, - comment: "Optional 'when to run me' trigger. 
Written to the sibling skill.toml; the registry surfaces it as the workflow's when_to_use (falls back to description).", - required: false, - }, - FieldSchema { - name: "scope", - ty: TypeSchema::String, - comment: "Target scope: 'user' (default) or 'project' (requires trust marker).", - required: false, - }, - FieldSchema { - name: "license", - ty: TypeSchema::String, - comment: "Optional SPDX license identifier.", - required: false, - }, - FieldSchema { - name: "author", - ty: TypeSchema::String, - comment: "Optional author name (written under frontmatter.metadata.author).", - required: false, - }, - FieldSchema { - name: "tags", - ty: TypeSchema::Array(Box::new(TypeSchema::String)), - comment: "Optional tags for the skill.", - required: false, - }, - FieldSchema { - name: "allowed_tools", - ty: TypeSchema::Array(Box::new(TypeSchema::String)), - comment: "Optional tool hints (maps to frontmatter.allowed-tools).", - required: false, - }, - FieldSchema { - name: "inputs", - ty: TypeSchema::Json, - comment: "Optional declared `[[inputs]]` entries (each `{ name, description, required, type }`). When non-empty, a sibling `skill.toml` is written alongside `SKILL.md` so the Skills Runner can render dynamic form controls at run time.", - required: false, - }, - ], - outputs: vec![FieldSchema { - name: "skill", - ty: TypeSchema::Ref("WorkflowSummary"), - comment: "The newly created skill, re-discovered through the standard pipeline.", - required: true, - }], - }, - // Same wire shape as create; overwrites the workflow at the resolved - // slug (frontmatter + workflow.toml) while preserving the body. 
- "workflows_update" => { - let mut s = workflows_schemas("workflows_create"); - s.function = "update"; - s.description = - "Edit an existing workflow: overwrite frontmatter + workflow.toml at the resolved slug, preserving the hand-authored body."; - s - } - "workflows_install_from_url" => ControllerSchema { - namespace: "workflows", - function: "install_from_url", - description: "Install a remote skill by fetching its SKILL.md over HTTPS and writing it into the user-scope skills directory. URL must be https, resolve to a public host, and point at a single `.md` file (`github.com/.../blob/...` auto-rewrites to raw). Default 60s timeout, max 600s.", - inputs: vec![ - FieldSchema { - name: "url", - ty: TypeSchema::String, - comment: "Remote skill package URL (https only; loopback / private / link-local hosts rejected).", - required: true, - }, - FieldSchema { - name: "timeout_secs", - ty: TypeSchema::U64, - comment: "Optional wall-clock override in seconds. Default 60, capped at 600.", - required: false, - }, - ], - outputs: vec![ - FieldSchema { - name: "url", - ty: TypeSchema::String, - comment: "Echo of the installed URL.", - required: true, - }, - FieldSchema { - name: "stdout", - ty: TypeSchema::String, - comment: "Human-readable diagnostic summary (bytes fetched, target path).", - required: true, - }, - FieldSchema { - name: "stderr", - ty: TypeSchema::String, - comment: "Non-fatal frontmatter parse warnings, joined by newlines.", - required: true, - }, - FieldSchema { - name: "new_skills", - ty: TypeSchema::Array(Box::new(TypeSchema::String)), - comment: "Slugs of skills that appeared in the catalog as a result of the install.", - required: true, - }, - ], - }, - "workflows_read_run_log" => ControllerSchema { - namespace: "workflows", - function: "read_run_log", - description: "Read a slice of a skill run's streaming log file by run_id. 
The FE Skills Runner panel opens this on click of a Recent Runs row and re-calls it every 2s while the run's `status` is RUNNING to tail new bytes (use the returned `offset` as the next call's `offset`). The run id resolves to a path internally — callers don't supply a path, so no traversal surface. `max_bytes` is clamped to 262144 (256 KiB) per call; pages by re-issuing with the returned `offset`.", - inputs: vec![ - FieldSchema { - name: "run_id", - ty: TypeSchema::String, - comment: "Run id from `skills_recent_runs.runs[].run_id` (matched by 8-char prefix against the log filename).", - required: true, - }, - FieldSchema { - name: "offset", - ty: TypeSchema::U64, - comment: "Byte offset to start reading from. Default 0 (read from start); the FE passes the previous response's `offset` for tail-mode polling.", - required: false, - }, - FieldSchema { - name: "max_bytes", - ty: TypeSchema::U64, - comment: "Max bytes to return in this slice. Default 65536 (64 KiB), capped at 262144 (256 KiB).", - required: false, - }, - ], - outputs: vec![ - FieldSchema { - name: "offset", - ty: TypeSchema::U64, - comment: "New read cursor — pass this as the next call's `offset` to tail forward.", - required: true, - }, - FieldSchema { - name: "bytes_read", - ty: TypeSchema::U64, - comment: "Number of bytes returned in this slice.", - required: true, - }, - FieldSchema { - name: "content", - ty: TypeSchema::String, - comment: "The slice contents (UTF-8, lossy-decoded so a partial multibyte tail doesn't error).", - required: true, - }, - FieldSchema { - name: "eof", - ty: TypeSchema::Bool, - comment: "True if the read reached end-of-file. May still be FALSE-complete (run still streaming).", - required: true, - }, - FieldSchema { - name: "complete", - ty: TypeSchema::Bool, - comment: "True once the run footer (`--- result ---`) has landed in the file. 
The FE stops polling when this flips true.", - required: true, - }, - ], - }, - "workflows_recent_runs" => ControllerSchema { - namespace: "workflows", - function: "recent_runs", - description: "List recent autonomous skill runs by scanning `/skills/.runs/`. Returns one entry per log file (header: workflow_id, run_id, started; footer: status, duration_ms, finished) sorted by `started` descending. `status` is `RUNNING` while the footer hasn't landed yet, then `DONE` / `DEGENERATE` / `FAILED`. Optionally filter by `workflow_id` to scope to one skill; `limit` (default 20, max 100) caps the result. Cheap: reads the files top-to-bottom and short-circuits — no schema parsing of the streaming body.", - inputs: vec![ - FieldSchema { - name: "workflow_id", - ty: TypeSchema::String, - comment: "Optional: restrict results to runs of one skill (e.g. \"github-issue-crusher\"). Omit to return runs across every skill.", - required: false, - }, - FieldSchema { - name: "limit", - ty: TypeSchema::U64, - comment: "Cap on the number of entries returned. Default 20, clamped to 100.", - required: false, - }, - ], - outputs: vec![FieldSchema { - name: "runs", - ty: TypeSchema::Json, - comment: "Array of `{ run_id, workflow_id, started, status, duration_ms, finished, log_path }` — see crate::openhuman::workflows::run_log::ScannedRun.", - required: true, - }], - }, - "workflows_describe" => ControllerSchema { - namespace: "workflows", - function: "describe", - description: "Describe a single skill by id — returns its display name, summary, and the declared `[[inputs]]` block. Used by the Settings → Skills Runner panel to render dynamic input controls and let the user fill in the right fields before clicking Run Now or scheduling a cron. `skills_list` does NOT carry `inputs` (it stays the lightweight enumeration); call this once per skill the user picks.", - inputs: vec![FieldSchema { - name: "workflow_id", - ty: TypeSchema::String, - comment: "Workflow id from `skills_list` (e.g. 
\"github-issue-crusher\", \"pr-review-shepherd\", \"dev-workflow\").", - required: true, - }], - outputs: vec![ - FieldSchema { - name: "id", - ty: TypeSchema::String, - comment: "Echo of the resolved skill id.", - required: true, - }, - FieldSchema { - name: "display_name", - ty: TypeSchema::String, - comment: "Human-friendly display name (falls back to the id when unset).", - required: true, - }, - FieldSchema { - name: "when_to_use", - ty: TypeSchema::String, - comment: "Short one-line summary from skill.toml `when_to_use` — what the skill does and when to pick it.", - required: true, - }, - // Wire shape: array of objects. `handle_workflows_describe` - // serialises this as a real array of `WorkflowInputDescription` - // objects — `{name, description, required, type}` per entry — - // so the controller-catalog type is `Json`, matching the - // payload rather than coercing it to a scalar string. - FieldSchema { - name: "inputs", - ty: TypeSchema::Json, - comment: "Array of `[[inputs]]` entries; each entry: `{ name, description, required, type }`. Renderable as a dynamic form.", - required: true, - }, - ], - }, - "workflows_uninstall" => ControllerSchema { - namespace: "workflows", - function: "uninstall", - description: "Remove an installed user-scope SKILL.md skill from `~/.openhuman/skills//`. Only user-scope installs are supported; project-scope and legacy skills are read-only. 
Rejects path separators and traversal; canonicalises before delete.", - inputs: vec![FieldSchema { - name: "name", - ty: TypeSchema::String, - comment: "Exact on-disk slug of the installed skill — matches WorkflowSummary.id (the directory under ~/.openhuman/skills/), which may differ from the frontmatter display name in Workflow.name.", - required: true, - }], - outputs: vec![ - FieldSchema { - name: "name", - ty: TypeSchema::String, - comment: "Echo of the removed skill slug.", - required: true, - }, - FieldSchema { - name: "removed_path", - ty: TypeSchema::String, - comment: "Canonical on-disk path that was deleted.", - required: true, - }, - FieldSchema { - name: "scope", - ty: TypeSchema::String, - comment: "Scope the uninstall applied to. Always `user` today.", - required: true, - }, - ], - }, - _ => ControllerSchema { - namespace: "workflows", - function: "unknown", - description: "Unknown skills controller.", - inputs: vec![], - outputs: vec![FieldSchema { - name: "error", - ty: TypeSchema::String, - comment: "Lookup error details.", - required: true, - }], - }, - } -} - -fn handle_workflows_list(params: Map) -> ControllerFuture { - Box::pin(async move { - let _ = deserialize_params::(params)?; - tracing::debug!("[skills][rpc] list skills"); - let workspace = resolve_workspace_dir().await; - let trusted = is_workspace_trusted(&workspace); - let home = dirs::home_dir(); - let skills = discover_workflows(home.as_deref(), Some(workspace.as_path()), trusted); - tracing::debug!( - count = skills.len(), - workspace = %workspace.display(), - trusted, - "[skills][rpc] list result" - ); - let summaries = skills.into_iter().map(WorkflowSummary::from).collect(); - to_json(RpcOutcome::new( - WorkflowsListResult { skills: summaries }, - Vec::new(), - )) - }) -} - -#[derive(serde::Deserialize)] -struct WorkflowsDescribeParams { - workflow_id: String, -} - -/// One input declaration as serialised over the wire to the FE form -/// renderer. 
Mirrors `registry::WorkflowInput` but with a fully-explicit -/// `type` field (the FE renders different controls per kind) and stable -/// JSON keys regardless of frontmatter casing. -#[derive(serde::Serialize)] -struct WorkflowInputDescription { - name: String, - description: String, - required: bool, - #[serde(rename = "type")] - kind: String, -} - -#[derive(serde::Serialize)] -struct WorkflowsDescribeResult { - id: String, - display_name: String, - when_to_use: String, - inputs: Vec, -} - -/// `openhuman.workflows_describe` — return a single skill's display metadata -/// and its declared `[[inputs]]` so the Skills Runner panel can render -/// the right form controls. `skills_list` deliberately stays the cheap -/// enumeration without input declarations (its `Workflow` source struct -/// predates `[[inputs]]`); on the user picking one we fetch the full -/// `WorkflowDefinition` (which carries inputs) and project the small, -/// FE-shaped subset they need. -fn handle_workflows_describe(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - let workspace = resolve_workspace_dir().await; - let skill = registry::get_workflow(&workspace, &payload.workflow_id).ok_or_else(|| { - format!( - "workflows_describe: unknown skill '{}'", - payload.workflow_id - ) - })?; - let inputs = skill - .inputs - .iter() - .map(|i| WorkflowInputDescription { - name: i.name.clone(), - description: i.description.clone(), - required: i.required, - kind: i.kind.clone().unwrap_or_else(|| "string".to_string()), - }) - .collect(); - let display_name = skill - .definition - .display_name - .clone() - .unwrap_or_else(|| skill.definition.id.clone()); - to_json(RpcOutcome::new( - WorkflowsDescribeResult { - id: skill.definition.id.clone(), - display_name, - when_to_use: skill.definition.when_to_use.clone(), - inputs, - }, - Vec::new(), - )) - }) -} - -#[derive(serde::Deserialize)] -struct WorkflowsReadRunLogParams { - run_id: String, - 
#[serde(default)] - offset: Option, - #[serde(default)] - max_bytes: Option, -} - -/// `openhuman.workflows_read_run_log` — return a slice of a skill run's -/// log file, identified by `run_id` (NOT a path — no traversal surface). -/// FE Skills Runner panel uses this to render the streaming log inline -/// when the user clicks a Recent Runs row, and tails it every 2s while -/// `complete` is false. -fn handle_workflows_read_run_log(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - let workspace = resolve_workspace_dir().await; - let path = run_log::find_run_log_path(&workspace, &payload.run_id).ok_or_else(|| { - format!( - "workflows_read_run_log: unknown run_id '{}'", - payload.run_id - ) - })?; - let offset = payload.offset.unwrap_or(0); - // 64 KiB default per-call slice, hard cap at 256 KiB to keep the - // RPC response sane; the FE re-issues with the returned offset - // to page through larger logs. - let max_bytes = payload.max_bytes.unwrap_or(64 * 1024).min(256 * 1024) as usize; - match run_log::read_run_log_slice(&path, offset, max_bytes) { - Ok(slice) => to_json(RpcOutcome::new(slice, Vec::new())), - Err(e) => Err(format!("workflows_read_run_log: read failed: {e}")), - } - }) -} - -#[derive(serde::Deserialize)] -struct WorkflowsRecentRunsParams { - #[serde(default)] - workflow_id: Option, - #[serde(default)] - limit: Option, -} - -#[derive(serde::Serialize)] -struct WorkflowsRecentRunsResult { - runs: Vec, -} - -/// `openhuman.workflows_recent_runs` — list runs from `/skills/.runs/` -/// (most-recent first), optionally filtered to one skill, capped by `limit`. -/// Powers the Skills Runner panel's "Recent runs" section + future live-log -/// tail. Delegates the actual scan + parse to `run_log::scan_runs`. 
-fn handle_workflows_recent_runs(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - let limit = payload.limit.unwrap_or(20).min(100) as usize; - let workspace = resolve_workspace_dir().await; - let runs = run_log::scan_runs(&workspace, payload.workflow_id.as_deref(), limit); - tracing::debug!( - count = runs.len(), - filter = ?payload.workflow_id, - limit, - "[skills][rpc] recent_runs" - ); - to_json(RpcOutcome::new( - WorkflowsRecentRunsResult { runs }, - Vec::new(), - )) - }) -} - -#[derive(serde::Deserialize)] -struct WorkflowsRunParams { - workflow_id: String, - #[serde(default)] - inputs: Option, -} - -/// Outcome of [`spawn_workflow_run_background`]: the new run's `run_id`, the -/// canonical `workflow_id` the registry resolved it to, and the path of the -/// streaming log file every step + the footer get written to. -pub(crate) struct WorkflowRunStarted { - pub run_id: String, - pub workflow_id: String, - pub log_path: std::path::PathBuf, -} - -/// Spawn a single autonomous workflow_run as a detached `tokio::spawn`. Used by -/// both the `openhuman.workflows_run` JSON-RPC controller and the `run_skill` -/// agent tool (which lets the orchestrator chain one skill into another — -/// e.g. `github-issue-crusher` → `pr-review-shepherd` once the draft PR is -/// open). -/// -/// Returns immediately with the run handle; the actual work runs in the -/// background until DONE / DEGENERATE / FAILED. Errors (unknown skill, -/// missing required inputs) surface as `Err(String)` *before* the spawn so -/// callers can reject malformed invocations synchronously. 
-pub(crate) async fn spawn_workflow_run_background( - skill_id_param: String, - inputs_param: Option, -) -> Result { - let workspace = resolve_workspace_dir().await; - let skill = registry::get_workflow(&workspace, &skill_id_param) - .ok_or_else(|| format!("workflow_run: unknown skill '{skill_id_param}'"))?; - let inputs = inputs_param.unwrap_or(Value::Null); - let missing = registry::missing_required_inputs(&skill.inputs, &inputs); - if !missing.is_empty() { - return Err(format!( - "workflow_run: missing required inputs: {}", - missing.join(", ") - )); - } - - // ── Preflight gates ───────────────────────────────────────────── - // Run BEFORE the orchestrator is built so failures surface - // synchronously to the caller (skills_run RPC or the run_skill - // agent tool) instead of leaking through as cryptic orchestrator - // output. Today only the [github] gate exists; future gates can - // chain here. - if let Some(github_cfg) = skill.github.as_ref() { - let config_snapshot = match Config::load_or_init().await { - Ok(c) => c, - Err(e) => { - return Err(format!( - "workflow_run preflight: failed to load config to gate `{}`: {e:#}", - skill.definition.id - )); - } - }; - let probes = preflight::LivePreflightProbes::new(&config_snapshot); - if let Err(gate_err) = preflight::run_github_preflight(Some(github_cfg), &probes).await { - let tag = gate_err.tag(); - // Materialise a run-log entry on disk so the gate failure - // shows up in `/skills/.runs/` (and therefore - // in the FE's "Recent runs" list / log viewer) even though - // the orchestrator never booted. We write a header then a - // matching FAILED footer so `scan_runs` parses it cleanly. 
- let gate_run_id = uuid::Uuid::new_v4().to_string(); - let gate_log_path = - run_log::run_log_path(&workspace, &skill.definition.id, &gate_run_id); - let body = gate_err.to_user_message(Some(&gate_log_path.display().to_string())); - let header_prompt = format!( - "preflight gate: github\n\ - gate decision: FAILED ({tag})\n\ - detail: {body}" - ); - if let Err(e) = run_log::write_header( - &gate_log_path, - &skill.definition.id, - &gate_run_id, - &inputs, - &header_prompt, - ) - .await - { - tracing::warn!( - error = %e, - "[skills] preflight gate: failed to write run-log header" - ); - } - if let Err(e) = run_log::write_footer(&gate_log_path, "FAILED", 0, &body).await { - tracing::warn!( - error = %e, - "[skills] preflight gate: failed to write run-log footer" - ); - } - tracing::warn!( - workflow_id = %skill.definition.id, - gate = "github", - tag = %tag, - gate_log = %gate_log_path.display(), - "[skills] spawn_workflow_run_background: preflight gate failed" - ); - return Err(format!("[preflight:github:{tag}] {body}")); - } - tracing::info!( - workflow_id = %skill.definition.id, - "[skills] spawn_workflow_run_background: github preflight passed" - ); - } - - // Focus the orchestrator on this single skill: its SKILL.md rides in - // the task prompt as guidelines + the resolved inputs; the - // orchestrator's own system prompt and full tool access are kept. - let guidelines = match &skill.definition.system_prompt { - crate::openhuman::agent::harness::definition::PromptSource::Inline(s) => s.clone(), - _ => String::new(), - }; - let inputs_block = registry::render_inputs_block(&skill.inputs, &inputs); - let workflow_id = skill.definition.id.clone(); - let task_prompt = format!( - "You are running a single skill: **{workflow_id}**. 
Follow these guidelines exactly and \ - focus solely on completing this one task — do not pick up unrelated work.\n\n\ - # Workflow guidelines\n{guidelines}\n\n{inputs_block}", - ); - let run_id = uuid::Uuid::new_v4().to_string(); - let log_path = run_log::run_log_path(&workspace, &workflow_id, &run_id); - tracing::info!( - workflow_id = %workflow_id, - run_id = %run_id, - log = %log_path.display(), - "[skills] spawn_workflow_run_background: starting orchestrator run" - ); - - // Detached: build the orchestrator Agent inside the spawn so config / - // toolchain are loaded fresh per run; the parent returns the handle - // immediately. Same flow handle_workflows_run used to inline — extracted - // so the `run_skill` agent tool can re-use it for skill chaining. - let inherited_origin = crate::openhuman::agent::turn_origin::current() - .unwrap_or(crate::openhuman::agent::turn_origin::AgentTurnOrigin::Cli); - { - let run_id = run_id.clone(); - let workflow_id = workflow_id.clone(); - let inputs = inputs.clone(); - let log_path = log_path.clone(); - let inherited_origin = inherited_origin.clone(); - tokio::spawn(async move { - if let Err(e) = - run_log::write_header(&log_path, &workflow_id, &run_id, &inputs, &task_prompt).await - { - tracing::warn!(run_id = %run_id, error = %e, "[skills] workflow_run: header write failed"); - } - let mut config = match Config::load_or_init().await { - Ok(c) => c, - Err(e) => { - let _ = run_log::write_footer( - &log_path, - "FAILED", - 0, - &format!("load config: {e:#}"), - ) - .await; - return; - } - }; - config.agent.max_tool_iterations = WORKFLOW_RUN_MAX_ITERATIONS; - // Only apply the permissive wildcard default when the operator - // hasn't configured an explicit allow-list — preserve any - // configured egress policy instead of unconditionally widening it. 
- if config.http_request.allowed_domains.is_empty() { - config.http_request.allowed_domains = vec!["*".to_string()]; - } - let mut agent = match Agent::from_config_for_agent(&config, "orchestrator") { - Ok(a) => a, - Err(e) => { - let _ = run_log::write_footer( - &log_path, - "FAILED", - 0, - &format!("build agent: {e:#}"), - ) - .await; - return; - } - }; - agent.set_event_context(run_id.clone(), "skill"); - agent.set_agent_definition_name(format!( - "orchestrator-skill-{}", - &run_id.get(..8).unwrap_or(&run_id) - )); - let (tx, rx) = tokio::sync::mpsc::channel(256); - agent.set_on_progress(Some(tx)); - let bridge = tokio::spawn(run_log::drain_to_log(rx, log_path.clone())); - - // Register the cancellation token now (after the run can actually - // start) so `workflows_cancel` can stop it; a config/agent-build - // failure above returns before this, leaving nothing to leak. - let cancel_token = run_log::register_run_cancel(&run_id); - - let started = std::time::Instant::now(); - // Inherit the parent turn's origin so a skill triggered from an - // ExternalChannel / tainted context retains its provenance - // through the approval gate. Falls back to Cli for direct - // user-initiated RPC / CLI flows. - // - // Race the run against its cancellation token: if `workflows_cancel` - // fires the token, the run future is dropped (cancelled at its next - // await) and we record a CANCELLED footer. `Some(_)` ⇒ ran to a - // natural end; `None` ⇒ cancelled. - let result = tokio::select! 
{ - biased; - _ = cancel_token.cancelled() => None, - r = crate::openhuman::agent::turn_origin::with_origin( - inherited_origin, - with_autonomous_iter_cap( - WORKFLOW_RUN_MAX_ITERATIONS, - agent.run_single(&task_prompt), - ), - ) => Some(r), - }; - agent.set_on_progress(None); - drop(agent); - let _ = bridge.await; - - let ms = started.elapsed().as_millis() as u64; - run_log::unregister_run_cancel(&run_id); - match result { - None => { - let _ = - run_log::write_footer(&log_path, "CANCELLED", ms, "Run stopped by user.") - .await; - tracing::info!(run_id = %run_id, "[workflows] workflow_run: cancelled"); - } - Some(Ok(out)) => { - if let Some((line, count)) = run_log::detect_repeated_line(&out, 30, 4) { - let preview = line.chars().take(160).collect::(); - let body = format!( - "degenerate-response: autonomous run halted before marking DONE.\n\ - the model's final assistant message repeats the same line {count}× — \ - this is the known one-generation low-entropy loop failure mode, not a real result.\n\n\ - repeated line (truncated to 160 chars):\n {preview}\n\n\ - full final output follows below for forensic review:\n\n{out}", - ); - let _ = run_log::write_footer(&log_path, "DEGENERATE", ms, &body).await; - tracing::warn!( - run_id = %run_id, - repeats = count, - "[skills] workflow_run: degenerate final response rejected" - ); - } else { - let _ = run_log::write_footer(&log_path, "DONE", ms, &out).await; - tracing::info!(run_id = %run_id, "[skills] workflow_run: completed"); - } - } - Some(Err(e)) => { - let _ = run_log::write_footer(&log_path, "FAILED", ms, &format!("{e:#}")).await; - tracing::warn!(run_id = %run_id, error = ?e, "[skills] workflow_run: failed"); - } - } - }); - } - - Ok(WorkflowRunStarted { - run_id, - workflow_id, - log_path, - }) -} - -/// Poll a spawned run's log file until its terminal footer lands or the -/// `budget` elapses. 
Returns `Some(outcome)` the moment the footer is -/// readable (DONE / DEGENERATE / FAILED), or `None` if the run is still -/// `RUNNING` when the budget runs out — the caller then auto-detaches and -/// hands back the `run_id` so the work continues in the background. -/// -/// The poll happens in the runtime (a tokio sleep loop), NOT in the LLM — -/// the model issues one `run_workflow` tool call and gets either the result -/// or a "still running" handle back, never a busy-wait it has to drive. -pub(crate) async fn await_run_outcome( - log_path: &std::path::Path, - budget: std::time::Duration, -) -> Option { - // Tight enough that a fast workflow returns inline promptly; loose - // enough that polling a finished-but-slow log isn't a hot spin. - const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(750); - let deadline = tokio::time::Instant::now() + budget; - loop { - if let Some(outcome) = run_log::read_terminal_outcome(log_path) { - return Some(outcome); - } - if tokio::time::Instant::now() >= deadline { - return None; - } - let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); - tokio::time::sleep(POLL_INTERVAL.min(remaining)).await; - } -} - -fn handle_workflows_run(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - let started = match spawn_workflow_run_background(payload.workflow_id, payload.inputs).await - { - Ok(s) => s, - Err(e) => return Err(e), - }; - to_json(RpcOutcome::new( - serde_json::json!({ - "run_id": started.run_id, - "status": "started", - "workflow_id": started.workflow_id, - "log": started.log_path.display().to_string(), - }), - Vec::new(), - )) - }) -} - -#[derive(Debug, Deserialize)] -struct WorkflowsCancelParams { - run_id: String, -} - -/// `openhuman.workflows_cancel` — request cancellation of an in-flight run. -/// Fires the run's cancellation token; the run stops at its next await and -/// writes a `CANCELLED` footer. 
Returns `cancelled: false` when the run id is -/// unknown (already finished or never existed). -fn handle_workflows_cancel(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - let cancelled = run_log::cancel_run(&payload.run_id); - tracing::info!(run_id = %payload.run_id, cancelled, "[workflows][rpc] cancel"); - to_json(RpcOutcome::new( - serde_json::json!({ "run_id": payload.run_id, "cancelled": cancelled }), - Vec::new(), - )) - }) -} - -fn handle_workflows_read_resource(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - tracing::debug!( - workflow_id = %payload.workflow_id, - relative_path = %payload.relative_path, - "[skills][rpc] read_resource" - ); - let workspace = resolve_workspace_dir().await; - let relative = Path::new(&payload.relative_path); - match read_workflow_resource(workspace.as_path(), &payload.workflow_id, relative) { - Ok(content) => { - let bytes = content.len(); - to_json(RpcOutcome::new( - WorkflowsReadResourceResult { - workflow_id: payload.workflow_id, - relative_path: payload.relative_path, - content, - bytes, - }, - Vec::new(), - )) - } - Err(err) => { - tracing::debug!( - error = %err, - "[skills][rpc] read_resource: rejected" - ); - Err(err) - } - } - }) -} - -fn handle_workflows_create(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - tracing::debug!( - name = %payload.name, - scope = ?payload.scope, - "[skills][rpc] create" - ); - let workspace = resolve_workspace_dir().await; - match create_workflow(workspace.as_path(), payload.into()) { - Ok(skill) => { - tracing::debug!( - skill = %skill.name, - location = ?skill.location, - "[skills][rpc] create: ok" - ); - to_json(RpcOutcome::new( - WorkflowsCreateResult { - skill: WorkflowSummary::from(skill), - }, - Vec::new(), - )) - } - Err(err) => { - tracing::debug!(error = %err, "[skills][rpc] create: rejected"); - 
Err(err) - } - } - }) -} - -/// `openhuman.workflows_update` — edit an existing workflow. Same payload as -/// create, but overwrites the workflow at the resolved slug (frontmatter + -/// workflow.toml rewritten; the hand-authored body is preserved). -fn handle_workflows_update(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = deserialize_params::(params)?; - tracing::debug!( - name = %payload.name, - scope = ?payload.scope, - "[workflows][rpc] update" - ); - let workspace = resolve_workspace_dir().await; - let mut create_params: CreateWorkflowParams = payload.into(); - create_params.overwrite = true; - match create_workflow(workspace.as_path(), create_params) { - Ok(skill) => to_json(RpcOutcome::new( - WorkflowsCreateResult { - skill: WorkflowSummary::from(skill), - }, - Vec::new(), - )), - Err(err) => { - tracing::debug!(error = %err, "[workflows][rpc] update: rejected"); - Err(err) - } - } - }) -} - -fn handle_workflows_install_from_url(params: Map) -> ControllerFuture { - Box::pin(async move { - let wire = deserialize_params::(params)?; - tracing::debug!( - url = %wire.url, - timeout_secs = ?wire.timeout_secs, - "[skills][rpc] install_from_url" - ); - let config = resolve_config().await; - let workspace = config.workspace_dir.clone(); - let payload: InstallWorkflowFromUrlParams = wire.into(); - match install_workflow_from_url(workspace.as_path(), payload).await { - Ok(outcome) => { - tracing::debug!( - url = %outcome.url, - new_count = outcome.new_skills.len(), - "[skills][rpc] install_from_url: ok" - ); - to_json(RpcOutcome::new( - WorkflowsInstallFromUrlResult { - url: outcome.url, - stdout: outcome.stdout, - stderr: outcome.stderr, - new_skills: outcome.new_skills, - }, - Vec::new(), - )) - } - Err(err) => { - tracing::debug!(error = %err, "[skills][rpc] install_from_url: rejected"); - Err(err) - } - } - }) -} - -fn handle_workflows_uninstall(params: Map) -> ControllerFuture { - Box::pin(async move { - let payload = 
deserialize_params::(params)?; - tracing::debug!(name = %payload.name, "[skills][rpc] uninstall"); - match uninstall_workflow(payload, None) { - Ok(outcome) => { - tracing::debug!( - name = %outcome.name, - removed_path = %outcome.removed_path, - "[skills][rpc] uninstall: ok" - ); - to_json(RpcOutcome::new( - WorkflowsUninstallResult { - name: outcome.name, - removed_path: outcome.removed_path, - scope: outcome.scope, - }, - Vec::new(), - )) - } - Err(err) => { - tracing::debug!(error = %err, "[skills][rpc] uninstall: rejected"); - Err(err) - } - } - }) -} - -/// Resolve the active [`Config`]. Falls back to `Config::default()` with a -/// best-effort workspace directory if the persisted load times out or errors, -/// so headless diagnostics still work in partially-initialized environments. -async fn resolve_config() -> Config { - match tokio::time::timeout(std::time::Duration::from_secs(30), Config::load_or_init()).await { - Ok(Ok(cfg)) => cfg, - Ok(Err(err)) => { - tracing::debug!( - error = %err, - "[skills][rpc] config load failed; falling back to default config" - ); - fallback_config() - } - Err(_) => { - tracing::debug!("[skills][rpc] config load timed out; falling back to default config"); - fallback_config() - } - } -} - -fn fallback_config() -> Config { - Config { - workspace_dir: fallback_workspace_dir(), - ..Default::default() - } -} - -/// Resolve the active workspace directory. Falls back to the runtime default -/// if the persisted config fails to load so the CLI and headless diagnostics -/// still work in partially-initialized environments. 
-pub(crate) async fn resolve_workspace_dir() -> PathBuf { - match tokio::time::timeout(std::time::Duration::from_secs(30), Config::load_or_init()).await { - Ok(Ok(cfg)) => cfg.workspace_dir, - Ok(Err(err)) => { - tracing::debug!( - error = %err, - "[skills][rpc] config load failed; falling back to default workspace" - ); - fallback_workspace_dir() - } - Err(_) => { - tracing::debug!( - "[skills][rpc] config load timed out; falling back to default workspace" - ); - fallback_workspace_dir() - } - } -} - -fn fallback_workspace_dir() -> PathBuf { - crate::openhuman::config::default_root_openhuman_dir() - .unwrap_or_else(|_| PathBuf::from(".openhuman")) - .join("workspace") -} - -fn deserialize_params(params: Map) -> Result { - serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) -} - -fn to_json(outcome: RpcOutcome) -> Result { - outcome.into_cli_compatible_json() -} - -#[cfg(test)] -#[path = "schemas_tests.rs"] -mod tests; diff --git a/src/openhuman/workflows/schemas/controller_schemas.rs b/src/openhuman/workflows/schemas/controller_schemas.rs new file mode 100644 index 0000000000..9f2ccf83d9 --- /dev/null +++ b/src/openhuman/workflows/schemas/controller_schemas.rs @@ -0,0 +1,501 @@ +//! Controller schema definitions for every `openhuman.workflows_*` RPC method. +//! +//! `workflows_schemas(function)` returns the [`ControllerSchema`] for the +//! named function. `all_workflows_controller_schemas` and +//! `all_workflows_registered_controllers` wire everything into the global +//! registry in `src/core/all.rs`. 
+ +use crate::core::all::RegisteredController; +use crate::core::{ControllerSchema, FieldSchema, TypeSchema}; + +use super::handlers::{ + handle_workflows_cancel, handle_workflows_create, handle_workflows_describe, + handle_workflows_install_from_url, handle_workflows_list, handle_workflows_read_resource, + handle_workflows_read_run_log, handle_workflows_recent_runs, handle_workflows_run, + handle_workflows_uninstall, handle_workflows_update, +}; + +pub fn all_workflows_controller_schemas() -> Vec { + vec![ + workflows_schemas("workflows_list"), + workflows_schemas("workflows_describe"), + workflows_schemas("workflows_recent_runs"), + workflows_schemas("workflows_read_run_log"), + workflows_schemas("workflows_read_resource"), + workflows_schemas("workflows_create"), + workflows_schemas("workflows_update"), + workflows_schemas("workflows_install_from_url"), + workflows_schemas("workflows_uninstall"), + workflows_schemas("workflows_run"), + workflows_schemas("workflows_cancel"), + ] +} + +pub fn all_workflows_registered_controllers() -> Vec { + vec![ + RegisteredController { + schema: workflows_schemas("workflows_list"), + handler: handle_workflows_list, + }, + RegisteredController { + schema: workflows_schemas("workflows_describe"), + handler: handle_workflows_describe, + }, + RegisteredController { + schema: workflows_schemas("workflows_recent_runs"), + handler: handle_workflows_recent_runs, + }, + RegisteredController { + schema: workflows_schemas("workflows_read_run_log"), + handler: handle_workflows_read_run_log, + }, + RegisteredController { + schema: workflows_schemas("workflows_read_resource"), + handler: handle_workflows_read_resource, + }, + RegisteredController { + schema: workflows_schemas("workflows_create"), + handler: handle_workflows_create, + }, + RegisteredController { + schema: workflows_schemas("workflows_update"), + handler: handle_workflows_update, + }, + RegisteredController { + schema: workflows_schemas("workflows_install_from_url"), + 
handler: handle_workflows_install_from_url, + }, + RegisteredController { + schema: workflows_schemas("workflows_uninstall"), + handler: handle_workflows_uninstall, + }, + RegisteredController { + schema: workflows_schemas("workflows_run"), + handler: handle_workflows_run, + }, + RegisteredController { + schema: workflows_schemas("workflows_cancel"), + handler: handle_workflows_cancel, + }, + ] +} + +pub fn workflows_schemas(function: &str) -> ControllerSchema { + match function { + "workflows_list" => ControllerSchema { + namespace: "workflows", + function: "list", + description: "List SKILL.md and legacy skills discovered in the user home and workspace.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "skills", + ty: TypeSchema::Array(Box::new(TypeSchema::Ref("WorkflowSummary"))), + comment: "Discovered skills (sorted by name, project-scope shadows user-scope).", + required: true, + }], + }, + "workflows_run" => ControllerSchema { + namespace: "workflows", + function: "run", + description: "Start a skill in the background: run the orchestrator agent focused by the skill's SKILL.md + the given inputs, streaming every step to a per-run log file. 
Validates required inputs and returns immediately with a run id and the log path.", + inputs: vec![ + FieldSchema { + name: "workflow_id", + ty: TypeSchema::String, + comment: "Id of the skill to run (matches WorkflowDefinition.id).", + required: true, + }, + FieldSchema { + name: "inputs", + ty: TypeSchema::Json, + comment: "Object of input values keyed by the skill's declared input names.", + required: false, + }, + ], + outputs: vec![ + FieldSchema { + name: "run_id", + ty: TypeSchema::String, + comment: "Id for this background run.", + required: true, + }, + FieldSchema { + name: "status", + ty: TypeSchema::String, + comment: "Always \"started\" — the orchestrator runs in the background.", + required: true, + }, + FieldSchema { + name: "workflow_id", + ty: TypeSchema::String, + comment: "Echo of the requested skill id.", + required: true, + }, + FieldSchema { + name: "log", + ty: TypeSchema::String, + comment: "Path to the per-run streaming log (/skills/.runs/_.log).", + required: true, + }, + ], + }, + "workflows_cancel" => ControllerSchema { + namespace: "workflows", + function: "cancel", + description: "Request cancellation of an in-flight workflow run by run_id. The run stops at its next await point and records a CANCELLED footer. 
Returns cancelled=false if the run id is unknown (already finished or never existed).", + inputs: vec![FieldSchema { + name: "run_id", + ty: TypeSchema::String, + comment: "Id of the running workflow run to cancel (from workflows_run).", + required: true, + }], + outputs: vec![ + FieldSchema { + name: "run_id", + ty: TypeSchema::String, + comment: "Echo of the requested run id.", + required: true, + }, + FieldSchema { + name: "cancelled", + ty: TypeSchema::Bool, + comment: "True if a live run was found and signalled; false if unknown.", + required: true, + }, + ], + }, + "workflows_read_resource" => ControllerSchema { + namespace: "workflows", + function: "read_resource", + description: "Read a single bundled SKILL resource file, hardened against traversal, symlink escape, and oversized payloads.", + inputs: vec![ + FieldSchema { + name: "workflow_id", + ty: TypeSchema::String, + comment: "Name of the skill (matches WorkflowSummary.id / Workflow.name).", + required: true, + }, + FieldSchema { + name: "relative_path", + ty: TypeSchema::String, + comment: "Path to the resource file, relative to the skill root (e.g. 
'scripts/foo.sh').", + required: true, + }, + ], + outputs: vec![ + FieldSchema { + name: "workflow_id", + ty: TypeSchema::String, + comment: "Echo of the requested skill id.", + required: true, + }, + FieldSchema { + name: "relative_path", + ty: TypeSchema::String, + comment: "Echo of the requested relative path.", + required: true, + }, + FieldSchema { + name: "content", + ty: TypeSchema::String, + comment: "File contents (UTF-8, <= 128 KB).", + required: true, + }, + FieldSchema { + name: "bytes", + ty: TypeSchema::U64, + comment: "Size of the file on disk, in bytes.", + required: true, + }, + ], + }, + "workflows_create" => ControllerSchema { + namespace: "workflows", + function: "create", + description: "Scaffold a new SKILL.md skill under the user or workspace scope.", + inputs: vec![ + FieldSchema { + name: "name", + ty: TypeSchema::String, + comment: "Human-readable name (slugified into the on-disk directory).", + required: true, + }, + FieldSchema { + name: "description", + ty: TypeSchema::String, + comment: "One-line description written into SKILL.md frontmatter.", + required: true, + }, + FieldSchema { + name: "when_to_use", + ty: TypeSchema::String, + comment: "Optional 'when to run me' trigger. 
Written to the sibling skill.toml; the registry surfaces it as the workflow's when_to_use (falls back to description).", + required: false, + }, + FieldSchema { + name: "scope", + ty: TypeSchema::String, + comment: "Target scope: 'user' (default) or 'project' (requires trust marker).", + required: false, + }, + FieldSchema { + name: "license", + ty: TypeSchema::String, + comment: "Optional SPDX license identifier.", + required: false, + }, + FieldSchema { + name: "author", + ty: TypeSchema::String, + comment: "Optional author name (written under frontmatter.metadata.author).", + required: false, + }, + FieldSchema { + name: "tags", + ty: TypeSchema::Array(Box::new(TypeSchema::String)), + comment: "Optional tags for the skill.", + required: false, + }, + FieldSchema { + name: "allowed_tools", + ty: TypeSchema::Array(Box::new(TypeSchema::String)), + comment: "Optional tool hints (maps to frontmatter.allowed-tools).", + required: false, + }, + FieldSchema { + name: "inputs", + ty: TypeSchema::Json, + comment: "Optional declared `[[inputs]]` entries (each `{ name, description, required, type }`). When non-empty, a sibling `skill.toml` is written alongside `SKILL.md` so the Skills Runner can render dynamic form controls at run time.", + required: false, + }, + ], + outputs: vec![FieldSchema { + name: "skill", + ty: TypeSchema::Ref("WorkflowSummary"), + comment: "The newly created skill, re-discovered through the standard pipeline.", + required: true, + }], + }, + // Same wire shape as create; overwrites the workflow at the resolved + // slug (frontmatter + workflow.toml) while preserving the body. 
+ "workflows_update" => { + let mut s = workflows_schemas("workflows_create"); + s.function = "update"; + s.description = + "Edit an existing workflow: overwrite frontmatter + workflow.toml at the resolved slug, preserving the hand-authored body."; + s + } + "workflows_install_from_url" => ControllerSchema { + namespace: "workflows", + function: "install_from_url", + description: "Install a remote skill by fetching its SKILL.md over HTTPS and writing it into the user-scope skills directory. URL must be https, resolve to a public host, and point at a single `.md` file (`github.com/.../blob/...` auto-rewrites to raw). Default 60s timeout, max 600s.", + inputs: vec![ + FieldSchema { + name: "url", + ty: TypeSchema::String, + comment: "Remote skill package URL (https only; loopback / private / link-local hosts rejected).", + required: true, + }, + FieldSchema { + name: "timeout_secs", + ty: TypeSchema::U64, + comment: "Optional wall-clock override in seconds. Default 60, capped at 600.", + required: false, + }, + ], + outputs: vec![ + FieldSchema { + name: "url", + ty: TypeSchema::String, + comment: "Echo of the installed URL.", + required: true, + }, + FieldSchema { + name: "stdout", + ty: TypeSchema::String, + comment: "Human-readable diagnostic summary (bytes fetched, target path).", + required: true, + }, + FieldSchema { + name: "stderr", + ty: TypeSchema::String, + comment: "Non-fatal frontmatter parse warnings, joined by newlines.", + required: true, + }, + FieldSchema { + name: "new_skills", + ty: TypeSchema::Array(Box::new(TypeSchema::String)), + comment: "Slugs of skills that appeared in the catalog as a result of the install.", + required: true, + }, + ], + }, + "workflows_read_run_log" => ControllerSchema { + namespace: "workflows", + function: "read_run_log", + description: "Read a slice of a skill run's streaming log file by run_id. 
The FE Skills Runner panel opens this on click of a Recent Runs row and re-calls it every 2s while the run's `status` is RUNNING to tail new bytes (use the returned `offset` as the next call's `offset`). The run id resolves to a path internally — callers don't supply a path, so no traversal surface. `max_bytes` is clamped to 262144 (256 KiB) per call; pages by re-issuing with the returned `offset`.", + inputs: vec![ + FieldSchema { + name: "run_id", + ty: TypeSchema::String, + comment: "Run id from `skills_recent_runs.runs[].run_id` (matched by 8-char prefix against the log filename).", + required: true, + }, + FieldSchema { + name: "offset", + ty: TypeSchema::U64, + comment: "Byte offset to start reading from. Default 0 (read from start); the FE passes the previous response's `offset` for tail-mode polling.", + required: false, + }, + FieldSchema { + name: "max_bytes", + ty: TypeSchema::U64, + comment: "Max bytes to return in this slice. Default 65536 (64 KiB), capped at 262144 (256 KiB).", + required: false, + }, + ], + outputs: vec![ + FieldSchema { + name: "offset", + ty: TypeSchema::U64, + comment: "New read cursor — pass this as the next call's `offset` to tail forward.", + required: true, + }, + FieldSchema { + name: "bytes_read", + ty: TypeSchema::U64, + comment: "Number of bytes returned in this slice.", + required: true, + }, + FieldSchema { + name: "content", + ty: TypeSchema::String, + comment: "The slice contents (UTF-8, lossy-decoded so a partial multibyte tail doesn't error).", + required: true, + }, + FieldSchema { + name: "eof", + ty: TypeSchema::Bool, + comment: "True if the read reached end-of-file. May still be FALSE-complete (run still streaming).", + required: true, + }, + FieldSchema { + name: "complete", + ty: TypeSchema::Bool, + comment: "True once the run footer (`--- result ---`) has landed in the file. 
The FE stops polling when this flips true.", + required: true, + }, + ], + }, + "workflows_recent_runs" => ControllerSchema { + namespace: "workflows", + function: "recent_runs", + description: "List recent autonomous skill runs by scanning `/skills/.runs/`. Returns one entry per log file (header: workflow_id, run_id, started; footer: status, duration_ms, finished) sorted by `started` descending. `status` is `RUNNING` while the footer hasn't landed yet, then `DONE` / `DEGENERATE` / `FAILED`. Optionally filter by `workflow_id` to scope to one skill; `limit` (default 20, max 100) caps the result. Cheap: reads the files top-to-bottom and short-circuits — no schema parsing of the streaming body.", + inputs: vec![ + FieldSchema { + name: "workflow_id", + ty: TypeSchema::String, + comment: "Optional: restrict results to runs of one skill (e.g. \"github-issue-crusher\"). Omit to return runs across every skill.", + required: false, + }, + FieldSchema { + name: "limit", + ty: TypeSchema::U64, + comment: "Cap on the number of entries returned. Default 20, clamped to 100.", + required: false, + }, + ], + outputs: vec![FieldSchema { + name: "runs", + ty: TypeSchema::Json, + comment: "Array of `{ run_id, workflow_id, started, status, duration_ms, finished, log_path }` — see crate::openhuman::workflows::run_log::ScannedRun.", + required: true, + }], + }, + "workflows_describe" => ControllerSchema { + namespace: "workflows", + function: "describe", + description: "Describe a single skill by id — returns its display name, summary, and the declared `[[inputs]]` block. Used by the Settings → Skills Runner panel to render dynamic input controls and let the user fill in the right fields before clicking Run Now or scheduling a cron. `skills_list` does NOT carry `inputs` (it stays the lightweight enumeration); call this once per skill the user picks.", + inputs: vec![FieldSchema { + name: "workflow_id", + ty: TypeSchema::String, + comment: "Workflow id from `skills_list` (e.g. 
\"github-issue-crusher\", \"pr-review-shepherd\", \"dev-workflow\").", + required: true, + }], + outputs: vec![ + FieldSchema { + name: "id", + ty: TypeSchema::String, + comment: "Echo of the resolved skill id.", + required: true, + }, + FieldSchema { + name: "display_name", + ty: TypeSchema::String, + comment: "Human-friendly display name (falls back to the id when unset).", + required: true, + }, + FieldSchema { + name: "when_to_use", + ty: TypeSchema::String, + comment: "Short one-line summary from skill.toml `when_to_use` — what the skill does and when to pick it.", + required: true, + }, + // Wire shape: array of objects. `handle_workflows_describe` + // serialises this as a real array of `WorkflowInputDescription` + // objects — `{name, description, required, type}` per entry — + // so the controller-catalog type is `Json`, matching the + // payload rather than coercing it to a scalar string. + FieldSchema { + name: "inputs", + ty: TypeSchema::Json, + comment: "Array of `[[inputs]]` entries; each entry: `{ name, description, required, type }`. Renderable as a dynamic form.", + required: true, + }, + ], + }, + "workflows_uninstall" => ControllerSchema { + namespace: "workflows", + function: "uninstall", + description: "Remove an installed user-scope SKILL.md skill from `~/.openhuman/skills//`. Only user-scope installs are supported; project-scope and legacy skills are read-only. 
Rejects path separators and traversal; canonicalises before delete.", + inputs: vec![FieldSchema { + name: "name", + ty: TypeSchema::String, + comment: "Exact on-disk slug of the installed skill — matches WorkflowSummary.id (the directory under ~/.openhuman/skills/), which may differ from the frontmatter display name in Workflow.name.", + required: true, + }], + outputs: vec![ + FieldSchema { + name: "name", + ty: TypeSchema::String, + comment: "Echo of the removed skill slug.", + required: true, + }, + FieldSchema { + name: "removed_path", + ty: TypeSchema::String, + comment: "Canonical on-disk path that was deleted.", + required: true, + }, + FieldSchema { + name: "scope", + ty: TypeSchema::String, + comment: "Scope the uninstall applied to. Always `user` today.", + required: true, + }, + ], + }, + _ => ControllerSchema { + namespace: "workflows", + function: "unknown", + description: "Unknown skills controller.", + inputs: vec![], + outputs: vec![FieldSchema { + name: "error", + ty: TypeSchema::String, + comment: "Lookup error details.", + required: true, + }], + }, + } +} diff --git a/src/openhuman/workflows/schemas/handlers.rs b/src/openhuman/workflows/schemas/handlers.rs new file mode 100644 index 0000000000..e69eeb7a72 --- /dev/null +++ b/src/openhuman/workflows/schemas/handlers.rs @@ -0,0 +1,338 @@ +//! RPC handler functions for all `openhuman.workflows_*` controllers. +//! +//! Each `handle_*` function deserialises its params, calls into the domain +//! ops layer, and serialises the result back as JSON. Business logic lives in +//! `ops.rs` / `run_machinery.rs`; this layer is intentionally thin. 
+ +use std::path::Path; + +use serde_json::{Map, Value}; + +use crate::core::all::ControllerFuture; +use crate::openhuman::workflows::ops::{ + create_workflow, discover_workflows, install_workflow_from_url, is_workspace_trusted, + read_workflow_resource, uninstall_workflow, CreateWorkflowParams, UninstallWorkflowParams, +}; +use crate::openhuman::workflows::{registry, run_log}; +use crate::rpc::RpcOutcome; + +use super::helpers::{deserialize_params, resolve_config, resolve_workspace_dir, to_json}; +use super::run_machinery::spawn_workflow_run_background; +use super::wire_types::{ + WorkflowInputDescription, WorkflowSummary, WorkflowsCancelParams, WorkflowsCreateParams, + WorkflowsCreateResult, WorkflowsDescribeParams, WorkflowsDescribeResult, + WorkflowsInstallFromUrlParamsWire, WorkflowsInstallFromUrlResult, WorkflowsListParams, + WorkflowsListResult, WorkflowsReadResourceParams, WorkflowsReadResourceResult, + WorkflowsReadRunLogParams, WorkflowsRecentRunsParams, WorkflowsRecentRunsResult, + WorkflowsRunParams, WorkflowsUninstallResult, +}; + +pub(super) fn handle_workflows_list(params: Map) -> ControllerFuture { + Box::pin(async move { + let _ = deserialize_params::(params)?; + tracing::debug!("[skills][rpc] list skills"); + let workspace = resolve_workspace_dir().await; + let trusted = is_workspace_trusted(&workspace); + let home = dirs::home_dir(); + let skills = discover_workflows(home.as_deref(), Some(workspace.as_path()), trusted); + tracing::debug!( + count = skills.len(), + workspace = %workspace.display(), + trusted, + "[skills][rpc] list result" + ); + let summaries = skills.into_iter().map(WorkflowSummary::from).collect(); + to_json(RpcOutcome::new( + WorkflowsListResult { skills: summaries }, + Vec::new(), + )) + }) +} + +/// `openhuman.workflows_describe` — return a single skill's display metadata +/// and its declared `[[inputs]]` so the Skills Runner panel can render +/// the right form controls. 
`skills_list` deliberately stays the cheap +/// enumeration without input declarations (its `Workflow` source struct +/// predates `[[inputs]]`); on the user picking one we fetch the full +/// `WorkflowDefinition` (which carries inputs) and project the small, +/// FE-shaped subset they need. +pub(super) fn handle_workflows_describe(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + let workspace = resolve_workspace_dir().await; + let skill = registry::get_workflow(&workspace, &payload.workflow_id).ok_or_else(|| { + format!( + "workflows_describe: unknown skill '{}'", + payload.workflow_id + ) + })?; + let inputs = skill + .inputs + .iter() + .map(|i| WorkflowInputDescription { + name: i.name.clone(), + description: i.description.clone(), + required: i.required, + kind: i.kind.clone().unwrap_or_else(|| "string".to_string()), + }) + .collect(); + let display_name = skill + .definition + .display_name + .clone() + .unwrap_or_else(|| skill.definition.id.clone()); + to_json(RpcOutcome::new( + WorkflowsDescribeResult { + id: skill.definition.id.clone(), + display_name, + when_to_use: skill.definition.when_to_use.clone(), + inputs, + }, + Vec::new(), + )) + }) +} + +/// `openhuman.workflows_read_run_log` — return a slice of a skill run's +/// log file, identified by `run_id` (NOT a path — no traversal surface). +/// FE Skills Runner panel uses this to render the streaming log inline +/// when the user clicks a Recent Runs row, and tails it every 2s while +/// `complete` is false. 
+pub(super) fn handle_workflows_read_run_log(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + let workspace = resolve_workspace_dir().await; + let path = run_log::find_run_log_path(&workspace, &payload.run_id).ok_or_else(|| { + format!( + "workflows_read_run_log: unknown run_id '{}'", + payload.run_id + ) + })?; + let offset = payload.offset.unwrap_or(0); + // 64 KiB default per-call slice, hard cap at 256 KiB to keep the + // RPC response sane; the FE re-issues with the returned offset + // to page through larger logs. + let max_bytes = payload.max_bytes.unwrap_or(64 * 1024).min(256 * 1024) as usize; + match run_log::read_run_log_slice(&path, offset, max_bytes) { + Ok(slice) => to_json(RpcOutcome::new(slice, Vec::new())), + Err(e) => Err(format!("workflows_read_run_log: read failed: {e}")), + } + }) +} + +/// `openhuman.workflows_recent_runs` — list runs from `/skills/.runs/` +/// (most-recent first), optionally filtered to one skill, capped by `limit`. +/// Powers the Skills Runner panel's "Recent runs" section + future live-log +/// tail. Delegates the actual scan + parse to `run_log::scan_runs`. 
+pub(super) fn handle_workflows_recent_runs(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + let limit = payload.limit.unwrap_or(20).min(100) as usize; + let workspace = resolve_workspace_dir().await; + let runs = run_log::scan_runs(&workspace, payload.workflow_id.as_deref(), limit); + tracing::debug!( + count = runs.len(), + filter = ?payload.workflow_id, + limit, + "[skills][rpc] recent_runs" + ); + to_json(RpcOutcome::new( + WorkflowsRecentRunsResult { runs }, + Vec::new(), + )) + }) +} + +pub(super) fn handle_workflows_run(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + let started = match spawn_workflow_run_background(payload.workflow_id, payload.inputs).await + { + Ok(s) => s, + Err(e) => return Err(e), + }; + to_json(RpcOutcome::new( + serde_json::json!({ + "run_id": started.run_id, + "status": "started", + "workflow_id": started.workflow_id, + "log": started.log_path.display().to_string(), + }), + Vec::new(), + )) + }) +} + +/// `openhuman.workflows_cancel` — request cancellation of an in-flight run. +/// Fires the run's cancellation token; the run stops at its next await and +/// writes a `CANCELLED` footer. Returns `cancelled: false` when the run id is +/// unknown (already finished or never existed). 
+pub(super) fn handle_workflows_cancel(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + let cancelled = run_log::cancel_run(&payload.run_id); + tracing::info!(run_id = %payload.run_id, cancelled, "[workflows][rpc] cancel"); + to_json(RpcOutcome::new( + serde_json::json!({ "run_id": payload.run_id, "cancelled": cancelled }), + Vec::new(), + )) + }) +} + +pub(super) fn handle_workflows_read_resource(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + tracing::debug!( + workflow_id = %payload.workflow_id, + relative_path = %payload.relative_path, + "[skills][rpc] read_resource" + ); + let workspace = resolve_workspace_dir().await; + let relative = Path::new(&payload.relative_path); + match read_workflow_resource(workspace.as_path(), &payload.workflow_id, relative) { + Ok(content) => { + let bytes = content.len(); + to_json(RpcOutcome::new( + WorkflowsReadResourceResult { + workflow_id: payload.workflow_id, + relative_path: payload.relative_path, + content, + bytes, + }, + Vec::new(), + )) + } + Err(err) => { + tracing::debug!( + error = %err, + "[skills][rpc] read_resource: rejected" + ); + Err(err) + } + } + }) +} + +pub(super) fn handle_workflows_create(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + tracing::debug!( + name = %payload.name, + scope = ?payload.scope, + "[skills][rpc] create" + ); + let workspace = resolve_workspace_dir().await; + match create_workflow(workspace.as_path(), payload.into()) { + Ok(skill) => { + tracing::debug!( + skill = %skill.name, + location = ?skill.location, + "[skills][rpc] create: ok" + ); + to_json(RpcOutcome::new( + WorkflowsCreateResult { + skill: WorkflowSummary::from(skill), + }, + Vec::new(), + )) + } + Err(err) => { + tracing::debug!(error = %err, "[skills][rpc] create: rejected"); + Err(err) + } + } + }) +} + +/// `openhuman.workflows_update` — edit 
an existing workflow. Same payload as +/// create, but overwrites the workflow at the resolved slug (frontmatter + +/// workflow.toml rewritten; the hand-authored body is preserved). +pub(super) fn handle_workflows_update(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + tracing::debug!( + name = %payload.name, + scope = ?payload.scope, + "[workflows][rpc] update" + ); + let workspace = resolve_workspace_dir().await; + let mut create_params: CreateWorkflowParams = payload.into(); + create_params.overwrite = true; + match create_workflow(workspace.as_path(), create_params) { + Ok(skill) => to_json(RpcOutcome::new( + WorkflowsCreateResult { + skill: WorkflowSummary::from(skill), + }, + Vec::new(), + )), + Err(err) => { + tracing::debug!(error = %err, "[workflows][rpc] update: rejected"); + Err(err) + } + } + }) +} + +pub(super) fn handle_workflows_install_from_url(params: Map) -> ControllerFuture { + Box::pin(async move { + let wire = deserialize_params::(params)?; + tracing::debug!( + url = %wire.url, + timeout_secs = ?wire.timeout_secs, + "[skills][rpc] install_from_url" + ); + let config = resolve_config().await; + let workspace = config.workspace_dir.clone(); + let payload = wire.into(); + match install_workflow_from_url(workspace.as_path(), payload).await { + Ok(outcome) => { + tracing::debug!( + url = %outcome.url, + new_count = outcome.new_skills.len(), + "[skills][rpc] install_from_url: ok" + ); + to_json(RpcOutcome::new( + WorkflowsInstallFromUrlResult { + url: outcome.url, + stdout: outcome.stdout, + stderr: outcome.stderr, + new_skills: outcome.new_skills, + }, + Vec::new(), + )) + } + Err(err) => { + tracing::debug!(error = %err, "[skills][rpc] install_from_url: rejected"); + Err(err) + } + } + }) +} + +pub(super) fn handle_workflows_uninstall(params: Map) -> ControllerFuture { + Box::pin(async move { + let payload = deserialize_params::(params)?; + tracing::debug!(name = %payload.name, 
"[skills][rpc] uninstall"); + match uninstall_workflow(payload, None) { + Ok(outcome) => { + tracing::debug!( + name = %outcome.name, + removed_path = %outcome.removed_path, + "[skills][rpc] uninstall: ok" + ); + to_json(RpcOutcome::new( + WorkflowsUninstallResult { + name: outcome.name, + removed_path: outcome.removed_path, + scope: outcome.scope, + }, + Vec::new(), + )) + } + Err(err) => { + tracing::debug!(error = %err, "[skills][rpc] uninstall: rejected"); + Err(err) + } + } + }) +} diff --git a/src/openhuman/workflows/schemas/helpers.rs b/src/openhuman/workflows/schemas/helpers.rs new file mode 100644 index 0000000000..85b0934090 --- /dev/null +++ b/src/openhuman/workflows/schemas/helpers.rs @@ -0,0 +1,81 @@ +//! Internal utility helpers shared across all workflow schema handlers. +//! +//! Provides config / workspace resolution with graceful fallbacks, and the +//! thin `deserialize_params` / `to_json` adapters used by every handler. + +use std::path::PathBuf; + +use serde::de::DeserializeOwned; +use serde_json::{Map, Value}; + +use crate::openhuman::config::Config; +use crate::rpc::RpcOutcome; + +// ── Config / workspace resolution ──────────────────────────────────────────── + +/// Resolve the active [`Config`]. Falls back to `Config::default()` with a +/// best-effort workspace directory if the persisted load times out or errors, +/// so headless diagnostics still work in partially-initialized environments. 
+pub(super) async fn resolve_config() -> Config { + match tokio::time::timeout(std::time::Duration::from_secs(30), Config::load_or_init()).await { + Ok(Ok(cfg)) => cfg, + Ok(Err(err)) => { + tracing::debug!( + error = %err, + "[skills][rpc] config load failed; falling back to default config" + ); + fallback_config() + } + Err(_) => { + tracing::debug!("[skills][rpc] config load timed out; falling back to default config"); + fallback_config() + } + } +} + +fn fallback_config() -> Config { + Config { + workspace_dir: fallback_workspace_dir(), + ..Default::default() + } +} + +/// Resolve the active workspace directory. Falls back to the runtime default +/// if the persisted config fails to load so the CLI and headless diagnostics +/// still work in partially-initialized environments. +pub(crate) async fn resolve_workspace_dir() -> PathBuf { + match tokio::time::timeout(std::time::Duration::from_secs(30), Config::load_or_init()).await { + Ok(Ok(cfg)) => cfg.workspace_dir, + Ok(Err(err)) => { + tracing::debug!( + error = %err, + "[skills][rpc] config load failed; falling back to default workspace" + ); + fallback_workspace_dir() + } + Err(_) => { + tracing::debug!( + "[skills][rpc] config load timed out; falling back to default workspace" + ); + fallback_workspace_dir() + } + } +} + +fn fallback_workspace_dir() -> PathBuf { + crate::openhuman::config::default_root_openhuman_dir() + .unwrap_or_else(|_| PathBuf::from(".openhuman")) + .join("workspace") +} + +// ── Serde adapters ──────────────────────────────────────────────────────────── + +pub(super) fn deserialize_params( + params: Map, +) -> Result { + serde_json::from_value(Value::Object(params)).map_err(|e| format!("invalid params: {e}")) +} + +pub(super) fn to_json(outcome: RpcOutcome) -> Result { + outcome.into_cli_compatible_json() +} diff --git a/src/openhuman/workflows/schemas/mod.rs b/src/openhuman/workflows/schemas/mod.rs new file mode 100644 index 0000000000..0531ad678b --- /dev/null +++ 
b/src/openhuman/workflows/schemas/mod.rs @@ -0,0 +1,58 @@ +//! JSON-RPC / CLI controller surface for the workflows domain. +//! +//! Exposes: +//! * `workflows.list` — enumerate SKILL.md / legacy skills discovered in the +//! current user home and workspace. +//! * `workflows.read_resource` — read a single bundled resource file, with path +//! traversal, symlink, size and UTF-8 guards. +//! * `workflows.create` — scaffold a new SKILL.md skill under the user or +//! workspace scope. +//! * `workflows.install_from_url` — install a remote skill by fetching its +//! `SKILL.md` over HTTPS (size-capped, timeout-clamped) and writing it into +//! the user-scope skills directory. Rejects non-https, private-IP, and +//! non-SKILL.md URLs; normalises `github.com/.../blob/...` → raw. +//! +//! All controllers resolve the active workspace via the persisted config +//! layer (`config::load_config_with_timeout`) so the CLI and UI see the same +//! skills catalog without the caller having to thread a workspace path. +//! +//! ## Sub-module layout +//! +//! | Module | Lines | Role | +//! |-----------------------|--------|-------------------------------------------------------------| +//! | `wire_types` | ~250 | Param / result structs and `WorkflowSummary`. | +//! | `helpers` | ~80 | Config/workspace resolution + `deserialize_params`/`to_json`.| +//! | `run_machinery` | ~310 | Background run spawning and outcome polling. | +//! | `handlers` | ~240 | Thin `handle_*` dispatcher functions. | +//! | `controller_schemas` | ~300 | `workflows_schemas` match + `all_*` registry functions. | + +mod controller_schemas; +mod handlers; +mod helpers; +mod run_machinery; +mod wire_types; + +// ── External API — preserved exactly from the original schemas.rs ───────────── + +pub use controller_schemas::{ + all_workflows_controller_schemas, all_workflows_registered_controllers, workflows_schemas, +}; + +// `WorkflowSummary` is used by the unit tests. 
+#[cfg(test)] +pub(crate) use wire_types::WorkflowSummary; + +// `Workflow` is used by the unit tests (skill_summary_round_trip_minimum_fields). +#[cfg(test)] +pub(crate) use crate::openhuman::workflows::ops::Workflow; + +// `resolve_workspace_dir` is used by the `run_workflow` agent tool. +pub(crate) use helpers::resolve_workspace_dir; + +// `spawn_workflow_run_background` and `await_run_outcome` are used by the +// `run_workflow` agent tool for skill chaining. +pub(crate) use run_machinery::{await_run_outcome, spawn_workflow_run_background}; + +#[cfg(test)] +#[path = "../schemas_tests.rs"] +mod tests; diff --git a/src/openhuman/workflows/schemas/run_machinery.rs b/src/openhuman/workflows/schemas/run_machinery.rs new file mode 100644 index 0000000000..70428ab53e --- /dev/null +++ b/src/openhuman/workflows/schemas/run_machinery.rs @@ -0,0 +1,310 @@ +//! Background workflow run spawning and outcome polling. +//! +//! `spawn_workflow_run_background` is re-used by both the `workflows_run` +//! JSON-RPC controller and the `run_skill` agent tool (skill chaining). +//! `await_run_outcome` lets the model poll a spawned run's log file for a +//! terminal result without busy-waiting. + +use serde_json::Value; + +use crate::openhuman::agent::harness::session::Agent; +use crate::openhuman::agent::harness::subagent_runner::with_autonomous_iter_cap; +use crate::openhuman::config::Config; +use crate::openhuman::workflows::{preflight, registry, run_log}; + +use super::helpers::resolve_workspace_dir; + +/// Iteration cap for an autonomous skill run (orchestrator + sub-agents). High +/// enough to "run until done", while the repeated-failure circuit breaker still +/// stops dead-end grinding — deliberately bounded (not infinite) to cap spend. 
+const WORKFLOW_RUN_MAX_ITERATIONS: usize = 200; + +/// Outcome of [`spawn_workflow_run_background`]: the new run's `run_id`, the +/// canonical `workflow_id` the registry resolved it to, and the path of the +/// streaming log file every step + the footer get written to. +pub(crate) struct WorkflowRunStarted { + pub run_id: String, + pub workflow_id: String, + pub log_path: std::path::PathBuf, +} + +/// Spawn a single autonomous workflow_run as a detached `tokio::spawn`. Used by +/// both the `openhuman.workflows_run` JSON-RPC controller and the `run_skill` +/// agent tool (which lets the orchestrator chain one skill into another — +/// e.g. `github-issue-crusher` → `pr-review-shepherd` once the draft PR is +/// open). +/// +/// Returns immediately with the run handle; the actual work runs in the +/// background until a DONE / DEGENERATE / FAILED / CANCELLED footer is +/// written to the run log. Errors (unknown skill, missing required inputs, +/// failed preflight gate) surface as `Err(String)` *before* the spawn so +/// callers can reject malformed invocations synchronously. +pub(crate) async fn spawn_workflow_run_background( + skill_id_param: String, + inputs_param: Option, +) -> Result { + let workspace = resolve_workspace_dir().await; + let skill = registry::get_workflow(&workspace, &skill_id_param) + .ok_or_else(|| format!("workflow_run: unknown skill '{skill_id_param}'"))?; + let inputs = inputs_param.unwrap_or(Value::Null); + let missing = registry::missing_required_inputs(&skill.inputs, &inputs); + if !missing.is_empty() { + return Err(format!( + "workflow_run: missing required inputs: {}", + missing.join(", ") + )); + } + + // ── Preflight gates ───────────────────────────────────────────── + // Run BEFORE the orchestrator is built so failures surface + // synchronously to the caller (skills_run RPC or the run_skill + // agent tool) instead of leaking through as cryptic orchestrator + // output. Today only the [github] gate exists; future gates can + // chain here. 
+ if let Some(github_cfg) = skill.github.as_ref() { + let config_snapshot = match Config::load_or_init().await { + Ok(c) => c, + Err(e) => { + return Err(format!( + "workflow_run preflight: failed to load config to gate `{}`: {e:#}", + skill.definition.id + )); + } + }; + let probes = preflight::LivePreflightProbes::new(&config_snapshot); + if let Err(gate_err) = preflight::run_github_preflight(Some(github_cfg), &probes).await { + let tag = gate_err.tag(); + // Materialise a run-log entry on disk so the gate failure + // shows up in `/skills/.runs/` (and therefore + // in the FE's "Recent runs" list / log viewer) even though + // the orchestrator never booted. We write a header then a + // matching FAILED footer so `scan_runs` parses it cleanly. + let gate_run_id = uuid::Uuid::new_v4().to_string(); + let gate_log_path = + run_log::run_log_path(&workspace, &skill.definition.id, &gate_run_id); + let body = gate_err.to_user_message(Some(&gate_log_path.display().to_string())); + let header_prompt = format!( + "preflight gate: github\n\ + gate decision: FAILED ({tag})\n\ + detail: {body}" + ); + if let Err(e) = run_log::write_header( + &gate_log_path, + &skill.definition.id, + &gate_run_id, + &inputs, + &header_prompt, + ) + .await + { + tracing::warn!( + error = %e, + "[skills] preflight gate: failed to write run-log header" + ); + } + if let Err(e) = run_log::write_footer(&gate_log_path, "FAILED", 0, &body).await { + tracing::warn!( + error = %e, + "[skills] preflight gate: failed to write run-log footer" + ); + } + tracing::warn!( + workflow_id = %skill.definition.id, + gate = "github", + tag = %tag, + gate_log = %gate_log_path.display(), + "[skills] spawn_workflow_run_background: preflight gate failed" + ); + return Err(format!("[preflight:github:{tag}] {body}")); + } + tracing::info!( + workflow_id = %skill.definition.id, + "[skills] spawn_workflow_run_background: github preflight passed" + ); + } + + // Focus the orchestrator on this single skill: its SKILL.md 
rides in + // the task prompt as guidelines + the resolved inputs; the + // orchestrator's own system prompt and full tool access are kept. + let guidelines = match &skill.definition.system_prompt { + crate::openhuman::agent::harness::definition::PromptSource::Inline(s) => s.clone(), + _ => String::new(), + }; + let inputs_block = registry::render_inputs_block(&skill.inputs, &inputs); + let workflow_id = skill.definition.id.clone(); + let task_prompt = format!( + "You are running a single skill: **{workflow_id}**. Follow these guidelines exactly and \ + focus solely on completing this one task — do not pick up unrelated work.\n\n\ + # Workflow guidelines\n{guidelines}\n\n{inputs_block}", + ); + let run_id = uuid::Uuid::new_v4().to_string(); + let log_path = run_log::run_log_path(&workspace, &workflow_id, &run_id); + tracing::info!( + workflow_id = %workflow_id, + run_id = %run_id, + log = %log_path.display(), + "[skills] spawn_workflow_run_background: starting orchestrator run" + ); + + // Detached: build the orchestrator Agent inside the spawn so config / + // toolchain are loaded fresh per run; the parent returns the handle + // immediately. Same flow handle_workflows_run used to inline — extracted + // so the `run_skill` agent tool can re-use it for skill chaining. 
+ let inherited_origin = crate::openhuman::agent::turn_origin::current() + .unwrap_or(crate::openhuman::agent::turn_origin::AgentTurnOrigin::Cli); + { + let run_id = run_id.clone(); + let workflow_id = workflow_id.clone(); + let inputs = inputs.clone(); + let log_path = log_path.clone(); + let inherited_origin = inherited_origin.clone(); + tokio::spawn(async move { + if let Err(e) = + run_log::write_header(&log_path, &workflow_id, &run_id, &inputs, &task_prompt).await + { + tracing::warn!(run_id = %run_id, error = %e, "[skills] workflow_run: header write failed"); + } + let mut config = match Config::load_or_init().await { + Ok(c) => c, + Err(e) => { + let _ = run_log::write_footer( + &log_path, + "FAILED", + 0, + &format!("load config: {e:#}"), + ) + .await; + return; + } + }; + config.agent.max_tool_iterations = WORKFLOW_RUN_MAX_ITERATIONS; + // Only apply the permissive wildcard default when the operator + // hasn't configured an explicit allow-list — preserve any + // configured egress policy instead of unconditionally widening it. + if config.http_request.allowed_domains.is_empty() { + config.http_request.allowed_domains = vec!["*".to_string()]; + } + let mut agent = match Agent::from_config_for_agent(&config, "orchestrator") { + Ok(a) => a, + Err(e) => { + let _ = run_log::write_footer( + &log_path, + "FAILED", + 0, + &format!("build agent: {e:#}"), + ) + .await; + return; + } + }; + agent.set_event_context(run_id.clone(), "skill"); + agent.set_agent_definition_name(format!( + "orchestrator-skill-{}", + &run_id.get(..8).unwrap_or(&run_id) + )); + let (tx, rx) = tokio::sync::mpsc::channel(256); + agent.set_on_progress(Some(tx)); + let bridge = tokio::spawn(run_log::drain_to_log(rx, log_path.clone())); + + // Register the cancellation token now (after the run can actually + // start) so `workflows_cancel` can stop it; a config/agent-build + // failure above returns before this, leaving nothing to leak. 
+ let cancel_token = run_log::register_run_cancel(&run_id); + + let started = std::time::Instant::now(); + // Inherit the parent turn's origin so a skill triggered from an + // ExternalChannel / tainted context retains its provenance + // through the approval gate. Falls back to Cli for direct + // user-initiated RPC / CLI flows. + // + // Race the run against its cancellation token: if `workflows_cancel` + // fires the token, the run future is dropped (cancelled at its next + // await) and we record a CANCELLED footer. `Some(_)` ⇒ ran to a + // natural end; `None` ⇒ cancelled. + let result = tokio::select! { + biased; + _ = cancel_token.cancelled() => None, + r = crate::openhuman::agent::turn_origin::with_origin( + inherited_origin, + with_autonomous_iter_cap( + WORKFLOW_RUN_MAX_ITERATIONS, + agent.run_single(&task_prompt), + ), + ) => Some(r), + }; + agent.set_on_progress(None); + drop(agent); + let _ = bridge.await; + + let ms = started.elapsed().as_millis() as u64; + run_log::unregister_run_cancel(&run_id); + match result { + None => { + let _ = + run_log::write_footer(&log_path, "CANCELLED", ms, "Run stopped by user.") + .await; + tracing::info!(run_id = %run_id, "[workflows] workflow_run: cancelled"); + } + Some(Ok(out)) => { + if let Some((line, count)) = run_log::detect_repeated_line(&out, 30, 4) { + let preview = line.chars().take(160).collect::(); + let body = format!( + "degenerate-response: autonomous run halted before marking DONE.\n\ + the model's final assistant message repeats the same line {count}× — \ + this is the known one-generation low-entropy loop failure mode, not a real result.\n\n\ + repeated line (truncated to 160 chars):\n {preview}\n\n\ + full final output follows below for forensic review:\n\n{out}", + ); + let _ = run_log::write_footer(&log_path, "DEGENERATE", ms, &body).await; + tracing::warn!( + run_id = %run_id, + repeats = count, + "[skills] workflow_run: degenerate final response rejected" + ); + } else { + let _ = 
run_log::write_footer(&log_path, "DONE", ms, &out).await; + tracing::info!(run_id = %run_id, "[skills] workflow_run: completed"); + } + } + Some(Err(e)) => { + let _ = run_log::write_footer(&log_path, "FAILED", ms, &format!("{e:#}")).await; + tracing::warn!(run_id = %run_id, error = ?e, "[skills] workflow_run: failed"); + } + } + }); + } + + Ok(WorkflowRunStarted { + run_id, + workflow_id, + log_path, + }) +} + +/// Poll a spawned run's log file until its terminal footer lands or the +/// `budget` elapses. Returns `Some(outcome)` the moment the footer is +/// readable (DONE / DEGENERATE / FAILED), or `None` if the run is still +/// `RUNNING` when the budget runs out — the caller then auto-detaches and +/// hands back the `run_id` so the work continues in the background. +/// +/// NOTE(review): the spawner also writes a `CANCELLED` footer; confirm that +/// `run_log::read_terminal_outcome` reports it as terminal too. +/// +/// The log is checked once up front (so even a zero `budget` gets one read) +/// and then every 750 ms, or sooner when less than that remains of the budget. +/// +/// The poll happens in the runtime (a tokio sleep loop), NOT in the LLM — +/// the model issues one `run_workflow` tool call and gets either the result +/// or a "still running" handle back, never a busy-wait it has to drive. +pub(crate) async fn await_run_outcome( + log_path: &std::path::Path, + budget: std::time::Duration, +) -> Option { + // Tight enough that a fast workflow returns inline promptly; loose + // enough that polling a finished-but-slow log isn't a hot spin. 
+ const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(750); + let deadline = tokio::time::Instant::now() + budget; + loop { + if let Some(outcome) = run_log::read_terminal_outcome(log_path) { + return Some(outcome); + } + if tokio::time::Instant::now() >= deadline { + return None; + } + let remaining = deadline.saturating_duration_since(tokio::time::Instant::now()); + tokio::time::sleep(POLL_INTERVAL.min(remaining)).await; + } +} diff --git a/src/openhuman/workflows/schemas/wire_types.rs b/src/openhuman/workflows/schemas/wire_types.rs new file mode 100644 index 0000000000..1cdb29592d --- /dev/null +++ b/src/openhuman/workflows/schemas/wire_types.rs @@ -0,0 +1,247 @@ +//! 
Wire-format types: RPC param structs, result structs, and `WorkflowSummary`. +//! +//! Types in this module are `pub(super)` so that sibling sub-modules +//! within the `schemas` directory can use them, while the external API +//! remains controlled through `schemas/mod.rs`. The one exception is +//! `WorkflowSummary`, which is declared `pub(crate)`. + +use serde::{Deserialize, Serialize}; + +use crate::openhuman::workflows::ops::{ + CreateWorkflowParams, InstallWorkflowFromUrlParams, Workflow, WorkflowCreateInputDef, + WorkflowScope, +}; + +// ── Params ──────────────────────────────────────────────────────────────────── + +#[derive(Debug, Deserialize, Default)] +pub(super) struct WorkflowsListParams { + // No params today. Kept as an empty struct so future filters (scope, + // search, etc.) can slot in without breaking older clients. +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkflowsReadResourceParams { + pub(super) workflow_id: String, + pub(super) relative_path: String, +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkflowsCreateParams { + pub(super) name: String, + pub(super) description: String, + /// Optional trigger/goal — *when* an agent should reach for this workflow. + /// Merges the old agent-workflow's `when_to_use` into the unified create + /// form; written to `skill.toml`. Falls back to `description` when omitted. + #[serde(default)] + pub(super) when_to_use: Option, + #[serde(default)] + pub(super) scope: WorkflowScope, + #[serde(default)] + pub(super) license: Option, + #[serde(default)] + pub(super) author: Option, + #[serde(default)] + pub(super) tags: Vec, + #[serde(default, rename = "allowed-tools", alias = "allowed_tools")] + pub(super) allowed_tools: Vec, + /// Declared `[[inputs]]` entries supplied by the Create-a-Workflow form. + /// Empty when the user added no rows; otherwise written into a sibling + /// `skill.toml` alongside `SKILL.md` so the Skills Runner can render + /// dynamic form controls at run time. Wire-shape per row: + /// `{ name, description?, required, type? 
}` — see + /// [`WorkflowCreateInputDef`] in `ops_create.rs`. + #[serde(default)] + pub(super) inputs: Vec, +} + +/// Converts the wire params into [`CreateWorkflowParams`] field by field. +/// `overwrite` is always `false` on this path. +impl From for CreateWorkflowParams { + fn from(p: WorkflowsCreateParams) -> Self { + CreateWorkflowParams { + name: p.name, + description: p.description, + when_to_use: p.when_to_use, + scope: p.scope, + license: p.license, + author: p.author, + tags: p.tags, + allowed_tools: p.allowed_tools, + inputs: p.inputs, + overwrite: false, + } + } +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkflowsInstallFromUrlParamsWire { + pub(super) url: String, + #[serde(default)] + pub(super) timeout_secs: Option, +} + +impl From for InstallWorkflowFromUrlParams { + fn from(p: WorkflowsInstallFromUrlParamsWire) -> Self { + InstallWorkflowFromUrlParams { + url: p.url, + timeout_secs: p.timeout_secs, + } + } +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkflowsRunParams { + pub(super) workflow_id: String, + #[serde(default)] + pub(super) inputs: Option, +} + +#[derive(Debug, Deserialize)] +pub(super) struct WorkflowsCancelParams { + pub(super) run_id: String, +} + +#[derive(serde::Deserialize)] +pub(super) struct WorkflowsDescribeParams { + pub(super) workflow_id: String, +} + +/// Params for reading a run log. `run_id` is required; `offset` and +/// `max_bytes` are optional and deserialize to `None` when omitted. +#[derive(serde::Deserialize)] +pub(super) struct WorkflowsReadRunLogParams { + pub(super) run_id: String, + #[serde(default)] + pub(super) offset: Option, + #[serde(default)] + pub(super) max_bytes: Option, +} + +/// Params for listing recent runs. Both `workflow_id` and `limit` are +/// optional and deserialize to `None` when omitted. 
+#[derive(serde::Deserialize)] +pub(super) struct WorkflowsRecentRunsParams { + #[serde(default)] + pub(super) workflow_id: Option, + #[serde(default)] + pub(super) limit: Option, +} + +// ── Results ─────────────────────────────────────────────────────────────────── + +/// Wire-format representation of a discovered skill. Mirrors the fields in +/// [`Workflow`] that are useful to the UI while hiding the +/// `frontmatter` blob (which includes a flatten'd forward-compat hatch and +/// can balloon with arbitrary YAML). 
+#[derive(Debug, Serialize)] +pub(crate) struct WorkflowSummary { + pub(super) id: String, + pub(super) name: String, + pub(super) description: String, + pub(super) version: String, + pub(super) author: Option, + pub(super) tags: Vec, + pub(super) platforms: Vec, + pub(super) related_skills: Vec, + pub(super) source_format: String, + pub(super) tools: Vec, + pub(super) prompts: Vec, + pub(super) location: Option, + pub(super) resources: Vec, + pub(super) scope: WorkflowScope, + pub(super) legacy: bool, + pub(super) warnings: Vec, +} + +impl From for WorkflowSummary { + fn from(s: Workflow) -> Self { + // `id` is the on-disk slug the uninstall RPC resolves against. + // Prefer `dir_name`, but fall back to `name` for back-compat on + // deserialised `Workflow` values written before `dir_name` existed + // (default empty string). + let id = if s.dir_name.is_empty() { + s.name.clone() + } else { + s.dir_name.clone() + }; + WorkflowSummary { + id, + name: s.name, + description: s.description, + version: s.version, + author: s.author, + tags: s.tags, + platforms: s.platforms, + related_skills: s.related_skills, + // An empty `source_format` is reported as "legacy" or "openhuman", + // chosen by the `legacy` flag; a non-empty value passes through. 
+ source_format: if s.source_format.is_empty() { + if s.legacy { + "legacy".to_string() + } else { + "openhuman".to_string() + } + } else { + s.source_format + }, + tools: s.tools, + prompts: s.prompts, + // Paths are rendered to strings with `Path::display` for the wire. + location: s.location.as_ref().map(|p| p.display().to_string()), + resources: s + .resources + .into_iter() + .map(|p| p.display().to_string()) + .collect(), + scope: s.scope, + legacy: s.legacy, + warnings: s.warnings, + } + } +} + +#[derive(Debug, Serialize)] +pub(super) struct WorkflowsListResult { + pub(super) skills: Vec, +} + +#[derive(Debug, Serialize)] +pub(super) struct WorkflowsReadResourceResult { + pub(super) workflow_id: String, + pub(super) relative_path: String, + pub(super) content: String, + pub(super) bytes: usize, +} + +#[derive(Debug, Serialize)] +pub(super) struct WorkflowsCreateResult { + pub(super) skill: WorkflowSummary, +} + 
+#[derive(Debug, Serialize)] +pub(super) struct WorkflowsInstallFromUrlResult { + pub(super) url: String, + pub(super) stdout: String, + pub(super) stderr: String, + pub(super) new_skills: Vec, +} + +#[derive(Debug, Serialize)] +pub(super) struct WorkflowsUninstallResult { + pub(super) name: String, + pub(super) removed_path: String, + pub(super) scope: WorkflowScope, +} + +#[derive(serde::Serialize)] +pub(super) struct WorkflowInputDescription { + pub(super) name: String, + pub(super) description: String, + pub(super) required: bool, + #[serde(rename = "type")] + pub(super) kind: String, +} + +#[derive(serde::Serialize)] +pub(super) struct WorkflowsDescribeResult { + pub(super) id: String, + pub(super) display_name: String, + pub(super) when_to_use: String, + pub(super) inputs: Vec, +} + +#[derive(serde::Serialize)] +pub(super) struct WorkflowsRecentRunsResult { + pub(super) runs: Vec, +}