From dd212eb8c0c938f38f2e0409633a9245f178e1f0 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 20:10:06 +0530 Subject: [PATCH 01/12] fix(core): give the JSON-RPC runtime a 16 MiB worker stack to survive subagent delegation The standalone `openhuman-core run` server builds its tokio runtime with the default 2 MiB per-worker-thread stack. A single agent turn is already a very large async state machine (system prompt + hundreds of tool specs + the nested provider/tool loop); delegating to a sub-agent runs another full turn one level down. Even with the inner sub-agent future boxed (`subagent_runner::ops`, see #2234), that nesting overflows the 2 MiB stack and aborts the whole process: thread 'tokio-rt-worker' (...) has overflowed its stack fatal runtime error: stack overflow, aborting (SIGABRT) This takes the JSON-RPC server down mid-request. In the Playwright web E2E lane it manifests as `chat-harness-subagent` timing out (the orchestrator's final text never renders) followed by a cascade of `ECONNREFUSED` failures across every subsequent spec in the worker, because they all share the now-dead core. Set `thread_stack_size(16 MiB)` on the serve runtime so a subagent-nested agent turn fits comfortably. Reproduced and verified locally by driving the real `openhuman-core` + mock + web stack on isolated ports and running the subagent spec: - before: core aborts ("overflowed its stack"), spec fails at ~52s - after: core stays alive, spec passes in 7.4s, no overflow in core.log Co-Authored-By: Claude Opus 4.8 (1M context) --- src/core/cli.rs | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/core/cli.rs b/src/core/cli.rs index 52bc5d5641..5c4d592d24 100644 --- a/src/core/cli.rs +++ b/src/core/cli.rs @@ -278,8 +278,17 @@ fn run_server_command(args: &[String]) -> Result<()> { crate::core::logging::init_for_cli_run(verbose, log_scope); // Initialize the Tokio multi-threaded runtime. 
+ // + // A single agent turn is a very large async state machine (system prompt + + // hundreds of tool specs + the nested provider/tool loop), and delegating + // to a sub-agent runs another full turn one level down. Even with the inner + // sub-agent future boxed (`subagent_runner::ops`), that nesting overflows + // tokio's default 2 MiB worker-thread stack and aborts the whole process + // (SIGABRT: "thread 'tokio-rt-worker' has overflowed its stack"), taking + // the JSON-RPC server down mid-request. Give workers a roomier stack. let rt = tokio::runtime::Builder::new_multi_thread() .enable_all() + .thread_stack_size(16 * 1024 * 1024) .build()?; rt.block_on(async { crate::core::jsonrpc::run_server(host.as_deref(), port, socketio_enabled).await From d5d5a64a7703684da236b85781f8277fd3638183 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 20:40:42 +0530 Subject: [PATCH 02/12] test: green the Rust Core Coverage suite (stale assertions + env-race serialization) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cargo-llvm-cov core-coverage job is red on every PR for two independent reasons, both fixed here: 1. Stale assertions left by feature PRs (mostly #3113's sync-flow redesign): - `toolkit_from_slug("MICROSOFT_TEAMS_*")` now returns the full toolkit `"microsoft_teams"`, not `"microsoft"` (3 sites). - memory schema/controller registries grew: legacy tree 19 -> 21, memory_sources 9 -> 10. - The `dedicated_thread` flag no longer short-circuits with a "temporarily disabled" message (see spawn_subagent::dedicated_thread_flag_no_longer_returns_disabled_error); two raw-coverage tests still asserted the old message. 2. Env-race flakes under llvm-cov: several raw-coverage tests set the process-global `OPENHUMAN_WORKSPACE` env var per-test. Under llvm-cov's slower instrumentation the parallel default lets them trample each other's workspace (flaky count/state assertions) — they pass single-threaded. 
Run the core llvm-cov suite with `--test-threads=1` (in both pr-ci.yml and coverage.yml) so these hermetic-only-when-serialized tests are stable. Verified locally: the fixed assertions pass single-threaded, and the racing tests pass under `--test-threads=1`. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/coverage.yml | 5 ++++- .github/workflows/pr-ci.yml | 7 ++++++- tests/memory_raw_coverage_e2e.rs | 2 +- tests/memory_threads_raw_coverage_e2e.rs | 6 +++--- tests/memory_tree_sync_raw_coverage_e2e.rs | 2 +- tests/tools_agent_credentials_state_raw_coverage_e2e.rs | 4 +++- tests/tools_composio_network_leftovers_raw_coverage_e2e.rs | 4 +++- 7 files changed, 21 insertions(+), 9 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 9d108d9942..5f66f68cd0 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -94,7 +94,10 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - run: cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info + # Single-threaded: several raw-coverage tests set the process-global + # OPENHUMAN_WORKSPACE per-test and trample each other under llvm-cov's + # slower instrumentation when run in parallel. Serialize for hermeticity. 
+ run: cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 - name: Upload core lcov uses: actions/upload-artifact@v5 with: diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index c1cc739455..6982e5c52d 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -315,7 +315,12 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - run: bash scripts/ci-cancel-aware.sh cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info + # Run the coverage suite single-threaded: several raw-coverage tests set + # the process-global OPENHUMAN_WORKSPACE env var per-test, and under + # llvm-cov's slower instrumentation the parallel default lets them + # trample each other's workspace (flaky count/state assertions). These + # tests are hermetic only when serialized. + run: bash scripts/ci-cancel-aware.sh cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 - name: Upload core lcov uses: actions/upload-artifact@v5 diff --git a/tests/memory_raw_coverage_e2e.rs b/tests/memory_raw_coverage_e2e.rs index 3547810fae..b810775f8e 100644 --- a/tests/memory_raw_coverage_e2e.rs +++ b/tests/memory_raw_coverage_e2e.rs @@ -368,7 +368,7 @@ fn memory_sources_validation_and_sync_classification_edges() { assert_eq!(classify_unknown("GMAIL_FETCH_EMAILS"), ToolScope::Read); assert_eq!( toolkit_from_slug(" MICROSOFT_TEAMS_SEND "), - Some("microsoft".into()) + Some("microsoft_teams".into()) ); assert_eq!(toolkit_from_slug(""), None); let catalog = [CuratedTool { diff --git a/tests/memory_threads_raw_coverage_e2e.rs b/tests/memory_threads_raw_coverage_e2e.rs index a37cf5f4c9..cb52275fa1 100644 --- a/tests/memory_threads_raw_coverage_e2e.rs +++ b/tests/memory_threads_raw_coverage_e2e.rs @@ -989,7 +989,7 @@ fn memory_schema_registries_and_query_tool_metadata_cover_public_surfaces() { let legacy_tree_schemas = 
openhuman_core::openhuman::memory::schema::all_controller_schemas(); let legacy_tree_controllers = openhuman_core::openhuman::memory::schema::all_registered_controllers(); - assert_eq!(legacy_tree_schemas.len(), 19); + assert_eq!(legacy_tree_schemas.len(), 21); assert_eq!(legacy_tree_schemas.len(), legacy_tree_controllers.len()); for function in [ "ingest", @@ -1208,7 +1208,7 @@ fn memory_sync_composio_catalog_scope_and_state_helpers_cover_edge_cases() { assert_eq!(classify_unknown("GMAIL_FETCH_EMAILS"), ToolScope::Read); assert_eq!( toolkit_from_slug(" MICROSOFT_TEAMS_SEND_MESSAGE "), - Some("microsoft".into()) + Some("microsoft_teams".into()) ); assert_eq!(toolkit_from_slug(""), None); let catalog = &[CuratedTool { @@ -3761,7 +3761,7 @@ async fn memory_sources_registry_rpc_and_schema_handlers_cover_crud_edges() { let schemas = all_memory_sources_controller_schemas(); let controllers = all_memory_sources_registered_controllers(); - assert_eq!(schemas.len(), 9); + assert_eq!(schemas.len(), 10); assert_eq!(schemas.len(), controllers.len()); assert_eq!( openhuman_core::openhuman::memory_sources::schemas::schemas("read_item").function, diff --git a/tests/memory_tree_sync_raw_coverage_e2e.rs b/tests/memory_tree_sync_raw_coverage_e2e.rs index d3617b211b..4abcb32663 100644 --- a/tests/memory_tree_sync_raw_coverage_e2e.rs +++ b/tests/memory_tree_sync_raw_coverage_e2e.rs @@ -458,7 +458,7 @@ async fn composio_providers_sync_state_and_bus_surfaces_cover_read_write_edges() ); assert_eq!( toolkit_from_slug("MICROSOFT_TEAMS_SEND_MESSAGE").as_deref(), - Some("microsoft") + Some("microsoft_teams") ); assert_eq!(classify_unknown("GMAIL_DELETE_DRAFT"), ToolScope::Admin); assert_eq!(classify_unknown("NOTION_CREATE_PAGE"), ToolScope::Write); diff --git a/tests/tools_agent_credentials_state_raw_coverage_e2e.rs b/tests/tools_agent_credentials_state_raw_coverage_e2e.rs index 0e47477c5f..0f1aaa6804 100644 --- a/tests/tools_agent_credentials_state_raw_coverage_e2e.rs +++ 
b/tests/tools_agent_credentials_state_raw_coverage_e2e.rs @@ -655,7 +655,9 @@ async fn round16_spawn_subagent_tool_and_runner_error_success_paths() { .await .expect("dedicated thread returns tool result"); assert!(disabled_thread.is_error); - assert!(disabled_thread.output().contains("temporarily disabled")); + // The dedicated_thread flag no longer short-circuits with a "temporarily + // disabled" message (see spawn_subagent::dedicated_thread_flag_no_longer_returns_disabled_error). + assert!(!disabled_thread.output().contains("temporarily disabled")); let provider = Arc::new(ScriptedProvider::new(vec![response( Some("subagent final answer that will be clipped"), diff --git a/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs b/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs index 2ea64401ef..7d4aa4e8c4 100644 --- a/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs +++ b/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs @@ -592,7 +592,9 @@ async fn round20_spawn_subagent_covers_validation_schema_and_disabled_worker_bra .await .expect("dedicated thread disabled returns tool result"); assert!(dedicated_thread.is_error); - assert!(dedicated_thread.output().contains("temporarily disabled")); + // The dedicated_thread flag no longer short-circuits with a "temporarily + // disabled" message (see spawn_subagent::dedicated_thread_flag_no_longer_returns_disabled_error). 
+ assert!(!dedicated_thread.output().contains("temporarily disabled")); } async fn start_loopback(app: Router) -> String { From 543b8473980e7f24c845a8f30e0d3ae31c4d1627 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 21:06:52 +0530 Subject: [PATCH 03/12] test(coverage): serialize env-racing raw-coverage tests with env_lock (drop --test-threads=1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the blunt `--test-threads=1` coverage-job change with the per-test `env_lock()` pattern already used elsewhere in these suites, so the llvm-cov job keeps running tests in parallel (fast) and only the handful of tests that mutate the process-global `OPENHUMAN_WORKSPACE` serialize against each other. Four of the seven affected files already had an `env_lock()` helper — the racing test in each just wasn't taking it; the other three get the same self-contained helper. Compiles clean; the previously-flaky tests now hold the lock across their workspace setup so parallel llvm-cov runs stay hermetic. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/coverage.yml | 5 +---- .github/workflows/pr-ci.yml | 7 +------ tests/memory_threads_raw_coverage_e2e.rs | 18 ++++++++++++++++++ tests/memory_tree_sync_raw_coverage_e2e.rs | 11 +++++++++++ tests/near90_closure_raw_coverage_e2e.rs | 1 + tests/owned_domain_raw_coverage_e2e.rs | 11 +++++++++++ .../tool_registry_approval_raw_coverage_e2e.rs | 1 + ...posio_network_leftovers_raw_coverage_e2e.rs | 1 + tests/worker_b_raw_coverage_e2e.rs | 1 + 9 files changed, 46 insertions(+), 10 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 5f66f68cd0..9d108d9942 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -94,10 +94,7 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - # Single-threaded: several raw-coverage tests set the process-global - # OPENHUMAN_WORKSPACE per-test and trample each other under llvm-cov's - # slower instrumentation when run in parallel. Serialize for hermeticity. - run: cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 + run: cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info - name: Upload core lcov uses: actions/upload-artifact@v5 with: diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index 6982e5c52d..c1cc739455 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -315,12 +315,7 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - # Run the coverage suite single-threaded: several raw-coverage tests set - # the process-global OPENHUMAN_WORKSPACE env var per-test, and under - # llvm-cov's slower instrumentation the parallel default lets them - # trample each other's workspace (flaky count/state assertions). These - # tests are hermetic only when serialized. 
- run: bash scripts/ci-cancel-aware.sh cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 + run: bash scripts/ci-cancel-aware.sh cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info - name: Upload core lcov uses: actions/upload-artifact@v5 diff --git a/tests/memory_threads_raw_coverage_e2e.rs b/tests/memory_threads_raw_coverage_e2e.rs index cb52275fa1..bb5e61923e 100644 --- a/tests/memory_threads_raw_coverage_e2e.rs +++ b/tests/memory_threads_raw_coverage_e2e.rs @@ -752,6 +752,7 @@ async fn memory_source_status_counts_reader_and_composio_prefixes() { #[tokio::test] async fn memory_thread_tree_and_sync_controller_schemas_execute_public_handlers() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let config = Config::load_or_init().await.expect("init isolated config"); @@ -3479,6 +3480,7 @@ fn turn_state_store_persists_lists_marks_and_clears_snapshots() { #[tokio::test] async fn threads_rpc_ops_cover_crud_title_fallback_and_turn_state_cleanup() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let config = Config::load_or_init().await.expect("init isolated config"); let workspace_dir = config.workspace_dir.clone(); @@ -3685,6 +3687,7 @@ async fn threads_rpc_ops_cover_crud_title_fallback_and_turn_state_cleanup() { #[tokio::test] async fn threads_title_generation_branches_cover_noop_and_not_found_paths() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); Config::load_or_init().await.expect("init isolated config"); @@ -3755,6 +3758,7 @@ async fn threads_title_generation_branches_cover_noop_and_not_found_paths() { #[tokio::test] async fn memory_sources_registry_rpc_and_schema_handlers_cover_crud_edges() { let tmp = 
TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); Config::load_or_init().await.expect("init isolated config"); std::fs::write(tmp.path().join("reader-note.md"), "# Reader note").expect("write note"); @@ -3961,6 +3965,7 @@ async fn memory_sources_registry_rpc_and_schema_handlers_cover_crud_edges() { #[tokio::test] async fn memory_ops_public_handlers_cover_document_file_kv_graph_and_envelopes() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let init = openhuman_core::openhuman::memory::ops::memory_init(MemoryInitRequest { @@ -4334,6 +4339,7 @@ async fn memory_ops_public_handlers_cover_document_file_kv_graph_and_envelopes() #[tokio::test] async fn memory_tree_retrieval_rpc_and_schema_wrappers_cover_empty_and_invalid_paths() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let config = config_in(&tmp); @@ -4451,6 +4457,7 @@ async fn memory_tree_retrieval_rpc_and_schema_wrappers_cover_empty_and_invalid_p #[tokio::test] async fn memory_query_backend_and_tree_flush_wrappers_cover_public_edges() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let mut config = Config::load_or_init().await.expect("init isolated config"); config.memory_tree.embedding_endpoint = None; @@ -4590,6 +4597,7 @@ async fn tree_summarizer_ops_cover_validation_query_and_local_provider_guards() #[tokio::test] async fn memory_sources_types_registry_and_sync_state_cover_public_persistence_edges() { let tmp = TempDir::new().expect("tempdir"); + let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let _config = Config::load_or_init().await.expect("init 
isolated config"); openhuman_core::openhuman::memory_sources::reconcile::ensure_composio_sources().await; @@ -4800,3 +4808,13 @@ fn welcome_migration_public_entrypoint_covers_empty_marker_and_transcript_paths( .expect("second migration"); assert!(second.already_done); } + +// Serializes this binary's process-global OPENHUMAN_WORKSPACE mutation so the +// raw-coverage tests do not trample each other under parallel llvm-cov runs. +static ENV_LOCK: std::sync::OnceLock> = std::sync::OnceLock::new(); +fn env_lock() -> std::sync::MutexGuard<'static, ()> { + ENV_LOCK + .get_or_init(|| std::sync::Mutex::new(())) + .lock() + .unwrap_or_else(|e| e.into_inner()) +} diff --git a/tests/memory_tree_sync_raw_coverage_e2e.rs b/tests/memory_tree_sync_raw_coverage_e2e.rs index 4abcb32663..cce0419428 100644 --- a/tests/memory_tree_sync_raw_coverage_e2e.rs +++ b/tests/memory_tree_sync_raw_coverage_e2e.rs @@ -436,6 +436,7 @@ async fn memory_walk_provider_errors_and_unknown_actions_are_reported() { #[tokio::test] async fn composio_providers_sync_state_and_bus_surfaces_cover_read_write_edges() { + let _lock = env_lock(); let tmp = TempDir::new().expect("tempdir"); let _workspace = EnvVarGuard::set("OPENHUMAN_WORKSPACE", tmp.path()); let _triage = EnvVarGuard::set_str("OPENHUMAN_TRIGGER_TRIAGE_DISABLED", "yes"); @@ -646,3 +647,13 @@ async fn default_composio_provider_hooks_return_expected_noop_shapes() { .any(|entity| entity.canonical_id == "email:round14@example.com")); assert!(approx_token_count("one two three four") > 0); } + +// Serializes this binary's process-global OPENHUMAN_WORKSPACE mutation so the +// raw-coverage tests do not trample each other under parallel llvm-cov runs. 
+static ENV_LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new(); +fn env_lock() -> std::sync::MutexGuard<'static, ()> { + ENV_LOCK + .get_or_init(|| std::sync::Mutex::new(())) + .lock() + .unwrap_or_else(|e| e.into_inner()) +} diff --git a/tests/near90_closure_raw_coverage_e2e.rs b/tests/near90_closure_raw_coverage_e2e.rs index 93ac3d5e01..e00a892f5d 100644 --- a/tests/near90_closure_raw_coverage_e2e.rs +++ b/tests/near90_closure_raw_coverage_e2e.rs @@ -390,6 +390,7 @@ fn round20_credentials_profiles_cover_legacy_plaintext_errors_and_active_edges() #[tokio::test] async fn round20_memory_sources_readers_and_sync_cover_error_edges_without_network() { + let _lock = env_lock(); let _lock = env_lock(); let harness = setup("http://127.0.0.1:9"); let config = harness.config().await; diff --git a/tests/owned_domain_raw_coverage_e2e.rs b/tests/owned_domain_raw_coverage_e2e.rs index b4c0bb8fd1..98d02ddbde 100644 --- a/tests/owned_domain_raw_coverage_e2e.rs +++ b/tests/owned_domain_raw_coverage_e2e.rs @@ -340,6 +340,7 @@ fn owned_domain_config(workspace_root: &std::path::Path) -> Config { #[tokio::test] async fn openai_compatible_provider_covers_auth_temperature_tool_fallback_and_responses() { + let _lock = env_lock(); let (base_url, state) = serve_provider_mock().await; let provider = OpenAiCompatibleProvider::new_with_user_agent( "owned-mock", @@ -873,3 +874,13 @@ async fn tool_registry_controller_handlers_cover_list_get_and_validation_paths() .and_then(Value::as_u64) .is_some_and(|count| count > 0)); } + +// Serializes this binary's process-global OPENHUMAN_WORKSPACE mutation so the +// raw-coverage tests do not trample each other under parallel llvm-cov runs. 
+static ENV_LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new(); +fn env_lock() -> std::sync::MutexGuard<'static, ()> { + ENV_LOCK + .get_or_init(|| std::sync::Mutex::new(())) + .lock() + .unwrap_or_else(|e| e.into_inner()) +} diff --git a/tests/tool_registry_approval_raw_coverage_e2e.rs b/tests/tool_registry_approval_raw_coverage_e2e.rs index 760c267e15..62ce3be1ca 100644 --- a/tests/tool_registry_approval_raw_coverage_e2e.rs +++ b/tests/tool_registry_approval_raw_coverage_e2e.rs @@ -1157,6 +1157,7 @@ async fn approval_schema_handlers_validate_params_and_surface_empty_gate_state() #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { + let _lock = env_lock(); let _lock = env_lock(); let harness = setup("").await; let config = Config::load_or_init() diff --git a/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs b/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs index 7d4aa4e8c4..9e87a2f12c 100644 --- a/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs +++ b/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs @@ -554,6 +554,7 @@ async fn round20_polymarket_covers_discovery_errors_rpc_allowance_and_write_gate #[tokio::test] async fn round20_spawn_subagent_covers_validation_schema_and_disabled_worker_branch() { + let _lock = env_lock(); let _lock = env_lock(); let tool = SpawnSubagentTool::new(); diff --git a/tests/worker_b_raw_coverage_e2e.rs b/tests/worker_b_raw_coverage_e2e.rs index ed7bc226ba..f08bedb026 100644 --- a/tests/worker_b_raw_coverage_e2e.rs +++ b/tests/worker_b_raw_coverage_e2e.rs @@ -575,6 +575,7 @@ async fn agent_profile_lifecycle_persists_custom_profile_and_validates_delete() #[tokio::test] async fn approval_gate_rpc_decision_resumes_parked_tool_and_records_execution() { + let _lock = env_lock(); let _lock = env_lock(); let harness = setup().await; let config = Config::load_or_init() From 99276a37f5660f9ac276b5a058839074c6955730 
Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 21:18:29 +0530 Subject: [PATCH 04/12] Revert "test(coverage): serialize env-racing raw-coverage tests with env_lock (drop --test-threads=1)" This reverts commit 543b8473980e7f24c845a8f30e0d3ae31c4d1627. --- .github/workflows/coverage.yml | 5 ++++- .github/workflows/pr-ci.yml | 7 ++++++- tests/memory_threads_raw_coverage_e2e.rs | 18 ------------------ tests/memory_tree_sync_raw_coverage_e2e.rs | 11 ----------- tests/near90_closure_raw_coverage_e2e.rs | 1 - tests/owned_domain_raw_coverage_e2e.rs | 11 ----------- .../tool_registry_approval_raw_coverage_e2e.rs | 1 - ...posio_network_leftovers_raw_coverage_e2e.rs | 1 - tests/worker_b_raw_coverage_e2e.rs | 1 - 9 files changed, 10 insertions(+), 46 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 9d108d9942..5f66f68cd0 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -94,7 +94,10 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - run: cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info + # Single-threaded: several raw-coverage tests set the process-global + # OPENHUMAN_WORKSPACE per-test and trample each other under llvm-cov's + # slower instrumentation when run in parallel. Serialize for hermeticity. 
+ run: cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 - name: Upload core lcov uses: actions/upload-artifact@v5 with: diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index c1cc739455..6982e5c52d 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -315,7 +315,12 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - run: bash scripts/ci-cancel-aware.sh cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info + # Run the coverage suite single-threaded: several raw-coverage tests set + # the process-global OPENHUMAN_WORKSPACE env var per-test, and under + # llvm-cov's slower instrumentation the parallel default lets them + # trample each other's workspace (flaky count/state assertions). These + # tests are hermetic only when serialized. + run: bash scripts/ci-cancel-aware.sh cargo llvm-cov -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 - name: Upload core lcov uses: actions/upload-artifact@v5 diff --git a/tests/memory_threads_raw_coverage_e2e.rs b/tests/memory_threads_raw_coverage_e2e.rs index bb5e61923e..cb52275fa1 100644 --- a/tests/memory_threads_raw_coverage_e2e.rs +++ b/tests/memory_threads_raw_coverage_e2e.rs @@ -752,7 +752,6 @@ async fn memory_source_status_counts_reader_and_composio_prefixes() { #[tokio::test] async fn memory_thread_tree_and_sync_controller_schemas_execute_public_handlers() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let config = Config::load_or_init().await.expect("init isolated config"); @@ -3480,7 +3479,6 @@ fn turn_state_store_persists_lists_marks_and_clears_snapshots() { #[tokio::test] async fn threads_rpc_ops_cover_crud_title_fallback_and_turn_state_cleanup() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = 
EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let config = Config::load_or_init().await.expect("init isolated config"); let workspace_dir = config.workspace_dir.clone(); @@ -3687,7 +3685,6 @@ async fn threads_rpc_ops_cover_crud_title_fallback_and_turn_state_cleanup() { #[tokio::test] async fn threads_title_generation_branches_cover_noop_and_not_found_paths() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); Config::load_or_init().await.expect("init isolated config"); @@ -3758,7 +3755,6 @@ async fn threads_title_generation_branches_cover_noop_and_not_found_paths() { #[tokio::test] async fn memory_sources_registry_rpc_and_schema_handlers_cover_crud_edges() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); Config::load_or_init().await.expect("init isolated config"); std::fs::write(tmp.path().join("reader-note.md"), "# Reader note").expect("write note"); @@ -3965,7 +3961,6 @@ async fn memory_sources_registry_rpc_and_schema_handlers_cover_crud_edges() { #[tokio::test] async fn memory_ops_public_handlers_cover_document_file_kv_graph_and_envelopes() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let init = openhuman_core::openhuman::memory::ops::memory_init(MemoryInitRequest { @@ -4339,7 +4334,6 @@ async fn memory_ops_public_handlers_cover_document_file_kv_graph_and_envelopes() #[tokio::test] async fn memory_tree_retrieval_rpc_and_schema_wrappers_cover_empty_and_invalid_paths() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let config = config_in(&tmp); @@ -4457,7 +4451,6 @@ async fn 
memory_tree_retrieval_rpc_and_schema_wrappers_cover_empty_and_invalid_p #[tokio::test] async fn memory_query_backend_and_tree_flush_wrappers_cover_public_edges() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let mut config = Config::load_or_init().await.expect("init isolated config"); config.memory_tree.embedding_endpoint = None; @@ -4597,7 +4590,6 @@ async fn tree_summarizer_ops_cover_validation_query_and_local_provider_guards() #[tokio::test] async fn memory_sources_types_registry_and_sync_state_cover_public_persistence_edges() { let tmp = TempDir::new().expect("tempdir"); - let _env_lock = env_lock(); let _workspace = EnvVarGuard::set_to_path("OPENHUMAN_WORKSPACE", tmp.path()); let _config = Config::load_or_init().await.expect("init isolated config"); openhuman_core::openhuman::memory_sources::reconcile::ensure_composio_sources().await; @@ -4808,13 +4800,3 @@ fn welcome_migration_public_entrypoint_covers_empty_marker_and_transcript_paths( .expect("second migration"); assert!(second.already_done); } - -// Serializes this binary's process-global OPENHUMAN_WORKSPACE mutation so the -// raw-coverage tests do not trample each other under parallel llvm-cov runs. 
-static ENV_LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new(); -fn env_lock() -> std::sync::MutexGuard<'static, ()> { - ENV_LOCK - .get_or_init(|| std::sync::Mutex::new(())) - .lock() - .unwrap_or_else(|e| e.into_inner()) -} diff --git a/tests/memory_tree_sync_raw_coverage_e2e.rs b/tests/memory_tree_sync_raw_coverage_e2e.rs index cce0419428..4abcb32663 100644 --- a/tests/memory_tree_sync_raw_coverage_e2e.rs +++ b/tests/memory_tree_sync_raw_coverage_e2e.rs @@ -436,7 +436,6 @@ async fn memory_walk_provider_errors_and_unknown_actions_are_reported() { #[tokio::test] async fn composio_providers_sync_state_and_bus_surfaces_cover_read_write_edges() { - let _lock = env_lock(); let tmp = TempDir::new().expect("tempdir"); let _workspace = EnvVarGuard::set("OPENHUMAN_WORKSPACE", tmp.path()); let _triage = EnvVarGuard::set_str("OPENHUMAN_TRIGGER_TRIAGE_DISABLED", "yes"); @@ -647,13 +646,3 @@ async fn default_composio_provider_hooks_return_expected_noop_shapes() { .any(|entity| entity.canonical_id == "email:round14@example.com")); assert!(approx_token_count("one two three four") > 0); } - -// Serializes this binary's process-global OPENHUMAN_WORKSPACE mutation so the -// raw-coverage tests do not trample each other under parallel llvm-cov runs. 
-static ENV_LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new(); -fn env_lock() -> std::sync::MutexGuard<'static, ()> { - ENV_LOCK - .get_or_init(|| std::sync::Mutex::new(())) - .lock() - .unwrap_or_else(|e| e.into_inner()) -} diff --git a/tests/near90_closure_raw_coverage_e2e.rs b/tests/near90_closure_raw_coverage_e2e.rs index e00a892f5d..93ac3d5e01 100644 --- a/tests/near90_closure_raw_coverage_e2e.rs +++ b/tests/near90_closure_raw_coverage_e2e.rs @@ -390,7 +390,6 @@ fn round20_credentials_profiles_cover_legacy_plaintext_errors_and_active_edges() #[tokio::test] async fn round20_memory_sources_readers_and_sync_cover_error_edges_without_network() { - let _lock = env_lock(); let _lock = env_lock(); let harness = setup("http://127.0.0.1:9"); let config = harness.config().await; diff --git a/tests/owned_domain_raw_coverage_e2e.rs b/tests/owned_domain_raw_coverage_e2e.rs index 98d02ddbde..b4c0bb8fd1 100644 --- a/tests/owned_domain_raw_coverage_e2e.rs +++ b/tests/owned_domain_raw_coverage_e2e.rs @@ -340,7 +340,6 @@ fn owned_domain_config(workspace_root: &std::path::Path) -> Config { #[tokio::test] async fn openai_compatible_provider_covers_auth_temperature_tool_fallback_and_responses() { - let _lock = env_lock(); let (base_url, state) = serve_provider_mock().await; let provider = OpenAiCompatibleProvider::new_with_user_agent( "owned-mock", @@ -874,13 +873,3 @@ async fn tool_registry_controller_handlers_cover_list_get_and_validation_paths() .and_then(Value::as_u64) .is_some_and(|count| count > 0)); } - -// Serializes this binary's process-global OPENHUMAN_WORKSPACE mutation so the -// raw-coverage tests do not trample each other under parallel llvm-cov runs. 
-static ENV_LOCK: std::sync::OnceLock<std::sync::Mutex<()>> = std::sync::OnceLock::new(); -fn env_lock() -> std::sync::MutexGuard<'static, ()> { - ENV_LOCK - .get_or_init(|| std::sync::Mutex::new(())) - .lock() - .unwrap_or_else(|e| e.into_inner()) -} diff --git a/tests/tool_registry_approval_raw_coverage_e2e.rs b/tests/tool_registry_approval_raw_coverage_e2e.rs index 62ce3be1ca..760c267e15 100644 --- a/tests/tool_registry_approval_raw_coverage_e2e.rs +++ b/tests/tool_registry_approval_raw_coverage_e2e.rs @@ -1157,7 +1157,6 @@ async fn approval_schema_handlers_validate_params_and_surface_empty_gate_state() #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { - let _lock = env_lock(); let _lock = env_lock(); let harness = setup("").await; let config = Config::load_or_init() diff --git a/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs b/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs index 9e87a2f12c..7d4aa4e8c4 100644 --- a/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs +++ b/tests/tools_composio_network_leftovers_raw_coverage_e2e.rs @@ -554,7 +554,6 @@ async fn round20_polymarket_covers_discovery_errors_rpc_allowance_and_write_gate #[tokio::test] async fn round20_spawn_subagent_covers_validation_schema_and_disabled_worker_branch() { - let _lock = env_lock(); let _lock = env_lock(); let tool = SpawnSubagentTool::new(); diff --git a/tests/worker_b_raw_coverage_e2e.rs b/tests/worker_b_raw_coverage_e2e.rs index f08bedb026..ed7bc226ba 100644 --- a/tests/worker_b_raw_coverage_e2e.rs +++ b/tests/worker_b_raw_coverage_e2e.rs @@ -575,7 +575,6 @@ async fn agent_profile_lifecycle_persists_custom_profile_and_validates_delete() #[tokio::test] async fn approval_gate_rpc_decision_resumes_parked_tool_and_records_execution() { - let _lock = env_lock(); let _lock = env_lock(); let harness = setup().await; let config = Config::load_or_init() From c2af5bc76506900ebbadb8479e7afa88dd71fdc3 
Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 21:52:41 +0530 Subject: [PATCH 05/12] test: update stale composio-enabled count assertion (#3113 sync redesign) memory_sources_closure_round23: a freshly-inserted Composio source is no longer reported by list_enabled_by_kind until it has an active connection, so the count is 0, not 1. Aligns the last stale raw-coverage assertion with the behavior shipped in #3113. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/memory_sources_closure_round23_raw_coverage_e2e.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/memory_sources_closure_round23_raw_coverage_e2e.rs b/tests/memory_sources_closure_round23_raw_coverage_e2e.rs index c808b6c0c7..0fd6e56ed3 100644 --- a/tests/memory_sources_closure_round23_raw_coverage_e2e.rs +++ b/tests/memory_sources_closure_round23_raw_coverage_e2e.rs @@ -220,7 +220,9 @@ async fn round23_memory_sources_status_registry_and_readers_cover_remaining_edge let enabled_composio = memory_sources::list_enabled_by_kind(SourceKind::Composio) .await .expect("enabled composio"); - assert_eq!(enabled_composio.len(), 1); + // #3113 sync-flow redesign: a freshly-inserted Composio source is no longer + // reported as enabled here (it requires an active connection first). + assert_eq!(enabled_composio.len(), 0); let composio_reader = openhuman_core::openhuman::memory_sources::readers::composio::ComposioReader; From 695bacc0697f007310246ac103dc8946e0a183cd Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 23:16:57 +0530 Subject: [PATCH 06/12] test: make round21 github-reader hermetic + drop stale comments assertion The github reader fetches commits via a real `git clone` first (only falling back to the fake `gh` on error), and `fetch_github` falls back to the real api.github.com on any gh path-mismatch. So this test was flaky: with network, the real clone returned the repo's real commits (no abc123). 
Fixes: - Add a failing `git` stub on PATH so the reader deterministically uses the fake `gh` (no real clone, no network). - Glob the fake-`gh` list/comments path patterns so they match the reader's actual `?per_page=..&page=..` query strings (the exact patterns missed them and silently fell through to the real API). - Drop the stale `## Comments` assertion: #3113 stopped inlining comments into the rendered issue body (fetch_issue_comments is now unused); assert on the content read_issue still renders. Verified: passes deterministically (3x, ~0.3s, no network). Co-Authored-By: Claude Opus 4.8 (1M context) --- ...ources_readers_round21_raw_coverage_e2e.rs | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/tests/memory_sources_readers_round21_raw_coverage_e2e.rs b/tests/memory_sources_readers_round21_raw_coverage_e2e.rs index 283005354f..6c9017be66 100644 --- a/tests/memory_sources_readers_round21_raw_coverage_e2e.rs +++ b/tests/memory_sources_readers_round21_raw_coverage_e2e.rs @@ -180,6 +180,17 @@ async fn round21_github_reader_covers_commit_issue_comments_and_error_paths() { std::fs::create_dir_all(&bin).expect("bin dir"); let script = bin.join("gh"); write_fake_gh(&script); + let git_stub = bin.join("git"); + std::fs::write(&git_stub, "#!/usr/bin/env bash\nexit 1\n").expect("write fake git"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = std::fs::metadata(&git_stub) + .expect("metadata") + .permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&git_stub, perms).expect("chmod fake git"); + } let old_path = std::env::var("PATH").unwrap_or_default(); let _path = EnvGuard::set_path("PATH", Path::new(&format!("{}:{old_path}", bin.display()))); @@ -207,9 +218,11 @@ async fn round21_github_reader_covers_commit_issue_comments_and_error_paths() { let issue = reader .read_item(&entry, "issue:42", &config) .await - .expect("read issue with comments"); - assert!(issue.body.contains("## 
Comments")); - assert!(issue.body.contains("Looks good from the fixture")); + .expect("read issue"); + // #3113 redesign: read_issue no longer inlines comments into the rendered + // body (fetch_issue_comments is retained but unused), so assert on the + // issue content read_issue still renders rather than the dropped comments. + assert!(issue.body.contains("Round21 issue")); assert_eq!( issue .metadata @@ -238,17 +251,17 @@ if [[ "${1:-}" != "api" ]]; then exit 2 fi case "${2:-}" in - repos/tinyhumansai/openhuman/commits?per_page=30) + repos/tinyhumansai/openhuman/commits\?*) cat <<'JSON' [{"sha":"abc123","commit":{"message":"Round21 commit subject\n\nBody line","author":{"name":"Ada","email":"ada@example.test","date":"2026-05-30T00:00:00Z"},"committer":{"name":"Ada","email":"ada@example.test","date":"2026-05-30T00:00:00Z"}}}] JSON ;; - repos/tinyhumansai/openhuman/issues?per_page=30\&state=all) + repos/tinyhumansai/openhuman/issues\?*) cat <<'JSON' [{"number":42,"title":"Round21 issue","body":"Issue body","state":"open","user":{"login":"octo"},"labels":[],"created_at":"2026-05-30T00:00:00Z","updated_at":"2026-05-30T00:01:00Z","pull_request":null}] JSON ;; - repos/tinyhumansai/openhuman/pulls?per_page=30\&state=all) + repos/tinyhumansai/openhuman/pulls\?*) cat <<'JSON' [{"number":43,"title":"Round21 PR","body":"PR body","state":"open","user":{"login":"octo"},"labels":[],"created_at":"2026-05-30T00:00:00Z","updated_at":"2026-05-30T00:02:00Z","merged_at":null,"comments":1}] JSON @@ -263,7 +276,7 @@ JSON {"number":42,"title":"Round21 issue","body":"Issue body","state":"open","user":{"login":"octo"},"labels":[],"created_at":"2026-05-30T00:00:00Z","updated_at":"2026-05-30T00:01:00Z","pull_request":null} JSON ;; - repos/tinyhumansai/openhuman/issues/42/comments?per_page=50) + repos/tinyhumansai/openhuman/issues/42/comments\?*) cat <<'JSON' [{"user":{"login":"reviewer"},"body":"Looks good from the fixture","created_at":"2026-05-30T00:04:00Z"}] JSON From 
a1b54846ff7dc571cbf532f7fa8deab057b586c2 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 23:20:35 +0530 Subject: [PATCH 07/12] test: make github-reader tests hermetic in memory_sync_sources + near90 Same fix as round21, applied to the two sibling github-reader tests: - Add a failing `git` stub on PATH so GithubReader can't reach the real `git clone` of github.com and deterministically uses the fake `gh`. - Glob the fake-`gh` list/comments path patterns so they match the reader's actual `?per_page=..&page=..` queries instead of silently falling through to the real api.github.com. - memory_sync_sources: drop the stale `## Comments` assertion (#3113 stopped inlining comments into the issue body); assert on the rendered content. Verified: both pass deterministically (~0.7s, no network). Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/memory_sync_sources_raw_coverage_e2e.rs | 24 ++++++++++++++----- tests/near90_closure_raw_coverage_e2e.rs | 19 +++++++++++---- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/tests/memory_sync_sources_raw_coverage_e2e.rs b/tests/memory_sync_sources_raw_coverage_e2e.rs index 734cd59e80..daf0213d96 100644 --- a/tests/memory_sync_sources_raw_coverage_e2e.rs +++ b/tests/memory_sync_sources_raw_coverage_e2e.rs @@ -311,6 +311,17 @@ async fn github_reader_uses_fake_gh_for_list_and_read_paths() { std::fs::create_dir_all(&bin).expect("bin dir"); let script = bin.join("gh"); write_fake_gh(&script); + let git_stub = bin.join("git"); + std::fs::write(&git_stub, "#!/usr/bin/env bash\nexit 1\n").expect("write fake git"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = std::fs::metadata(&git_stub) + .expect("metadata") + .permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&git_stub, perms).expect("chmod fake git"); + } let old_path = std::env::var("PATH").unwrap_or_default(); let _path = EnvGuard::set("PATH", format!("{}:{old_path}", bin.display())); @@ -341,8 +352,9 @@ 
async fn github_reader_uses_fake_gh_for_list_and_read_paths() { .read_item(&entry, "issue:7", &config) .await .expect("read issue"); - assert!(issue.body.contains("## Comments")); - assert!(issue.body.contains("Needs fixture coverage")); + // #3113 redesign: read_issue no longer inlines comments into the body, so + // assert on the issue content it still renders rather than the comments. + assert!(issue.body.contains("Issue #7")); assert_eq!( issue.metadata.get("state").and_then(Value::as_str), Some("open") @@ -611,17 +623,17 @@ if [[ "${1:-}" != "api" ]]; then exit 2 fi case "${2:-}" in - repos/tinyhumansai/openhuman/commits?per_page=30) + repos/tinyhumansai/openhuman/commits\?*) cat <<'JSON' [{"sha":"abc123","commit":{"message":"Add coverage hooks\n\nMore details","author":{"name":"Ada","email":"ada@example.test","date":"2026-05-28T10:00:00Z"},"committer":{"name":"Ada","email":"ada@example.test","date":"2026-05-28T10:00:00Z"}}}] JSON ;; - repos/tinyhumansai/openhuman/issues?per_page=30\&state=all) + repos/tinyhumansai/openhuman/issues\?*) cat <<'JSON' [{"number":7,"title":"Memory source reader gap","body":"Needs fixture coverage","state":"open","user":{"login":"ada"},"labels":[{"name":"coverage"}],"created_at":"2026-05-27T10:00:00Z","updated_at":"2026-05-28T11:00:00Z","pull_request":null},{"number":99,"title":"PR-shaped issue","body":"","state":"open","user":{"login":"bot"},"labels":[],"created_at":"2026-05-27T10:00:00Z","updated_at":"2026-05-28T11:00:00Z","pull_request":{}}] JSON ;; - repos/tinyhumansai/openhuman/pulls?per_page=30\&state=all) + repos/tinyhumansai/openhuman/pulls\?*) cat <<'JSON' [{"number":9,"title":"Raw coverage PR","body":"PR body","state":"open","user":{"login":"grace"},"labels":[{"name":"tests"}],"created_at":"2026-05-27T10:00:00Z","updated_at":"2026-05-28T12:00:00Z","merged_at":null,"comments":1}] JSON @@ -641,7 +653,7 @@ JSON {"number":9,"title":"Raw coverage PR","body":"PR 
body","state":"open","user":{"login":"grace"},"labels":[{"name":"tests"}],"created_at":"2026-05-27T10:00:00Z","updated_at":"2026-05-28T12:00:00Z","merged_at":null,"comments":1} JSON ;; - repos/tinyhumansai/openhuman/issues/7/comments?per_page=50|repos/tinyhumansai/openhuman/issues/9/comments?per_page=50) + repos/tinyhumansai/openhuman/issues/7/comments\?*|repos/tinyhumansai/openhuman/issues/9/comments\?*) cat <<'JSON' [{"user":{"login":"reviewer"},"body":"Looks deterministic","created_at":"2026-05-28T13:00:00Z"}] JSON diff --git a/tests/near90_closure_raw_coverage_e2e.rs b/tests/near90_closure_raw_coverage_e2e.rs index 93ac3d5e01..47ed56be1b 100644 --- a/tests/near90_closure_raw_coverage_e2e.rs +++ b/tests/near90_closure_raw_coverage_e2e.rs @@ -430,6 +430,17 @@ async fn round20_memory_sources_readers_and_sync_cover_error_edges_without_netwo std::fs::create_dir_all(&bin).expect("bin dir"); let script = bin.join("gh"); write_fake_gh_round20(&script); + let git_stub = bin.join("git"); + std::fs::write(&git_stub, "#!/usr/bin/env bash\nexit 1\n").expect("write fake git"); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + let mut perms = std::fs::metadata(&git_stub) + .expect("metadata") + .permissions(); + perms.set_mode(0o755); + std::fs::set_permissions(&git_stub, perms).expect("chmod fake git"); + } let old_path = std::env::var("PATH").unwrap_or_default(); let _path = EnvGuard::set("PATH", format!("{}:{old_path}", bin.display())); @@ -760,17 +771,17 @@ if [[ "${1:-}" != "api" ]]; then exit 2 fi case "${2:-}" in - repos/tinyhumansai/openhuman/commits?per_page=30) + repos/tinyhumansai/openhuman/commits\?*) cat <<'JSON' [{"sha":"def456","commit":{"message":"Round20 commit fixture","author":{"name":"Ada","email":"ada@example.test","date":"2026-05-29T00:00:00Z"},"committer":{"name":"Ada","email":"ada@example.test","date":"2026-05-29T00:00:00Z"}}}] JSON ;; - repos/tinyhumansai/openhuman/issues?per_page=30\&state=all) + repos/tinyhumansai/openhuman/issues\?*) 
cat <<'JSON' [{"number":20,"title":"Round20 issue","body":null,"state":"closed","user":null,"labels":[],"created_at":null,"updated_at":"2026-05-29T00:30:00Z","pull_request":null}] JSON ;; - repos/tinyhumansai/openhuman/pulls?per_page=30\&state=all) + repos/tinyhumansai/openhuman/pulls\?*) cat <<'JSON' [{"number":21,"title":"Round20 merged PR","body":null,"state":"closed","user":null,"labels":[],"created_at":null,"updated_at":"2026-05-29T01:00:00Z","merged_at":"2026-05-29T01:00:00Z","comments":0}] JSON @@ -780,7 +791,7 @@ JSON {"number":21,"title":"Round20 merged PR","body":null,"state":"closed","user":null,"labels":[],"created_at":null,"updated_at":"2026-05-29T01:00:00Z","merged_at":"2026-05-29T01:00:00Z","comments":0} JSON ;; - repos/tinyhumansai/openhuman/issues/21/comments?per_page=50) + repos/tinyhumansai/openhuman/issues/21/comments\?*) cat <<'JSON' [] JSON From be29029a6d97f8d25337c9d35cd32ab94bfcff5c Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 23:49:13 +0530 Subject: [PATCH 08/12] test: responses-API input content is structured parts, not a bare string owned_domain coverage asserted the responses fallback request's /input/0/content equals "fallback please" (a plain string). The OpenAI-compatible responses payload now sends structured content parts ([{text, type:"input_text"}], #2748/#3124), so assert that shape. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/owned_domain_raw_coverage_e2e.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/owned_domain_raw_coverage_e2e.rs b/tests/owned_domain_raw_coverage_e2e.rs index b4c0bb8fd1..40c2768164 100644 --- a/tests/owned_domain_raw_coverage_e2e.rs +++ b/tests/owned_domain_raw_coverage_e2e.rs @@ -433,7 +433,9 @@ async fn openai_compatible_provider_covers_auth_temperature_tool_fallback_and_re assert_eq!(fallback_text, "responses fallback answer"); assert_eq!( state.response_requests.lock().expect("response requests")[0].pointer("/input/0/content"), - Some(&json!("fallback please")) + // Responses-API input content is structured parts (#2748/#3124), not a + // bare string. + Some(&json!([{"text": "fallback please", "type": "input_text"}])) ); } From 89c43250bf302abd57e1a7d51588bdaecc748475 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Mon, 1 Jun 2026 23:50:51 +0530 Subject: [PATCH 09/12] test: fix time-bomb in cron-add coverage test (hardcoded past 'at' date) round22_cron_add_tool used a hardcoded `at: 2026-05-31T00:00:00Z` schedule, which is now in the past, so cron creation fails validation ("'at' must be in the future"). Compute the `at` time at runtime (now + 30 days) so the test never expires again. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/tools_composio_round22_raw_coverage_e2e.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tools_composio_round22_raw_coverage_e2e.rs b/tests/tools_composio_round22_raw_coverage_e2e.rs index df6aedc485..33ac22a609 100644 --- a/tests/tools_composio_round22_raw_coverage_e2e.rs +++ b/tests/tools_composio_round22_raw_coverage_e2e.rs @@ -415,7 +415,7 @@ async fn round22_cron_add_tool_covers_validation_and_markdown_edges() { .execute_with_options( json!({ "name": "round22_agent_once", - "schedule": { "kind": "at", "at": "2026-05-31T00:00:00Z" }, + "schedule": { "kind": "at", "at": (chrono::Utc::now() + chrono::Duration::days(30)).to_rfc3339() }, "job_type": "agent", "prompt": "collect validation notes", "session_target": "main", From 13d9cbf08438718e43befec529f40f10ebe60d98 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Tue, 2 Jun 2026 00:33:54 +0530 Subject: [PATCH 10/12] test: give ApprovalGate test session_ids the required `session-` prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #2952 added a debug_assert in ApprovalGate::new requiring session_id to start with `session-` (the per-launch UUID shape) to guard against credential leaks. The approval coverage tests init the global gate with ids like `approval-raw-e2e-session` / `worker-b-approval-session`, which trip the assert when that test happens to initialize the global gate first (order-dependent — why it read as a flake). Prefix the test session_ids with `session-` so they satisfy the guard regardless of test order. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/tool_registry_approval_raw_coverage_e2e.rs | 9 +++++---- tests/worker_b_raw_coverage_e2e.rs | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/tool_registry_approval_raw_coverage_e2e.rs b/tests/tool_registry_approval_raw_coverage_e2e.rs index 760c267e15..e6c6f24916 100644 --- a/tests/tool_registry_approval_raw_coverage_e2e.rs +++ b/tests/tool_registry_approval_raw_coverage_e2e.rs @@ -1162,7 +1162,7 @@ async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { let config = Config::load_or_init() .await .expect("load config for approval gate"); - let gate = ApprovalGate::init_global(config.clone(), "approval-raw-e2e-session"); + let gate = ApprovalGate::init_global(config.clone(), "session-approval-raw-e2e-session"); let gate_for_task = gate.clone(); let approval_task = tokio::spawn(async move { @@ -1439,10 +1439,11 @@ async fn approval_rpc_decision_paths_persist_always_allow_and_recent_audit() { } assert_eq!(deny_approved_id, None); assert!(gate.pending_for_thread("approval-deny-thread").is_none()); - assert_eq!(gate.session_id(), "approval-raw-e2e-session"); + assert_eq!(gate.session_id(), "session-approval-raw-e2e-session"); - let second_init = ApprovalGate::init_global(Config::default(), "ignored-second-session"); - assert_eq!(second_init.session_id(), "approval-raw-e2e-session"); + let second_init = + ApprovalGate::init_global(Config::default(), "session-ignored-second-session"); + assert_eq!(second_init.session_id(), "session-approval-raw-e2e-session"); let approval_dir = config.workspace_dir.join("approval"); if approval_dir.exists() { diff --git a/tests/worker_b_raw_coverage_e2e.rs b/tests/worker_b_raw_coverage_e2e.rs index ed7bc226ba..23b6741797 100644 --- a/tests/worker_b_raw_coverage_e2e.rs +++ b/tests/worker_b_raw_coverage_e2e.rs @@ -580,7 +580,7 @@ async fn approval_gate_rpc_decision_resumes_parked_tool_and_records_execution() let config = 
Config::load_or_init() .await .expect("load config for approval gate"); - let gate = ApprovalGate::init_global(config, "worker-b-approval-session"); + let gate = ApprovalGate::init_global(config, "session-worker-b-approval-session"); let gate_for_task = gate.clone(); let approval_task = tokio::spawn(async move { From 0e46854ef79658d10ae59b46e79e5947fcf97a6a Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Tue, 2 Jun 2026 02:12:31 +0530 Subject: [PATCH 11/12] ci(coverage): serialize core llvm-cov tests (--test-threads=1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge took main's parallel workflow, but env_lock only guards env vars — not other process-global state like the toolkit/connection registry exercised by tools_approval_channels. Under llvm-cov's slower instrumentation those non-env races flake (unknown_toolkit suggestion assertion). Restore the serialized run (proven green at 13d9cbf0, endorsed by graycyrus) while keeping main's --no-fail-fast + build jobs. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/coverage.yml | 9 ++++++++- .github/workflows/pr-ci.yml | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 35a4a71b8d..fe9363c238 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -94,7 +94,14 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - run: cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info + # Serialize test execution (`--test-threads=1`). Several raw-coverage + # tests share process-global state beyond env vars (e.g. the toolkit / + # connection registry exercised by tools_approval_channels), which the + # per-test env_lock does not guard. 
Under llvm-cov's slower + # instrumentation the parallel default widens those races into flaky + # failures (e.g. unknown_toolkit suggestions). Serialized, they are + # hermetic. + run: cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 env: CARGO_BUILD_JOBS: "2" - name: Upload core lcov diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index 60eedf1993..698d1c25a6 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -315,7 +315,14 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - run: bash scripts/ci-cancel-aware.sh cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info + # Serialize test execution (`--test-threads=1`). Several raw-coverage + # tests share process-global state beyond env vars (e.g. the toolkit / + # connection registry exercised by tools_approval_channels), which the + # per-test env_lock does not guard. Under llvm-cov's slower + # instrumentation the parallel default widens those races into flaky + # failures (e.g. unknown_toolkit suggestions). Serialized, they are + # hermetic. + run: bash scripts/ci-cancel-aware.sh cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 env: CARGO_BUILD_JOBS: "2" From 657781a06a1deea27d554da4af65e35880ff893e Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Tue, 2 Jun 2026 02:21:41 +0530 Subject: [PATCH 12/12] test(coverage): hold env_lock in orchestrator_tool_synthesis (fast race fix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the --test-threads=1 serialization with a surgical lock so the coverage job stays parallel/fast. 
The test read the process-global connection/toolkit registry without holding env_lock, while sibling tests swap OPENHUMAN_WORKSPACE under it — under llvm-cov's slower parallel run that trampled the integrations tool's toolkit list and dropped gmail_pro/slack_bot from the unknown-toolkit suggestion. It was the only test in the file missing the lock. Workflows revert to main's parallel run. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/coverage.yml | 9 +-------- .github/workflows/pr-ci.yml | 9 +-------- tests/tools_approval_channels_raw_coverage_e2e.rs | 8 ++++++++ 3 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index fe9363c238..35a4a71b8d 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -94,14 +94,7 @@ jobs: - name: Install cargo-llvm-cov uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - # Serialize test execution (`--test-threads=1`). Several raw-coverage - # tests share process-global state beyond env vars (e.g. the toolkit / - # connection registry exercised by tools_approval_channels), which the - # per-test env_lock does not guard. Under llvm-cov's slower - # instrumentation the parallel default widens those races into flaky - # failures (e.g. unknown_toolkit suggestions). Serialized, they are - # hermetic. - run: cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 + run: cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info env: CARGO_BUILD_JOBS: "2" - name: Upload core lcov diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml index 698d1c25a6..60eedf1993 100644 --- a/.github/workflows/pr-ci.yml +++ b/.github/workflows/pr-ci.yml @@ -315,14 +315,7 @@ jobs: uses: taiki-e/install-action@cargo-llvm-cov - name: Run cargo llvm-cov for openhuman core - # Serialize test execution (`--test-threads=1`). 
Several raw-coverage - # tests share process-global state beyond env vars (e.g. the toolkit / - # connection registry exercised by tools_approval_channels), which the - # per-test env_lock does not guard. Under llvm-cov's slower - # instrumentation the parallel default widens those races into flaky - # failures (e.g. unknown_toolkit suggestions). Serialized, they are - # hermetic. - run: bash scripts/ci-cancel-aware.sh cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info -- --test-threads=1 + run: bash scripts/ci-cancel-aware.sh cargo llvm-cov --no-fail-fast -p openhuman --lcov --output-path lcov-core.info env: CARGO_BUILD_JOBS: "2" diff --git a/tests/tools_approval_channels_raw_coverage_e2e.rs b/tests/tools_approval_channels_raw_coverage_e2e.rs index 501ffdda0f..255b381864 100644 --- a/tests/tools_approval_channels_raw_coverage_e2e.rs +++ b/tests/tools_approval_channels_raw_coverage_e2e.rs @@ -1606,6 +1606,14 @@ fn tools_and_tool_registry_public_surfaces_cover_schema_and_assembly_paths() { #[tokio::test] async fn orchestrator_tool_synthesis_covers_agent_and_integration_delegation_edges() { + // This test reads the process-global connection/toolkit registry (the + // integrations tool's available-toolkit list). Sibling tests mutate + // OPENHUMAN_WORKSPACE under env_lock; without holding it here, a concurrent + // workspace swap trampled our view and dropped gmail_pro/slack_bot from the + // unknown-toolkit suggestion (flaky only under llvm-cov's slower parallel + // run). Hold the same lock so this test is hermetic without serializing the + // whole suite. + let _lock = env_lock(); let mut registry = AgentDefinitionRegistry::default(); registry.insert(coverage_agent_definition( "researcher",