diff --git a/src/core/cli.rs b/src/core/cli.rs
index 52bc5d5641..5c4d592d24 100644
--- a/src/core/cli.rs
+++ b/src/core/cli.rs
@@ -278,8 +278,17 @@ fn run_server_command(args: &[String]) -> Result<()> {
     crate::core::logging::init_for_cli_run(verbose, log_scope);
 
     // Initialize the Tokio multi-threaded runtime.
+    //
+    // A single agent turn is a very large async state machine (system prompt +
+    // hundreds of tool specs + the nested provider/tool loop), and delegating
+    // to a sub-agent runs another full turn one level down. Even with the inner
+    // sub-agent future boxed (`subagent_runner::ops`), that nesting overflows
+    // tokio's default 2 MiB worker-thread stack and aborts the whole process
+    // (SIGABRT: "thread 'tokio-rt-worker' has overflowed its stack"), taking
+    // the JSON-RPC server down mid-request. Give workers a roomier stack.
     let rt = tokio::runtime::Builder::new_multi_thread()
         .enable_all()
+        .thread_stack_size(16 * 1024 * 1024)
         .build()?;
     rt.block_on(async {
         crate::core::jsonrpc::run_server(host.as_deref(), port, socketio_enabled).await