diff --git a/.github/workflows/build_cpp.yml b/.github/workflows/build_cpp.yml index 01fe3d9d9..049cc21ac 100644 --- a/.github/workflows/build_cpp.yml +++ b/.github/workflows/build_cpp.yml @@ -107,6 +107,7 @@ jobs: cmake -B cpp/build -S cpp -DCMAKE_BUILD_TYPE=Release \ -DGAIA_BUILD_TESTS=OFF -DGAIA_BUILD_EXAMPLES=OFF \ -DGAIA_BUILD_INTEGRATION_TESTS=OFF \ + -DGAIA_BUILD_TUI=OFF \ -DCMAKE_INSTALL_PREFIX="${{ runner.temp }}/gaia_install" cmake --build cpp/build --config Release --parallel cmake --install cpp/build --config Release diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 75098da36..f02148fc5 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -73,6 +73,27 @@ if(NOT httplib_FOUND) FetchContent_MakeAvailable(httplib) endif() +# FTXUI — reactive TUI framework (optional, gated behind GAIA_BUILD_TUI) +# FTXUI must always be built static — it doesn't export symbols for DLL builds, +# so BUILD_SHARED_LIBS=ON causes LNK1181 on Windows. +option(GAIA_BUILD_TUI "Build FTXUI-based TUI console" ON) +if(GAIA_BUILD_TUI) + find_package(ftxui QUIET) + if(NOT ftxui_FOUND) + message(STATUS "FTXUI not found -- fetching via FetchContent") + # Save and override BUILD_SHARED_LIBS so FTXUI always builds static + set(_GAIA_SAVE_BSL ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS OFF) + FetchContent_Declare( + ftxui + GIT_REPOSITORY https://github.com/ArthurSonzogni/FTXUI + GIT_TAG v6.1.9 + ) + FetchContent_MakeAvailable(ftxui) + set(BUILD_SHARED_LIBS ${_GAIA_SAVE_BSL}) + endif() +endif() + # Google Test (unit tests and/or integration tests) if(GAIA_BUILD_TESTS OR GAIA_BUILD_INTEGRATION_TESTS) find_package(GTest QUIET) @@ -103,8 +124,23 @@ add_library(gaia_core src/mcp_client.cpp src/security.cpp src/sse_parser.cpp + src/process.cpp + src/file_tools.cpp + src/git_tools.cpp + src/session.cpp + src/repl.cpp + src/json_event_handler.cpp ) +# TUI sources (conditional on FTXUI availability) +if(GAIA_BUILD_TUI) + target_sources(gaia_core PRIVATE + src/tui_console.cpp + 
src/tui_markdown.cpp + ) + target_compile_definitions(gaia_core PUBLIC GAIA_HAS_TUI=1) +endif() + add_library(gaia::gaia_core ALIAS gaia_core) include(GenerateExportHeader) @@ -163,6 +199,16 @@ if(WIN32) target_link_libraries(gaia_core PRIVATE ws2_32) endif() +# FTXUI (TUI console — optional) +if(GAIA_BUILD_TUI) + target_link_libraries(gaia_core PRIVATE + ftxui::component ftxui::dom ftxui::screen + ) + message(STATUS "GAIA TUI: enabled (FTXUI)") +else() + message(STATUS "GAIA TUI: disabled") +endif() + # HTTPS support — auto-detected. No flags required; OpenSSL is used when available. find_package(OpenSSL QUIET) if(OpenSSL_FOUND) @@ -175,6 +221,36 @@ else() message(STATUS "GAIA SSL: not available (HTTP only)") endif() +# --------------------------------------------------------------------------- +# gaia-bash agent binary +# --------------------------------------------------------------------------- +option(GAIA_BUILD_BASH_AGENT "Build the gaia-bash coding agent" ON) +if(GAIA_BUILD_BASH_AGENT) + add_executable(gaia-bash + agents/bash/main.cpp + agents/bash/bash_agent.cpp + agents/bash/bash_tools.cpp + agents/bash/api_server.cpp + agents/bash/mcp_server.cpp + ) + target_include_directories(gaia-bash PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/agents/bash + ) + target_link_libraries(gaia-bash PRIVATE gaia::gaia_core) + + # API server needs httplib (already a dependency of gaia_core but PRIVATE) + if(httplib_FOUND) + target_link_libraries(gaia-bash PRIVATE httplib::httplib) + else() + target_include_directories(gaia-bash SYSTEM PRIVATE + $) + endif() + if(OpenSSL_FOUND) + target_compile_definitions(gaia-bash PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT) + target_link_libraries(gaia-bash PRIVATE OpenSSL::SSL OpenSSL::Crypto) + endif() +endif() + # --------------------------------------------------------------------------- # Examples # --------------------------------------------------------------------------- @@ -218,11 +294,18 @@ if(GAIA_BUILD_TESTS) tests/test_decision.cpp 
/// Builds the "id" field used in chat completion responses.
///
/// The ID is the literal prefix "chatcmpl-" followed by the current
/// system_clock time expressed in milliseconds since the clock's epoch.
/// Two calls that land in the same millisecond yield the same ID, so this is
/// "unique enough" for response tagging rather than a true unique identifier.
static std::string generateCompletionId() {
    using std::chrono::duration_cast;
    using std::chrono::milliseconds;
    using std::chrono::system_clock;

    const milliseconds sinceEpoch =
        duration_cast<milliseconds>(system_clock::now().time_since_epoch());

    std::string id = "chatcmpl-";
    id += std::to_string(sinceEpoch.count());
    return id;
}
+static int64_t unixTimestamp() { + return std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); +} + +/// Build a JSON error response body. +static json errorJson(const std::string& message, const std::string& type = "server_error", + const std::string& code = "") { + json err = { + {"error", { + {"message", message}, + {"type", type} + }} + }; + if (!code.empty()) { + err["error"]["code"] = code; + } + return err; +} + +/// Extract the last user message content from an OpenAI-style messages array. +static std::string extractUserInput(const json& messages) { + // Walk backwards to find the last "user" role message. + for (auto it = messages.rbegin(); it != messages.rend(); ++it) { + if (it->value("role", "") == "user") { + // Content can be a string or an array of content parts. + const auto& content = (*it)["content"]; + if (content.is_string()) { + return content.get(); + } + if (content.is_array()) { + // Concatenate text parts. + std::string text; + for (const auto& part : content) { + if (part.value("type", "") == "text") { + if (!text.empty()) text += "\n"; + text += part.value("text", ""); + } + } + return text; + } + } + } + return ""; +} + +// --------------------------------------------------------------------------- +// PIMPL +// --------------------------------------------------------------------------- + +struct ApiServer::Impl { + Agent& agent; + int port; + httplib::Server server; + std::shared_ptr sessionStore; + + Impl(Agent& a, int p) : agent(a), port(p) {} + + // ---- CORS ---- + + void addCorsHeaders(httplib::Response& res) { + res.set_header("Access-Control-Allow-Origin", "*"); + res.set_header("Access-Control-Allow-Methods", "GET, POST, DELETE, OPTIONS"); + res.set_header("Access-Control-Allow-Headers", "Content-Type, Authorization"); + } + + // ---- Route setup ---- + + void setupRoutes() { + // CORS preflight for all paths. 
+ server.Options(R"(.*)", [this](const httplib::Request& /*req*/, httplib::Response& res) { + addCorsHeaders(res); + res.status = 204; + }); + + server.Post("/v1/chat/completions", + [this](const httplib::Request& req, httplib::Response& res) { + handleChatCompletions(req, res); + }); + + server.Get("/v1/tools", + [this](const httplib::Request& req, httplib::Response& res) { + handleListTools(req, res); + }); + + // cpp-httplib path-param capture: /v1/tools/:name + server.Post(R"(/v1/tools/([^/]+))", + [this](const httplib::Request& req, httplib::Response& res) { + handleExecuteTool(req, res); + }); + + server.Get("/health", + [this](const httplib::Request& req, httplib::Response& res) { + handleHealth(req, res); + }); + + server.Get("/sessions", + [this](const httplib::Request& req, httplib::Response& res) { + handleListSessions(req, res); + }); + + // DELETE /sessions/:id + server.Delete(R"(/sessions/([^/]+))", + [this](const httplib::Request& req, httplib::Response& res) { + handleDeleteSession(req, res); + }); + } + + // ---- POST /v1/chat/completions ---- + + void handleChatCompletions(const httplib::Request& req, httplib::Response& res) { + addCorsHeaders(res); + + // Parse request body. + json body; + try { + body = json::parse(req.body); + } catch (const std::exception& e) { + res.status = 400; + res.set_content( + errorJson("Invalid JSON: " + std::string(e.what()), "invalid_request_error").dump(), + "application/json"); + return; + } + + // Validate messages field. 
+ if (!body.contains("messages") || !body["messages"].is_array() || + body["messages"].empty()) { + res.status = 400; + res.set_content( + errorJson("'messages' field is required and must be a non-empty array", + "invalid_request_error").dump(), + "application/json"); + return; + } + + std::string userInput = extractUserInput(body["messages"]); + if (userInput.empty()) { + res.status = 400; + res.set_content( + errorJson("No user message found in messages array", + "invalid_request_error").dump(), + "application/json"); + return; + } + + bool stream = body.value("stream", false); + std::string model = body.value("model", agent.config().modelId); + std::string completionId = generateCompletionId(); + int64_t created = unixTimestamp(); + + if (stream) { + handleStreamingCompletion(res, userInput, model, completionId, created); + } else { + handleNonStreamingCompletion(res, userInput, model, completionId, created); + } + } + + void handleNonStreamingCompletion(httplib::Response& res, + const std::string& userInput, + const std::string& model, + const std::string& completionId, + int64_t created) { + try { + json result = agent.processQuery(userInput); + std::string content = result.value("result", ""); + + json response = { + {"id", completionId}, + {"object", "chat.completion"}, + {"created", created}, + {"model", model}, + {"choices", json::array({ + { + {"index", 0}, + {"message", { + {"role", "assistant"}, + {"content", content} + }}, + {"finish_reason", "stop"} + } + })}, + {"usage", { + {"prompt_tokens", 0}, + {"completion_tokens", 0}, + {"total_tokens", 0} + }} + }; + + res.status = 200; + res.set_content(response.dump(), "application/json"); + + } catch (const std::runtime_error& e) { + std::string what = e.what(); + // Agent is not re-entrant — detect concurrency conflict. + if (what.find("already running") != std::string::npos) { + res.status = 409; + res.set_content( + errorJson("Agent is busy processing another request. 
" + "Concurrent requests are not supported.", + "conflict", "agent_busy").dump(), + "application/json"); + } else { + res.status = 500; + res.set_content( + errorJson("Agent error: " + what).dump(), + "application/json"); + } + } catch (const std::exception& e) { + res.status = 500; + res.set_content( + errorJson("Internal error: " + std::string(e.what())).dump(), + "application/json"); + } + } + + void handleStreamingCompletion(httplib::Response& res, + const std::string& userInput, + const std::string& model, + const std::string& completionId, + int64_t created) { + // Process the query first (we can't truly stream token-by-token since + // Agent::processQuery returns a complete result). We simulate SSE by + // sending the full result as a single chunk followed by [DONE]. + std::string content; + bool agentBusy = false; + std::string errorMsg; + + try { + json result = agent.processQuery(userInput); + content = result.value("result", ""); + } catch (const std::runtime_error& e) { + std::string what = e.what(); + if (what.find("already running") != std::string::npos) { + agentBusy = true; + } + errorMsg = what; + } catch (const std::exception& e) { + errorMsg = e.what(); + } + + if (agentBusy) { + res.status = 409; + res.set_content( + errorJson("Agent is busy processing another request. " + "Concurrent requests are not supported.", + "conflict", "agent_busy").dump(), + "application/json"); + return; + } + + if (!errorMsg.empty()) { + res.status = 500; + res.set_content( + errorJson("Agent error: " + errorMsg).dump(), + "application/json"); + return; + } + + // Send as SSE chunks via chunked transfer encoding. + res.set_header("Content-Type", "text/event-stream"); + res.set_header("Cache-Control", "no-cache"); + res.set_header("Connection", "keep-alive"); + + // Build the SSE data chunk with the full content. 
+ json chunk = { + {"id", completionId}, + {"object", "chat.completion.chunk"}, + {"created", created}, + {"model", model}, + {"choices", json::array({ + { + {"index", 0}, + {"delta", { + {"role", "assistant"}, + {"content", content} + }}, + {"finish_reason", nullptr} + } + })} + }; + + // Stop chunk. + json stopChunk = { + {"id", completionId}, + {"object", "chat.completion.chunk"}, + {"created", created}, + {"model", model}, + {"choices", json::array({ + { + {"index", 0}, + {"delta", json::object()}, + {"finish_reason", "stop"} + } + })} + }; + + std::string body; + body += "data: " + chunk.dump() + "\n\n"; + body += "data: " + stopChunk.dump() + "\n\n"; + body += "data: [DONE]\n\n"; + + res.set_content(body, "text/event-stream"); + } + + // ---- GET /v1/tools ---- + + void handleListTools(const httplib::Request& /*req*/, httplib::Response& res) { + addCorsHeaders(res); + + json tools = json::array(); + for (const auto& [name, info] : agent.tools().allTools()) { + if (!info.enabled) continue; + + json params = json::array(); + for (const auto& p : info.parameters) { + params.push_back({ + {"name", p.name}, + {"type", paramTypeToString(p.type)}, + {"required", p.required}, + {"description", p.description} + }); + } + + tools.push_back({ + {"name", info.name}, + {"description", info.description}, + {"parameters", params} + }); + } + + json response = {{"tools", tools}}; + res.status = 200; + res.set_content(response.dump(), "application/json"); + } + + // ---- POST /v1/tools/:name ---- + + void handleExecuteTool(const httplib::Request& req, httplib::Response& res) { + addCorsHeaders(res); + + // Extract tool name from the regex capture. + std::string toolName = req.matches[1].str(); + + // Parse body as tool arguments. 
+ json args = json::object(); + if (!req.body.empty()) { + try { + args = json::parse(req.body); + } catch (const std::exception& e) { + res.status = 400; + res.set_content( + errorJson("Invalid JSON body: " + std::string(e.what()), + "invalid_request_error").dump(), + "application/json"); + return; + } + } + + // Check if tool exists. + if (!agent.tools().hasTool(toolName)) { + // Try name resolution (handles common LLM mistakes). + std::string resolved = agent.tools().resolveName(toolName); + if (resolved.empty()) { + res.status = 404; + res.set_content( + errorJson("Tool not found: " + toolName, "not_found").dump(), + "application/json"); + return; + } + toolName = resolved; + } + + try { + // Execute through the mutable toolRegistry() to allow policy checks. + json result = agent.toolRegistry().executeTool(toolName, args); + + json response = { + {"tool", toolName}, + {"result", result} + }; + res.status = 200; + res.set_content(response.dump(), "application/json"); + + } catch (const std::exception& e) { + res.status = 500; + res.set_content( + errorJson("Tool execution error: " + std::string(e.what())).dump(), + "application/json"); + } + } + + // ---- GET /health ---- + + void handleHealth(const httplib::Request& /*req*/, httplib::Response& res) { + addCorsHeaders(res); + + AgentConfig cfg = agent.config(); + size_t toolCount = agent.tools().allTools().size(); + + json response = { + {"status", "ok"}, + {"model", cfg.modelId}, + {"tools", static_cast(toolCount)}, + {"port", port} + }; + + res.status = 200; + res.set_content(response.dump(), "application/json"); + } + + // ---- GET /sessions ---- + + void handleListSessions(const httplib::Request& /*req*/, httplib::Response& res) { + addCorsHeaders(res); + + if (!sessionStore) { + res.status = 200; + res.set_content(json::array().dump(), "application/json"); + return; + } + + auto sessions = sessionStore->list(); + json arr = json::array(); + for (const auto& s : sessions) { + arr.push_back({ + {"id", s.id}, 
+ {"timestamp", s.timestamp}, + {"preview", s.preview}, + {"message_count", s.messageCount} + }); + } + + res.status = 200; + res.set_content(arr.dump(), "application/json"); + } + + // ---- DELETE /sessions/:id ---- + + void handleDeleteSession(const httplib::Request& req, httplib::Response& res) { + addCorsHeaders(res); + + std::string sessionId = req.matches[1].str(); + + if (!sessionStore) { + res.status = 404; + res.set_content( + errorJson("Session store not configured", "not_found").dump(), + "application/json"); + return; + } + + bool removed = sessionStore->remove(sessionId); + if (removed) { + res.status = 200; + res.set_content( + json({{"deleted", true}, {"id", sessionId}}).dump(), + "application/json"); + } else { + res.status = 404; + res.set_content( + errorJson("Session not found: " + sessionId, "not_found").dump(), + "application/json"); + } + } +}; + +// --------------------------------------------------------------------------- +// ApiServer public interface +// --------------------------------------------------------------------------- + +ApiServer::ApiServer(Agent& agent, int port) + : impl_(std::make_unique(agent, port)) { + impl_->setupRoutes(); +} + +ApiServer::~ApiServer() = default; + +void ApiServer::setSessionStore(std::shared_ptr store) { + impl_->sessionStore = std::move(store); +} + +void ApiServer::run() { + std::cerr << "[ApiServer] Listening on port " << impl_->port << std::endl; + std::cerr << "[ApiServer] Endpoints:" << std::endl; + std::cerr << " POST /v1/chat/completions -- agent query" << std::endl; + std::cerr << " GET /v1/tools -- list tools" << std::endl; + std::cerr << " POST /v1/tools/:name -- execute tool" << std::endl; + std::cerr << " GET /health -- health check" << std::endl; + std::cerr << " GET /sessions -- list sessions" << std::endl; + std::cerr << " DELETE /sessions/:id -- delete session" << std::endl; + + if (!impl_->server.listen("127.0.0.1", impl_->port)) { + throw std::runtime_error( + "ApiServer failed to 
bind on port " + std::to_string(impl_->port) + + ". Check that the port is not already in use."); + } +} + +void ApiServer::stop() { + impl_->server.stop(); +} + +} // namespace gaia diff --git a/cpp/agents/bash/api_server.h b/cpp/agents/bash/api_server.h new file mode 100644 index 000000000..04c5077ba --- /dev/null +++ b/cpp/agents/bash/api_server.h @@ -0,0 +1,58 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// HTTP REST API server that wraps an Agent with OpenAI-compatible endpoints. +// Uses cpp-httplib for the HTTP server (same dependency as gaia_core). + +#pragma once + +#include +#include + +#include "gaia/export.h" + +namespace gaia { + +class Agent; +class SessionStore; + +/// HTTP REST API server that wraps an Agent with OpenAI-compatible endpoints. +/// Uses cpp-httplib for the HTTP server. +/// +/// Endpoints: +/// POST /v1/chat/completions -- agent query (streaming + non-streaming) +/// GET /v1/tools -- list registered tools +/// POST /v1/tools/:name -- execute a tool directly +/// GET /health -- health check +/// GET /sessions -- list sessions +/// DELETE /sessions/:id -- delete session +/// +/// Threading: httplib::Server runs its own thread pool. Agent::processQuery() +/// is NOT re-entrant (guarded by inFlight_), so concurrent /v1/chat/completions +/// requests will receive a 409 Conflict error. Tool execution and read-only +/// endpoints are safe to call concurrently. +/// +/// Usage: +/// BashAgent agent(config); +/// ApiServer server(agent, 8200); +/// server.setSessionStore(store); +/// server.run(); // blocking +class ApiServer { +public: + ApiServer(Agent& agent, int port = 8200); + ~ApiServer(); + + void setSessionStore(std::shared_ptr store); + + /// Start the server (blocking). + void run(); + + /// Stop the server (call from another thread or signal handler). 
+ void stop(); + +private: + struct Impl; + std::unique_ptr impl_; +}; + +} // namespace gaia diff --git a/cpp/agents/bash/bash_agent.cpp b/cpp/agents/bash/bash_agent.cpp new file mode 100644 index 000000000..3e02ddd42 --- /dev/null +++ b/cpp/agents/bash/bash_agent.cpp @@ -0,0 +1,107 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "bash_agent.h" +#include "bash_tools.h" + +namespace gaia { + +BashAgent::BashAgent(const AgentConfig& config) + : Agent(config) { + init(); +} + +BashAgent::~BashAgent() = default; + +void BashAgent::registerTools() { + FileIOTools::registerAll(toolRegistry()); + GitTools::registerAll(toolRegistry()); + BashTools::registerAll(toolRegistry()); +} + +std::string BashAgent::getSystemPrompt() const { + return R"(You are an expert bash/shell scripting agent running locally via the GAIA framework on AMD hardware. You write, execute, debug, and explain shell scripts with precision. + +## SHELL CODING STANDARDS + +1. **POSIX-first**: Write POSIX sh-compatible code by default. Use bashisms (arrays, [[ ]], process substitution, etc.) ONLY when the shebang is explicitly #!/bin/bash or #!/usr/bin/env bash. + +2. **Safety pragmas**: In every non-trivial script (>3 lines), start with: + ```bash + set -euo pipefail + ``` + - `set -e`: Exit on first error + - `set -u`: Treat unset variables as errors + - `set -o pipefail`: Propagate pipe failures + +3. **Variable quoting**: ALWAYS double-quote variable expansions: + ```bash + # Correct + echo "$filename" + cp "$src" "$dst" + for f in "$@"; do + + # WRONG - word splitting, glob expansion + echo $filename + cp $src $dst + ``` + +4. **Shellcheck-clean code**: Write code that passes `shellcheck` without warnings. Common rules: + - SC2086: Double-quote variables + - SC2046: Quote command substitutions + - SC2006: Use $() instead of backticks + - SC2034: Don't leave variables unused + - SC2155: Declare and assign separately + +5. 
**Destructive operations**: For commands that can cause data loss or system damage, ALWAYS explain what will happen and ask for confirmation before executing: + - `rm -rf` — recursive delete + - `dd` — raw disk write + - `mkfs` — filesystem creation + - `chmod -R 777` — open permissions + - `chown -R` — ownership changes on system dirs + - `> file` — file truncation + - Pipe to `| sh` or `| bash` — arbitrary execution + +6. **Man page references**: When using non-obvious flags, cite the relevant man page section: + - `find -newer` — see find(1), TESTS section + - `tar --strip-components` — see tar(1) + - `grep -P` — Perl regex, see grep(1), -P flag (GNU only) + +## TOOL USAGE + +You have access to these tool categories: + +### File operations +- `file_read` — Read file contents with optional line range +- `file_write` — Create or overwrite files +- `file_edit` — Surgical search-and-replace in files +- `file_search` — Find files by glob pattern or content + +### Git operations +- `git_status` — Working tree status +- `git_diff` — Show changes (staged/unstaged) +- `git_log` — Recent commit history +- `git_show` — Show specific commits + +### Bash operations +- `bash_execute` — Run shell commands with timeout and output capture +- `env_inspect` — Detect shell, OS, PATH, and installed tools + +## WORKFLOW + +1. Start by understanding the environment: use `env_inspect` to check available tools +2. Read relevant files before modifying them +3. Use `bash_execute` to run commands — prefer small, focused commands over long pipelines +4. When writing scripts, use `file_write` to create them, then `bash_execute` to run them +5. 
After making changes, verify them (re-read files, check output) + +## RESPONSE STYLE + +- Be concise and precise — shell users value brevity +- Show the command AND explain what it does +- For complex pipelines, break them down step by step +- Always show expected output format when relevant +- Prefer standard POSIX utilities over GNU extensions when possible)"; +} + +} // namespace gaia diff --git a/cpp/agents/bash/bash_agent.h b/cpp/agents/bash/bash_agent.h new file mode 100644 index 000000000..faf658a6f --- /dev/null +++ b/cpp/agents/bash/bash_agent.h @@ -0,0 +1,39 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// BashAgent — a GAIA agent specialized for bash/shell scripting. +// Combines file I/O, git, and bash execution tools with a system prompt +// tuned for POSIX-correct, shellcheck-clean shell code. + +#pragma once + +#include +#include +#include + +namespace gaia { + +/// Bash coding agent — writes, executes, and debugs shell scripts. +/// +/// Registers: +/// - File I/O tools (read, write, edit, search) +/// - Git tools (status, diff, log, show) +/// - bash_execute (run commands with timeout) +/// - env_inspect (detect shell, OS, installed tools) +/// +/// System prompt enforces: +/// - POSIX-first coding style +/// - set -euo pipefail in non-trivial scripts +/// - Proper variable quoting +/// - Confirmation for destructive operations +class BashAgent : public Agent { +public: + explicit BashAgent(const AgentConfig& config = {}); + ~BashAgent() override; + +protected: + void registerTools() override; + std::string getSystemPrompt() const override; +}; + +} // namespace gaia diff --git a/cpp/agents/bash/bash_tools.cpp b/cpp/agents/bash/bash_tools.cpp new file mode 100644 index 000000000..e45fa3e0e --- /dev/null +++ b/cpp/agents/bash/bash_tools.cpp @@ -0,0 +1,266 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
+// SPDX-License-Identifier: MIT + +#include "bash_tools.h" + +#include +#include +#include + +#include +#include + +#ifdef _WIN32 +#include +#endif + +namespace gaia { + +// --------------------------------------------------------------------------- +// Registration +// --------------------------------------------------------------------------- + +void BashTools::registerAll(ToolRegistry& registry) { + registry.registerTool(bashExecute()); + registry.registerTool(envInspect()); +} + +// --------------------------------------------------------------------------- +// bash_execute +// --------------------------------------------------------------------------- + +ToolInfo BashTools::bashExecute() { + ToolInfo info; + info.name = "bash_execute"; + info.description = + "Execute a shell command and return its output. " + "The command runs in the detected shell (bash preferred, sh fallback). " + "Output is truncated at 32 KB. Use timeout_ms to control the deadline."; + info.parameters = { + {"command", ToolParamType::STRING, /*required=*/true, + "The shell command to execute"}, + {"timeout_ms", ToolParamType::INTEGER, /*required=*/false, + "Timeout in milliseconds (default: 30000)"}, + }; + info.callback = doBashExecute; + info.policy = ToolPolicy::CONFIRM; + return info; +} + +json BashTools::doBashExecute(const json& args) { + // Extract arguments + std::string command = args.value("command", ""); + if (command.empty()) { + return {{"error", "command parameter is required"}}; + } + + int timeoutMs = args.value("timeout_ms", DEFAULT_TIMEOUT_MS); + if (timeoutMs <= 0) { + timeoutMs = DEFAULT_TIMEOUT_MS; + } + + // Detect the shell and build the full command + std::string shell = detectShell(); + std::string fullCommand; + +#ifdef _WIN32 + if (!shell.empty()) { + // Use detected bash/sh: wrap the command in shell -c "..." 
+ // Escape double quotes in the command for the outer shell + std::string escaped = command; + // Replace \ with \\ and " with \" for the bash -c wrapper + std::string safeCmd; + safeCmd.reserve(escaped.size() + 16); + for (char c : escaped) { + if (c == '\\') { + safeCmd += "\\\\"; + } else if (c == '"') { + safeCmd += "\\\""; + } else { + safeCmd += c; + } + } + fullCommand = shell + " -c \"" + safeCmd + "\""; + } else { + // No bash/sh available — run via cmd.exe directly + fullCommand = "cmd.exe /C " + command; + } +#else + // POSIX: always use bash -c (or sh -c as fallback) + if (shell.empty()) { + shell = "sh"; + } + // Escape single quotes for POSIX shell: replace ' with '\'' + std::string safeCmd; + safeCmd.reserve(command.size() + 16); + for (char c : command) { + if (c == '\'') { + safeCmd += "'\\''"; + } else { + safeCmd += c; + } + } + fullCommand = shell + " -c '" + safeCmd + "'"; +#endif + + // Execute via ProcessRunner + ProcessResult result = ProcessRunner::run(fullCommand, timeoutMs, "", {}, MAX_OUTPUT_BYTES); + + // Truncate stdout/stderr if needed + std::string stdoutStr = result.stdout_output; + std::string stderrStr = result.stderr_output; + + static constexpr const char* TRUNCATION_MSG = "\n... 
[output truncated at 32 KB]"; + static const size_t TRUNC_LEN = std::strlen(TRUNCATION_MSG); + if (stdoutStr.size() > MAX_OUTPUT_BYTES) { + stdoutStr.resize(MAX_OUTPUT_BYTES - TRUNC_LEN); + stdoutStr += TRUNCATION_MSG; + } + if (stderrStr.size() > MAX_OUTPUT_BYTES) { + stderrStr.resize(MAX_OUTPUT_BYTES - TRUNC_LEN); + stderrStr += TRUNCATION_MSG; + } + + return { + {"stdout", stdoutStr}, + {"stderr", stderrStr}, + {"exit_code", result.exitCode}, + {"timed_out", result.timedOut}, + }; +} + +// --------------------------------------------------------------------------- +// env_inspect +// --------------------------------------------------------------------------- + +ToolInfo BashTools::envInspect() { + ToolInfo info; + info.name = "env_inspect"; + info.description = + "Inspect the shell environment: detect shell version, OS info, " + "PATH entries, and check for common developer tools " + "(shellcheck, bats, jq, yq, curl, git, docker)."; + info.parameters = {}; // no args + info.callback = doEnvInspect; + info.policy = ToolPolicy::ALLOW; + return info; +} + +json BashTools::doEnvInspect(const json& /*args*/) { + json result; + + // --- Shell version --- + std::string shellVersion; + try { + shellVersion = ProcessRunner::runOrThrow("bash --version", 5000); + // Take only the first line + auto nl = shellVersion.find('\n'); + if (nl != std::string::npos) { + shellVersion = shellVersion.substr(0, nl); + } + } catch (...) { + shellVersion = "bash not available"; + } + result["shell"] = shellVersion; + + // --- OS info --- + std::string osInfo; + try { +#ifdef _WIN32 + osInfo = ProcessRunner::runOrThrow("systeminfo | findstr /B /C:\"OS Name\" /C:\"OS Version\"", 10000); +#else + osInfo = ProcessRunner::runOrThrow("uname -a", 5000); +#endif + // Trim trailing whitespace + while (!osInfo.empty() && (osInfo.back() == '\n' || osInfo.back() == '\r')) { + osInfo.pop_back(); + } + } catch (...) 
{ + osInfo = "unknown"; + } + result["os"] = osInfo; + + // --- PATH entries --- + json pathEntries = json::array(); + std::string pathVar; +#ifdef _WIN32 + pathVar = getEnvVar("PATH", ""); + char delimiter = ';'; +#else + pathVar = getEnvVar("PATH", ""); + char delimiter = ':'; +#endif + if (!pathVar.empty()) { + std::istringstream stream(pathVar); + std::string entry; + while (std::getline(stream, entry, delimiter)) { + if (!entry.empty()) { + pathEntries.push_back(entry); + } + } + } + result["path"] = pathEntries; + + // --- Installed tools --- + json tools = json::object(); + const std::vector toolNames = { + "shellcheck", "bats", "jq", "yq", "curl", "git", "docker" + }; + for (const auto& name : toolNames) { + tools[name] = isToolAvailable(name); + } + result["tools"] = tools; + + return result; +} + +// --------------------------------------------------------------------------- +// Shell detection +// --------------------------------------------------------------------------- + +std::string BashTools::detectShell() { +#ifdef _WIN32 + // On Windows, try these in order: + // 1. bash (Git Bash, MSYS2, WSL — typically on PATH) + // 2. 
sh (fallback) + if (isToolAvailable("bash")) { + return "bash"; + } + if (isToolAvailable("sh")) { + return "sh"; + } + // No POSIX shell found + return ""; +#else + // On POSIX, prefer bash, fall back to sh + if (isToolAvailable("bash")) { + return "bash"; + } + return "sh"; +#endif +} + +// --------------------------------------------------------------------------- +// Tool availability check +// --------------------------------------------------------------------------- + +bool BashTools::isToolAvailable(const std::string& toolName) { + if (toolName.empty()) { + return false; + } + +#ifdef _WIN32 + std::string cmd = "where " + toolName + " >nul 2>&1"; +#else + std::string cmd = "which " + toolName + " >/dev/null 2>&1"; +#endif + + try { + ProcessResult result = ProcessRunner::run(cmd, 3000); + return result.exitCode == 0; + } catch (...) { + return false; + } +} + +} // namespace gaia diff --git a/cpp/agents/bash/bash_tools.h b/cpp/agents/bash/bash_tools.h new file mode 100644 index 000000000..d1feb5764 --- /dev/null +++ b/cpp/agents/bash/bash_tools.h @@ -0,0 +1,64 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Bash-specific tool callbacks for the GAIA BashAgent. +// Provides shell execution and environment inspection tools. + +#pragma once + +#include + +#include "gaia/export.h" +#include "gaia/tool_registry.h" +#include "gaia/types.h" + +namespace gaia { + +/// Bash-specific tool callbacks for the BashAgent. +/// +/// Provides two tools: +/// - bash_execute: Run a shell command with timeout and output capture +/// - env_inspect: Inspect the shell environment, OS, PATH, and installed tools +/// +/// Usage: +/// BashTools::registerAll(agent.toolRegistry()); +class BashTools { +public: + /// Register all bash tools with the given registry. + static void registerAll(ToolRegistry& registry); + + /// bash_execute: Execute a shell command. 
+ /// Args: {"command": string, "timeout_ms"?: int (default 30000)} + /// Policy: CONFIRM (user must approve each command) + /// Returns: {"stdout": string, "stderr": string, "exit_code": int, "timed_out": bool} + /// On error: {"error": string} + static ToolInfo bashExecute(); + + /// env_inspect: Inspect the shell environment. + /// Args: {} (no args) + /// Policy: ALLOW (read-only inspection) + /// Returns: {"shell": string, "os": string, "path": [string], "tools": {"name": bool}} + static ToolInfo envInspect(); + +private: + // Implementation callbacks + static json doBashExecute(const json& args); + static json doEnvInspect(const json& args); + + /// Detect the best available shell on this system. + /// Returns the shell command prefix (e.g. "bash", "sh", "/usr/bin/bash"). + /// On Windows, checks for bash (WSL, Git Bash, MSYS2) then falls back to sh. + static std::string detectShell(); + + /// Check if a command is available on PATH. + /// Uses "which" on POSIX, "where" on Windows. + static bool isToolAvailable(const std::string& toolName); + + /// Maximum output size before truncation (32 KB). + static inline constexpr size_t MAX_OUTPUT_BYTES = 32768; + + /// Default command timeout in milliseconds. + static inline constexpr int DEFAULT_TIMEOUT_MS = 30000; +}; + +} // namespace gaia diff --git a/cpp/agents/bash/eval/bash_eval_adapter.py b/cpp/agents/bash/eval/bash_eval_adapter.py new file mode 100644 index 000000000..a2e6d8606 --- /dev/null +++ b/cpp/agents/bash/eval/bash_eval_adapter.py @@ -0,0 +1,383 @@ +# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +# SPDX-License-Identifier: MIT + +"""Adapter for running gaia eval scenarios against the gaia-bash REST API. 
class BashEvalAdapter:
    """Connects the GAIA eval framework to the gaia-bash API server.

    Attributes:
        base_url: Server root URL without a trailing slash.
        startup_timeout: Number of one-second health polls made by
            ``_wait_for_health`` before giving up.
        process: ``subprocess.Popen`` handle of a server started by
            ``start_server``; ``None`` when this adapter started no server.
    """

    def __init__(self, base_url="http://localhost:8200", startup_timeout=30):
        self.base_url = base_url.rstrip("/")
        self.startup_timeout = startup_timeout
        self.process = None

    def start_server(self, binary_path="./build/gaia-bash"):
        """Start gaia-bash in --serve mode as a subprocess and wait for it.

        The listening port is taken from ``base_url`` so the server and the
        health checks agree; 8200 is used when the URL carries no port.

        Raises:
            RuntimeError: If the server exits or never becomes healthy. The
                child process is stopped before the error propagates.
        """
        from urllib.parse import urlsplit

        port = urlsplit(self.base_url).port or 8200
        # The output is never consumed, so it must not go to a pipe: once the
        # pipe buffer fills the server would block on its next write.
        self.process = subprocess.Popen(
            [binary_path, "--serve", "--port", str(port)],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        try:
            self._wait_for_health()
        except BaseException:
            # Do not leak the child when startup fails or is interrupted.
            self.stop_server()
            raise

    def stop_server(self):
        """Stop the gaia-bash server started by ``start_server``, if any.

        Sends terminate, waits up to 5 seconds, then kills. Safe to call
        repeatedly or when no server was started.
        """
        if self.process:
            self.process.terminate()
            try:
                self.process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                self.process.kill()
                self.process.wait()
            self.process = None

    def _wait_for_health(self):
        """Poll ``/health`` once per second until it answers 200.

        Raises:
            RuntimeError: If the child process exits during startup, or no
                200 response arrives within ``startup_timeout`` polls.
        """
        for _ in range(self.startup_timeout):
            # Fail fast instead of polling a server that already died.
            if self.process is not None and self.process.poll() is not None:
                raise RuntimeError(
                    f"gaia-bash server exited during startup "
                    f"(exit code {self.process.returncode})"
                )
            try:
                r = requests.get(f"{self.base_url}/health", timeout=1)
                if r.status_code == 200:
                    return
            except requests.RequestException:
                # Refused connections and slow responses both mean
                # "not ready yet" while the server is booting.
                pass
            time.sleep(1)
        raise RuntimeError(
            f"gaia-bash server at {self.base_url} failed to start "
            f"within {self.startup_timeout}s"
        )

    def health(self):
        """Return the parsed JSON body of ``GET /health``.

        Raises:
            requests.RequestException: On connection failure or HTTP error.
        """
        r = requests.get(f"{self.base_url}/health", timeout=5)
        r.raise_for_status()
        return r.json()

    def send_query(self, prompt, timeout=120):
        """Send a non-streaming chat query and return the parsed response.

        Args:
            prompt: User message text.
            timeout: Request timeout in seconds.
        """
        r = requests.post(
            f"{self.base_url}/v1/chat/completions",
            json={
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
            },
            timeout=timeout,
        )
        r.raise_for_status()
        return r.json()

    def execute_tool(self, tool_name, args, timeout=30):
        """Execute a specific tool directly via ``POST /v1/tools/<name>``.

        Args:
            tool_name: Name of the tool endpoint.
            args: JSON-serialisable arguments sent as the request body.
            timeout: Request timeout in seconds.
        """
        r = requests.post(
            f"{self.base_url}/v1/tools/{tool_name}",
            json=args,
            timeout=timeout,
        )
        r.raise_for_status()
        return r.json()

    def list_tools(self):
        """Return the parsed JSON body of ``GET /v1/tools``."""
        r = requests.get(f"{self.base_url}/v1/tools", timeout=5)
        r.raise_for_status()
        return r.json()

    def run_scenario(self, scenario, ground_truth=None):
        """Run a single eval scenario and return its result dict.

        Args:
            scenario: Dict with at least ``id``, ``category`` and ``prompt``.
            ground_truth: Optional mapping of scenario id to criteria
                understood by ``_validate_ground_truth``.

        Returns:
            Dict with ``scenario_id``, ``category``, ``success``, ``errors``,
            ``response``, ``tools_used`` and, when the query succeeded,
            ``content``. Request failures are reported through ``errors``
            rather than raised.
        """
        scenario_id = scenario["id"]
        prompt = scenario["prompt"]

        result = {
            "scenario_id": scenario_id,
            "category": scenario["category"],
            "success": False,
            "errors": [],
            "response": None,
            "tools_used": [],
        }

        try:
            response = self.send_query(prompt)
            result["response"] = response

            # Extract response content. "choices" may be empty and "message"
            # or "content" may be JSON null; treat all of those as "".
            content = ""
            choices = response.get("choices") or []
            if choices:
                message = choices[0].get("message") or {}
                content = message.get("content") or ""
            result["content"] = content

            # Validate against ground truth if provided
            if ground_truth and scenario_id in ground_truth:
                gt = ground_truth[scenario_id]
                result["errors"] = self._validate_ground_truth(content, gt)

            result["success"] = len(result["errors"]) == 0

        except requests.RequestException as e:
            result["errors"].append(f"HTTP error: {e}")
            result["success"] = False
        except Exception as e:
            result["errors"].append(f"Unexpected error: {e}")
            result["success"] = False

        return result

    def _validate_ground_truth(self, content, gt):
        """Validate response content against ground truth criteria.

        All text comparisons are case-insensitive substring checks.

        Args:
            content: Response text; ``None`` is treated as empty.
            gt: Criteria dict. Recognised keys: ``must_contain`` (with the
                optional ``must_contain_any`` flag), ``must_not_contain``,
                ``response_must_mention``, ``response_must_contain``,
                ``expect_error``, ``expect_nonzero_exit``, ``expect_timeout``.

        Returns:
            List of human-readable error strings; empty when all checks pass.
        """
        errors = []
        content_lower = (content or "").lower()

        # Check must_contain (all terms, or at least one with must_contain_any)
        if "must_contain" in gt:
            must_contain_any = gt.get("must_contain_any", False)
            found_any = False
            for term in gt["must_contain"]:
                if term.lower() in content_lower:
                    found_any = True
                elif not must_contain_any:
                    errors.append(f"Missing required content: '{term}'")
            if must_contain_any and not found_any:
                errors.append(
                    f"Must contain at least one of: {gt['must_contain']}"
                )

        # Check must_not_contain
        for term in gt.get("must_not_contain", []):
            if term.lower() in content_lower:
                errors.append(f"Contains forbidden content: '{term}'")

        # Check response_must_mention
        for term in gt.get("response_must_mention", []):
            if term.lower() not in content_lower:
                errors.append(f"Response should mention: '{term}'")

        # Check response_must_contain
        if "response_must_contain" in gt:
            term = gt["response_must_contain"]
            if term.lower() not in content_lower:
                errors.append(f"Response must contain: '{term}'")

        # Note: expected_tools and tool_args_must_contain are not checked.
        # The API returns only the final answer, not the tool call trace,
        # so which tools were used cannot be verified from the response
        # content alone. A future enhancement could parse structured tool
        # call events.

        # Check error expectations
        if gt.get("expect_error"):
            if "error" not in content_lower:
                errors.append("Expected error response but none found")

        if gt.get("expect_nonzero_exit"):
            # Look for non-zero exit code indicators
            has_nonzero = any(
                indicator in content_lower
                for indicator in ["exit code", "exit_code", "non-zero", "failed", "error"]
            )
            if not has_nonzero:
                errors.append("Expected non-zero exit code but not indicated")

        if gt.get("expect_timeout"):
            if "timeout" not in content_lower and "timed_out" not in content_lower:
                errors.append("Expected timeout but not indicated in response")

        return errors


def load_scenarios(path=None):
    """Load eval scenarios from a JSON file.

    Args:
        path: File to read; defaults to ``bash_scenarios.json`` next to this
            module.

    Returns:
        The value stored under the file's ``"scenarios"`` key.
    """
    if path is None:
        path = Path(__file__).parent / "bash_scenarios.json"
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)["scenarios"]


def load_ground_truth(path=None):
    """Load ground truth from a JSON file.

    Args:
        path: File to read; defaults to ``bash_ground_truth.json`` next to
            this module.

    Returns:
        The value stored under the file's ``"ground_truth"`` key.
    """
    if path is None:
        path = Path(__file__).parent / "bash_ground_truth.json"
    with open(path, encoding="utf-8") as f:
        return json.load(f)["ground_truth"]


def run_eval(
    base_url="http://localhost:8200",
    binary_path=None,
    scenario_filter=None,
    verbose=False,
):
    """Run the full bash agent evaluation.

    Args:
        base_url: URL of a running gaia-bash API server.
        binary_path: If set, start the server automatically.
        scenario_filter: If set, only run scenarios matching this ID.
        verbose: Print detailed output.

    Returns:
        List of result dicts, one per scenario. Empty when
        ``scenario_filter`` matches nothing.
    """
    adapter = BashEvalAdapter(base_url)

    try:
        # Started inside the try block so the finally clause also covers a
        # failure during startup.
        if binary_path:
            print(f"Starting gaia-bash server from {binary_path}...")
            adapter.start_server(binary_path)

        # Verify server is up
        health = adapter.health()
        print(f"Server healthy: {health}")

        tools = adapter.list_tools()
        tool_count = len(tools.get("tools", []))
        print(f"Tools available: {tool_count}")

        scenarios = load_scenarios()
        ground_truth = load_ground_truth()

        if scenario_filter:
            scenarios = [s for s in scenarios if s["id"] == scenario_filter]
            if not scenarios:
                print(f"No scenario found with id: {scenario_filter}")
                return []

        results = []
        passed = 0
        failed = 0

        for scenario in scenarios:
            sid = scenario["id"]
            cat = scenario["category"]
            prompt_preview = scenario["prompt"][:60].replace("\n", " ")

            print(f"\n[{cat}] {sid}")
            print(f" Prompt: {prompt_preview}...")

            result = adapter.run_scenario(scenario, ground_truth)
            results.append(result)

            if result["success"] and not result["errors"]:
                passed += 1
                print(" PASS")
            else:
                failed += 1
                for err in result["errors"]:
                    print(f" FAIL: {err}")

            if verbose and result.get("content"):
                preview = result["content"][:200].replace("\n", " ")
                print(f" Response: {preview}...")

        # Summary
        total = len(results)
        print(f"\n{'=' * 60}")
        print(f"Results: {passed}/{total} passed, {failed}/{total} failed")
        print(f"{'=' * 60}")

        # Category breakdown
        categories = {}
        for r in results:
            cat = r["category"]
            if cat not in categories:
                categories[cat] = {"passed": 0, "total": 0}
            categories[cat]["total"] += 1
            if r["success"] and not r["errors"]:
                categories[cat]["passed"] += 1

        for cat, stats in sorted(categories.items()):
            print(f" {cat}: {stats['passed']}/{stats['total']}")

        return results

    finally:
        if binary_path:
            adapter.stop_server()


def main():
    """Command-line entry point.

    Parses the arguments, runs the evaluation, optionally writes the results
    as JSON, and exits with status 0 only when at least one scenario ran and
    every scenario passed.
    """
    parser = argparse.ArgumentParser(
        description="Run bash agent eval scenarios"
    )
    parser.add_argument(
        "--url",
        default="http://localhost:8200",
        help="gaia-bash API server URL (default: http://localhost:8200)",
    )
    parser.add_argument(
        "--binary",
        default=None,
        help="Path to gaia-bash binary (starts server automatically)",
    )
    parser.add_argument(
        "--scenario",
        default=None,
        help="Run a specific scenario by ID",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Print detailed output",
    )
    parser.add_argument(
        "--json-output",
        default=None,
        help="Write results to JSON file",
    )

    args = parser.parse_args()

    results = run_eval(
        base_url=args.url,
        binary_path=args.binary,
        scenario_filter=args.scenario,
        verbose=args.verbose,
    )

    if args.json_output:
        with open(args.json_output, "w") as f:
            json.dump(results, f, indent=2)
        print(f"\nResults written to {args.json_output}")

    # all() is vacuously true for an empty list; an eval that ran nothing
    # (e.g. an unknown --scenario id) must not be reported as a success.
    all_passed = bool(results) and all(
        r.get("success") and not r.get("errors") for r in results
    )
    sys.exit(0 if all_passed else 1)


if __name__ == "__main__":
    main()
"expected_tools": ["file_write"], + "shellcheck_max_warnings": 2 + }, + "bash-write-logrotate": { + "must_contain": ["find", "-mtime", "gzip"], + "expected_tools": ["file_write"], + "shellcheck_max_warnings": 2 + }, + "bash-write-parallel-dl": { + "must_contain": ["xargs", "curl"], + "expected_tools": ["file_write"], + "shellcheck_max_warnings": 2 + }, + "bash-write-systemd": { + "must_contain": ["[Unit]", "[Service]", "[Install]"], + "expected_tools": ["file_write"], + "shellcheck_max_warnings": 0 + }, + "bash-write-sshkey": { + "must_contain": ["ssh"], + "expected_tools": ["file_write"], + "shellcheck_max_warnings": 2 + }, + "bash-review-injection": { + "must_contain": ["eval", "injection", "quote"], + "must_contain_any": false, + "expected_tools": [], + "response_must_mention": ["command injection", "unquoted"] + }, + "bash-review-portability": { + "response_must_mention": ["bashism", "POSIX", "declare", "[["], + "expected_tools": [] + }, + "bash-review-performance": { + "response_must_mention": ["cat", "awk"], + "expected_tools": [] + }, + "bash-review-errors": { + "response_must_mention": ["set -e", "error", "cd"], + "expected_tools": [] + }, + "bash-review-race": { + "response_must_mention": ["race", "mktemp", "TOCTOU"], + "expected_tools": [] + }, + "bash-tool-filesearch": { + "expected_tools": ["file_search"], + "tool_args_must_contain": {"pattern": "*.py"} + }, + "bash-tool-gitlog": { + "expected_tools": ["git_log"], + "tool_args_must_contain": {"count": 5} + }, + "bash-tool-fileread": { + "expected_tools": ["file_read"], + "tool_args_must_contain": {"path": "cpp/CMakeLists.txt"} + }, + "bash-tool-execute": { + "expected_tools": ["bash_execute"], + "response_must_contain": "hello world" + }, + "bash-tool-envcheck": { + "expected_tools": ["env_inspect"] + }, + "bash-tool-search-todo": { + "expected_tools": ["file_search"], + "tool_args_must_contain": {"content_pattern": "TODO"} + }, + "bash-tool-gitdiff": { + "expected_tools": ["git_diff"], + 
"tool_args_must_contain": {"staged": true} + }, + "bash-error-nonexistent": { + "expected_tools": ["bash_execute"], + "expect_error": true, + "expect_nonzero_exit": true + }, + "bash-error-timeout": { + "expected_tools": ["bash_execute"], + "expect_timeout": true + }, + "bash-error-readonly": { + "expected_tools": ["file_write"], + "expect_error": true + }, + "bash-posix-convert": { + "must_not_contain": ["declare", "[[", "$(<"], + "must_contain": ["#!/bin/sh"], + "expected_tools": ["file_write"] + }, + "bash-posix-explain": { + "response_must_mention": ["POSIX", "test", "keyword"], + "expected_tools": [] + } + } +} diff --git a/cpp/agents/bash/eval/bash_scenarios.json b/cpp/agents/bash/eval/bash_scenarios.json new file mode 100644 index 000000000..4a946b5c3 --- /dev/null +++ b/cpp/agents/bash/eval/bash_scenarios.json @@ -0,0 +1,293 @@ +{ + "version": 1, + "description": "Bash coding agent evaluation scenarios", + "scenarios": [ + { + "id": "bash-write-dedup", + "category": "script_writing", + "prompt": "Write a script that finds duplicate files by MD5 checksum in a given directory", + "tools_expected": ["file_write", "bash_execute"], + "acceptance_criteria": [ + "Uses set -euo pipefail or equivalent error handling", + "Uses find + md5sum or shasum for checksumming", + "Handles filenames with spaces", + "Groups and reports duplicates", + "Accepts directory as argument with sensible default" + ] + }, + { + "id": "bash-write-backup", + "category": "script_writing", + "prompt": "Write a backup script that uses rsync with incremental snapshots and date-stamped directories", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Uses rsync with --link-dest for incremental backups", + "Creates date-stamped snapshot directories", + "Handles errors and reports failures", + "Configurable source and destination" + ] + }, + { + "id": "bash-write-csv", + "category": "script_writing", + "prompt": "Write a script to parse a CSV file and extract a specific column by 
number", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Uses awk or cut for column extraction", + "Accepts column number as argument", + "Handles CSV with headers option", + "Handles edge cases like empty fields" + ] + }, + { + "id": "bash-write-cron", + "category": "script_writing", + "prompt": "Write a cron job manager script that can add, remove, and list crontab entries", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Uses crontab -l and crontab - for read/write", + "Supports add, remove, and list subcommands", + "Uses getopts or case for argument parsing", + "Validates cron expression format" + ] + }, + { + "id": "bash-write-logrotate", + "category": "script_writing", + "prompt": "Write a log rotation script that compresses old logs and enforces a retention period", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Uses find -mtime for age-based selection", + "Compresses with gzip or xz", + "Configurable retention days and log directory", + "Reports what was compressed and deleted" + ] + }, + { + "id": "bash-write-parallel-dl", + "category": "script_writing", + "prompt": "Write a script that downloads a list of URLs in parallel using xargs and curl", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Uses xargs -P for parallel execution", + "Uses curl with retry and timeout flags", + "Reads URLs from file or stdin", + "Reports success/failure per URL" + ] + }, + { + "id": "bash-write-systemd", + "category": "script_writing", + "prompt": "Write a script that generates a systemd service file for a given binary", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Generates valid [Unit], [Service], [Install] sections", + "Accepts binary path and service name as arguments", + "Sets sensible defaults (restart, user, working dir)", + "Outputs to /etc/systemd/system/ or stdout" + ] + }, + { + "id": "bash-write-sshkey", + "category": "script_writing", + "prompt": "Write a script to 
deploy SSH keys to multiple remote hosts from a hosts file", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Reads hosts from a file (one per line)", + "Uses ssh-copy-id or manual key deployment", + "Handles connection failures gracefully", + "Reports success/failure per host" + ] + }, + { + "id": "bash-review-injection", + "category": "script_review", + "prompt": "Review this script for security issues:\n\n#!/bin/bash\nUSER_INPUT=$1\nresult=$(eval \"echo $USER_INPUT\")\nfiles=$(ls $result)\nfor f in $files; do\n cat $f\ndone", + "tools_expected": ["file_read"], + "acceptance_criteria": [ + "Identifies eval with unsanitized input as command injection", + "Identifies unquoted variable expansions", + "Recommends quoting all variables", + "Suggests removing eval entirely" + ] + }, + { + "id": "bash-review-portability", + "category": "script_review", + "prompt": "Review this script for portability issues:\n\n#!/bin/sh\ndeclare -A counts\nfor f in $(ls *.txt); do\n if [[ -f \"$f\" ]]; then\n counts[$f]=$(wc -l < \"$f\")\n fi\ndone\necho \"${!counts[@]}\"", + "tools_expected": ["file_read"], + "acceptance_criteria": [ + "Identifies declare -A as bash-only (not POSIX sh)", + "Identifies [[ ]] as bashism", + "Identifies ${!array[@]} as bashism", + "Suggests POSIX alternatives for each" + ] + }, + { + "id": "bash-review-performance", + "category": "script_review", + "prompt": "Review this script for performance:\n\n#!/bin/bash\ncat access.log | grep 'ERROR' | grep -v 'DEBUG' | awk '{print $1}' | sort | uniq -c | sort -rn | head -10", + "tools_expected": [], + "acceptance_criteria": [ + "Identifies useless use of cat", + "Suggests combining grep + awk into single awk", + "Notes sort | uniq -c can be sort -u or awk", + "Provides optimized one-liner" + ] + }, + { + "id": "bash-review-errors", + "category": "script_review", + "prompt": "Review this script for error handling:\n\n#!/bin/bash\ncd /tmp/workdir\nrm -rf output/\nmkdir output\ncp important.dat 
output/\nprocess_data output/important.dat > output/result.txt\necho \"Done\"", + "tools_expected": [], + "acceptance_criteria": [ + "Identifies missing set -e or error checking after cd", + "Notes rm -rf without confirming directory exists", + "Warns cd could fail silently leaving us in wrong directory", + "Recommends set -euo pipefail or explicit checks" + ] + }, + { + "id": "bash-review-race", + "category": "script_review", + "prompt": "Review this script for race conditions:\n\n#!/bin/bash\nTMPFILE=/tmp/myapp_$$\nif [ ! -f $TMPFILE ]; then\n echo \"data\" > $TMPFILE\nfi\ncat $TMPFILE\nrm $TMPFILE", + "tools_expected": [], + "acceptance_criteria": [ + "Identifies TOCTOU race between test and write", + "Notes predictable temp filename from PID", + "Recommends mktemp for safe temp file creation", + "Suggests trap for cleanup on exit" + ] + }, + { + "id": "bash-tool-filesearch", + "category": "tool_usage", + "prompt": "List all Python files in the current directory recursively", + "tools_expected": ["file_search"], + "acceptance_criteria": [ + "Uses file_search tool with pattern *.py", + "Returns file paths in results" + ] + }, + { + "id": "bash-tool-gitlog", + "category": "tool_usage", + "prompt": "Show me the git log for the last 5 commits", + "tools_expected": ["git_log"], + "acceptance_criteria": [ + "Uses git_log tool with count=5", + "Returns commit history" + ] + }, + { + "id": "bash-tool-fileread", + "category": "tool_usage", + "prompt": "Read the first 20 lines of the file cpp/CMakeLists.txt", + "tools_expected": ["file_read"], + "acceptance_criteria": [ + "Uses file_read with start_line and end_line parameters", + "Returns the requested lines" + ] + }, + { + "id": "bash-tool-execute", + "category": "tool_usage", + "prompt": "Run 'echo hello world' and show me the output", + "tools_expected": ["bash_execute"], + "acceptance_criteria": [ + "Uses bash_execute tool", + "Returns stdout containing 'hello world'", + "Exit code is 0" + ] + }, + { + "id": 
"bash-tool-envcheck", + "category": "tool_usage", + "prompt": "Check what development tools are installed on this system", + "tools_expected": ["env_inspect"], + "acceptance_criteria": [ + "Uses env_inspect tool", + "Reports shell version and installed tools" + ] + }, + { + "id": "bash-tool-search-todo", + "category": "tool_usage", + "prompt": "Search for TODO comments in all shell scripts in this repository", + "tools_expected": ["file_search"], + "acceptance_criteria": [ + "Uses file_search with content_pattern for TODO", + "Filters to *.sh files" + ] + }, + { + "id": "bash-tool-gitdiff", + "category": "tool_usage", + "prompt": "Show the git diff for staged changes", + "tools_expected": ["git_diff"], + "acceptance_criteria": [ + "Uses git_diff tool with staged=true", + "Returns diff output" + ] + }, + { + "id": "bash-error-nonexistent", + "category": "error_handling", + "prompt": "Run this command: ls /nonexistent/path/that/does/not/exist", + "tools_expected": ["bash_execute"], + "acceptance_criteria": [ + "Uses bash_execute", + "Returns non-zero exit code", + "Error message in stderr", + "Does not crash the agent" + ] + }, + { + "id": "bash-error-timeout", + "category": "error_handling", + "prompt": "Execute 'sleep 60' with a 2-second timeout", + "tools_expected": ["bash_execute"], + "acceptance_criteria": [ + "Uses bash_execute with timeout_ms parameter", + "Returns timed_out=true", + "Process is killed after timeout" + ] + }, + { + "id": "bash-error-readonly", + "category": "error_handling", + "prompt": "Try to write the text 'test' to /etc/shadow", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Returns error about permission denied", + "Does not crash the agent", + "Does not actually modify /etc/shadow" + ] + }, + { + "id": "bash-posix-convert", + "category": "posix_compliance", + "prompt": "Convert this bash script to POSIX sh:\n\n#!/bin/bash\ndeclare -a files\nfor f in *.txt; do\n if [[ -f \"$f\" ]]; then\n content=$(<\"$f\")\n 
files+=(\"$f\")\n fi\ndone\necho \"Found ${#files[@]} files\"", + "tools_expected": ["file_write"], + "acceptance_criteria": [ + "Replaces declare -a with positional params or manual list", + "Replaces [[ ]] with [ ]", + "Replaces $( Resume a saved session +// gaia-bash --list-sessions List saved sessions and exit +// gaia-bash --model Override the default model +// gaia-bash --no-tui Force CleanConsole output +// gaia-bash --json-events Emit JSONL events to stdout (for TUI/WebUI) +// gaia-bash --query "text" Non-interactive single query (pair with --json-events) +// gaia-bash --debug Enable debug logging + +#include +#include +#include +#include + +#include "api_server.h" +#include "bash_agent.h" +#include "bash_tools.h" +#include "mcp_server.h" + +#include +#include +#include +#include +#include + +namespace color = gaia::color; + +// --------------------------------------------------------------------------- +// Argument parsing helpers +// --------------------------------------------------------------------------- + +/// Print usage information and exit. 
+static void printUsage(const char* progName) { + std::cout << color::BOLD << "gaia-bash" << color::RESET + << " — GAIA Bash Agent\n\n" + << color::BOLD << "Usage:" << color::RESET << "\n" + << " " << progName << " Interactive mode (default)\n" + << " " << progName << " \"\" Single query mode\n" + << " " << progName << " --print Pipe mode (no TUI)\n" + << " " << progName << " --serve [--port N] API server (default port 8200)\n" + << " " << progName << " --mcp MCP server (stdio JSON-RPC)\n" + << " " << progName << " --resume Resume a saved session\n" + << " " << progName << " --list-sessions List saved sessions\n" + << " " << progName << " --model Override model\n" + << " " << progName << " --no-tui Force plain console output\n" + << " " << progName << " --json-events JSONL events to stdout (for TUI/WebUI)\n" + << " " << progName << " --query \"\" Non-interactive query (use with --json-events)\n" + << " " << progName << " --debug Enable debug logging\n" + << " " << progName << " --help Show this help\n"; +} + +/// List saved sessions and exit. +static int listSessions() { + gaia::SessionStore store; + auto sessions = store.list(); + + if (sessions.empty()) { + std::cout << color::GRAY << "No saved sessions." 
<< color::RESET << "\n"; + return 0; + } + + std::cout << color::BOLD << "Saved sessions:" << color::RESET << "\n\n"; + for (const auto& s : sessions) { + std::cout << color::CYAN << " " << s.id << color::RESET + << color::GRAY << " (" << s.messageCount << " messages, " + << s.timestamp << ")" << color::RESET << "\n" + << " " << s.preview << "\n\n"; + } + return 0; +} + +// --------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------- + +int main(int argc, char* argv[]) { + try { + // Parse arguments + std::string query; + std::string resumeId; + std::string modelOverride; + int port = 0; + bool printMode = false; + bool serveMode = false; + bool mcpMode = false; + bool noTui = false; + bool jsonEvents = false; + std::string queryArg; + bool debug = false; + bool showHelp = false; + bool listSessionsFlag = false; + + for (int i = 1; i < argc; ++i) { + std::string arg = argv[i]; + + if (arg == "--help" || arg == "-h") { + showHelp = true; + } else if (arg == "--print") { + printMode = true; + } else if (arg == "--serve") { + serveMode = true; + } else if (arg == "--port") { + if (i + 1 < argc) { + try { + port = std::stoi(argv[++i]); + } catch (...) 
{ + std::cerr << color::RED << "Error: --port requires a numeric value" + << color::RESET << "\n"; + return 1; + } + } else { + std::cerr << color::RED << "Error: --port requires a value" + << color::RESET << "\n"; + return 1; + } + } else if (arg == "--mcp") { + mcpMode = true; + } else if (arg == "--resume") { + if (i + 1 < argc) { + resumeId = argv[++i]; + } else { + std::cerr << color::RED << "Error: --resume requires a session ID" + << color::RESET << "\n"; + return 1; + } + } else if (arg == "--list-sessions") { + listSessionsFlag = true; + } else if (arg == "--model") { + if (i + 1 < argc) { + modelOverride = argv[++i]; + } else { + std::cerr << color::RED << "Error: --model requires a model name" + << color::RESET << "\n"; + return 1; + } + } else if (arg == "--no-tui") { + noTui = true; + } else if (arg == "--json-events") { + jsonEvents = true; + } else if (arg == "--query") { + if (i + 1 < argc) { + queryArg = argv[++i]; + } else { + std::cerr << color::RED << "Error: --query requires a value" + << color::RESET << "\n"; + return 1; + } + } else if (arg == "--debug") { + debug = true; + } else if (arg[0] == '-') { + std::cerr << color::RED << "Unknown option: " << arg + << color::RESET << "\n"; + printUsage(argv[0]); + return 1; + } else { + // Positional argument = query + if (query.empty()) { + query = arg; + } else { + // Append additional positional args with spaces + query += " "; + query += arg; + } + } + } + + // Handle help + if (showHelp) { + printUsage(argv[0]); + return 0; + } + + // Handle --list-sessions + if (listSessionsFlag) { + return listSessions(); + } + + // Handle --serve (API server mode) + if (serveMode) { + int serverPort = (port > 0) ? 
port : 8200; + + gaia::AgentConfig apiConfig; + apiConfig.debug = debug; + apiConfig.contextSize = 32768; + apiConfig.modelId = "Gemma-4-E4B-it-GGUF"; + if (!modelOverride.empty()) apiConfig.modelId = modelOverride; + + gaia::BashAgent apiAgent(apiConfig); + // API server has no stdin — auto-allow all tool confirmations + apiAgent.setToolConfirmCallback( + [](const std::string&, const gaia::json&) { + return gaia::ToolConfirmResult::ALLOW_ONCE; + }); + gaia::ApiServer server(apiAgent, serverPort); + server.setSessionStore(std::make_shared()); + + std::cerr << color::GREEN << color::BOLD << "gaia-bash" + << color::RESET << " API server starting on port " + << serverPort << "\n"; + server.run(); // blocking + return 0; + } + + // Handle --mcp (MCP stdio server mode) + if (mcpMode) { + gaia::AgentConfig mcpConfig; + mcpConfig.debug = debug; + mcpConfig.silentMode = true; // no console output on stdout + mcpConfig.contextSize = 32768; + mcpConfig.modelId = "Gemma-4-E4B-it-GGUF"; + if (!modelOverride.empty()) mcpConfig.modelId = modelOverride; + + gaia::BashAgent mcpAgent(mcpConfig); + // In MCP mode, the external agent handles safety — auto-allow all tools + mcpAgent.setToolConfirmCallback( + [](const std::string&, const gaia::json&) { + return gaia::ToolConfirmResult::ALLOW_ONCE; + }); + gaia::McpServer mcpServer(mcpAgent); + + mcpServer.run(); // blocking, reads stdin + return 0; + } + + // Handle --json-events mode (JSONL subprocess for TUI/WebUI) + if (jsonEvents) { + gaia::AgentConfig jeConfig; + jeConfig.debug = debug; + jeConfig.contextSize = 32768; + jeConfig.streaming = false; // avoid raw JSON tokens in output + jeConfig.structuredEvents = true; // emit thought/goal/answer events + if (!modelOverride.empty()) jeConfig.modelId = modelOverride; + + gaia::BashAgent jeAgent(jeConfig); + jeAgent.setOutputHandler(std::make_unique()); + // In JSON events mode, auto-allow all tools (TUI handles confirmation) + jeAgent.setToolConfirmCallback( + [](const std::string&, 
const gaia::json&) { + return gaia::ToolConfirmResult::ALLOW_ONCE; + }); + + // --query "text" runs a single query + std::string jeQuery = queryArg.empty() ? query : queryArg; + if (!jeQuery.empty()) { + jeAgent.processQuery(jeQuery); + return 0; + } + + // Interactive JSONL mode: read queries from stdin, emit events to stdout. + // Each line on stdin is a user query; events go to stdout as JSONL. + std::string line; + while (std::getline(std::cin, line)) { + if (line.empty()) continue; + jeAgent.processQuery(line); + } + return 0; + } + + // Build agent config + gaia::AgentConfig config; + config.debug = debug; + config.contextSize = 32768; // bash agent needs 32K for system prompt + tools + config.modelId = "Gemma-4-E4B-it-GGUF"; + + if (!modelOverride.empty()) { + config.modelId = modelOverride; + } + + // --print implies --no-tui and auto-allows tools (no interactive stdin) + if (printMode) { + noTui = true; + } + + // Create agent + gaia::BashAgent agent(config); + + // In pipe/print mode, auto-allow all tools since there's no stdin for confirmation + if (printMode) { + agent.setToolConfirmCallback( + [](const std::string&, const gaia::json&) { + return gaia::ToolConfirmResult::ALLOW_ONCE; + }); + } + + // Set up the REPL + gaia::ReplRunner repl(agent); + repl.setSessionStore(std::make_shared()); + + if (!resumeId.empty()) { + repl.setResumeId(resumeId); + } + + if (noTui) { + repl.setUseTui(false); + } + + // Register bash-specific slash commands + repl.addCommand("/run", "Execute a bash command directly", + [](const std::string& args, gaia::Agent& a) { + if (args.empty()) { + a.console().printWarning("Usage: /run "); + return; + } + // Execute directly via bash_execute tool + gaia::json toolArgs = {{"command", args}}; + auto result = a.toolRegistry().executeTool("bash_execute", toolArgs); + if (result.contains("error")) { + a.console().printError(result["error"].get()); + } else { + std::string output; + if (result.contains("stdout") && 
!result["stdout"].get().empty()) { + output = result["stdout"].get(); + } + if (result.contains("stderr") && !result["stderr"].get().empty()) { + if (!output.empty()) output += "\n"; + output += result["stderr"].get(); + } + if (!output.empty()) { + a.console().printInfo(output); + } + int exitCode = result.value("exit_code", -1); + if (exitCode != 0) { + a.console().printWarning("Exit code: " + std::to_string(exitCode)); + } + } + }); + + repl.addCommand("/env", "Show environment info (shell, OS, tools)", + [](const std::string& /*args*/, gaia::Agent& a) { + auto result = a.toolRegistry().executeTool("env_inspect", gaia::json::object()); + if (result.contains("error")) { + a.console().printError(result["error"].get()); + } else { + // Print formatted environment info directly to stdout + // (printInfo is a no-op in CleanConsole, so use cout) + if (result.contains("shell")) { + std::cout << gaia::color::CYAN << " Shell: " + << gaia::color::RESET << result["shell"].get() << std::endl; + } + if (result.contains("os")) { + std::string os = result["os"].get(); + auto cr = os.find('\r'); + if (cr != std::string::npos) os = os.substr(0, cr); + std::cout << gaia::color::CYAN << " OS: " + << gaia::color::RESET << os << std::endl; + } + if (result.contains("tools") && result["tools"].is_object()) { + std::string installed, missing; + for (auto& [name, avail] : result["tools"].items()) { + if (avail.get()) { + if (!installed.empty()) installed += ", "; + installed += name; + } else { + if (!missing.empty()) missing += ", "; + missing += name; + } + } + if (!installed.empty()) { + std::cout << gaia::color::GREEN << " Tools: " + << gaia::color::RESET << installed << std::endl; + } + if (!missing.empty()) { + a.console().printWarning("Not found: " + missing); + } + } + } + }); + + // Single query mode + if (!query.empty()) { + return repl.runOnce(query); + } + + // Interactive mode + repl.run(); + return 0; + + } catch (const std::exception& e) { + std::cerr << color::RED << 
color::BOLD << "Fatal error: " + << color::RESET << color::RED << e.what() + << color::RESET << "\n"; + return 1; + } +} diff --git a/cpp/agents/bash/mcp_server.cpp b/cpp/agents/bash/mcp_server.cpp new file mode 100644 index 000000000..4324ec88f --- /dev/null +++ b/cpp/agents/bash/mcp_server.cpp @@ -0,0 +1,283 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "mcp_server.h" + +#include "gaia/agent.h" +#include "gaia/tool_registry.h" + +#include +#include + +namespace gaia { + +McpServer::McpServer(Agent& agent) : agent_(agent) {} + +// --------------------------------------------------------------------------- +// run() — main stdio loop +// --------------------------------------------------------------------------- + +void McpServer::run() { + // All debug/status output goes to stderr — stdout is the MCP transport. + std::cerr << "[gaia-bash] MCP server started, reading from stdin..." << std::endl; + + std::string line; + while (std::getline(std::cin, line)) { + if (line.empty()) continue; + + try { + json request = json::parse(line); + json response = handleRequest(request); + std::cout << response.dump() << std::endl; + std::cout.flush(); + } catch (const json::parse_error& e) { + json error = { + {"jsonrpc", "2.0"}, + {"id", nullptr}, + {"error", {{"code", -32700}, {"message", std::string("Parse error: ") + e.what()}}} + }; + std::cout << error.dump() << std::endl; + std::cout.flush(); + } catch (const std::exception& e) { + json error = { + {"jsonrpc", "2.0"}, + {"id", nullptr}, + {"error", {{"code", -32603}, {"message", std::string("Internal error: ") + e.what()}}} + }; + std::cout << error.dump() << std::endl; + std::cout.flush(); + } + } + + std::cerr << "[gaia-bash] MCP server shutting down (stdin closed)" << std::endl; +} + +// --------------------------------------------------------------------------- +// handleRequest — dispatch by method +// 
--------------------------------------------------------------------------- + +json McpServer::handleRequest(const json& request) { + auto id = request.value("id", json(nullptr)); + std::string method = request.value("method", std::string("")); + json params = request.value("params", json::object()); + + json result; + + if (method == "initialize") { + result = handleInitialize(params); + } else if (method == "tools/list") { + result = handleToolsList(params); + } else if (method == "tools/call") { + result = handleToolsCall(params); + } else if (method == "prompts/list") { + result = handlePromptsList(params); + } else if (method == "prompts/get") { + result = handlePromptsGet(params); + } else if (method == "notifications/initialized") { + // Client acknowledgement — no response needed, but return empty result + return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", json::object()}}; + } else { + return json{ + {"jsonrpc", "2.0"}, + {"id", id}, + {"error", {{"code", -32601}, {"message", "Method not found: " + method}}} + }; + } + + return json{{"jsonrpc", "2.0"}, {"id", id}, {"result", result}}; +} + +// --------------------------------------------------------------------------- +// initialize +// --------------------------------------------------------------------------- + +json McpServer::handleInitialize(const json& /*params*/) { + return json{ + {"protocolVersion", "2024-11-05"}, + {"capabilities", { + {"tools", json::object()}, + {"prompts", json::object()} + }}, + {"serverInfo", { + {"name", "gaia-bash"}, + {"version", "0.1.0"} + }} + }; +} + +// --------------------------------------------------------------------------- +// tools/list +// --------------------------------------------------------------------------- + +json McpServer::handleToolsList(const json& /*params*/) { + json tools = json::array(); + for (const auto& [name, info] : agent_.tools().allTools()) { + if (!info.enabled) continue; + tools.push_back(toolInfoToMcp(info)); + } + return 
json{{"tools", tools}}; +} + +json McpServer::toolInfoToMcp(const ToolInfo& tool) { + // Build JSON Schema for inputSchema + json properties = json::object(); + json required = json::array(); + + for (const auto& param : tool.parameters) { + json prop = { + {"type", paramTypeToJsonSchema(param.type)}, + {"description", param.description} + }; + properties[param.name] = prop; + if (param.required) { + required.push_back(param.name); + } + } + + json inputSchema = { + {"type", "object"}, + {"properties", properties} + }; + if (!required.empty()) { + inputSchema["required"] = required; + } + + return json{ + {"name", tool.name}, + {"description", tool.description}, + {"inputSchema", inputSchema} + }; +} + +std::string McpServer::paramTypeToJsonSchema(ToolParamType type) { + switch (type) { + case ToolParamType::STRING: return "string"; + case ToolParamType::INTEGER: return "integer"; + case ToolParamType::NUMBER: return "number"; + case ToolParamType::BOOLEAN: return "boolean"; + case ToolParamType::ARRAY: return "array"; + case ToolParamType::OBJECT: return "object"; + case ToolParamType::UNKNOWN: return "string"; + } + return "string"; +} + +// --------------------------------------------------------------------------- +// tools/call +// --------------------------------------------------------------------------- + +json McpServer::handleToolsCall(const json& params) { + std::string name = params.value("name", std::string("")); + json arguments = params.value("arguments", json::object()); + + if (name.empty()) { + return json{ + {"content", json::array({json{{"type", "text"}, {"text", "Error: tool name is required"}}})}, + {"isError", true} + }; + } + + std::cerr << "[gaia-bash] tools/call: " << name << std::endl; + + json result = agent_.toolRegistry().executeTool(name, arguments); + + // Check if the tool returned an error (two patterns: {"status":"error"} or {"error":"..."}) + bool isError = (result.contains("status") && result["status"] == "error") + || 
(result.contains("error") && !result.contains("success")); + + std::string resultText = result.dump(2); + + return json{ + {"content", json::array({json{{"type", "text"}, {"text", resultText}}})}, + {"isError", isError} + }; +} + +// --------------------------------------------------------------------------- +// prompts/list +// --------------------------------------------------------------------------- + +json McpServer::handlePromptsList(const json& /*params*/) { + json prompts = json::array(); + + prompts.push_back(json{ + {"name", "review-script"}, + {"description", "Multi-pass code review of a bash script (correctness, security, portability, performance, style)"}, + {"arguments", json::array({json{{"name", "path"}, {"description", "Path to the script to review"}, {"required", true}}})} + }); + + prompts.push_back(json{ + {"name", "generate-bats-test"}, + {"description", "Generate BATS test cases for a bash script"}, + {"arguments", json::array({json{{"name", "path"}, {"description", "Path to the script to test"}, {"required", true}}})} + }); + + prompts.push_back(json{ + {"name", "explain-command"}, + {"description", "Explain a bash command or one-liner in detail"}, + {"arguments", json::array({json{{"name", "command"}, {"description", "The command to explain"}, {"required", true}}})} + }); + + prompts.push_back(json{ + {"name", "posix-check"}, + {"description", "Check a bash script for POSIX compliance and flag bashisms"}, + {"arguments", json::array({json{{"name", "path"}, {"description", "Path to the script to check"}, {"required", true}}})} + }); + + return json{{"prompts", prompts}}; +} + +// --------------------------------------------------------------------------- +// prompts/get +// --------------------------------------------------------------------------- + +json McpServer::handlePromptsGet(const json& params) { + std::string name = params.value("name", std::string("")); + json arguments = params.value("arguments", json::object()); + + std::string 
promptText; + + if (name == "review-script") { + std::string path = arguments.value("path", std::string("")); + promptText = "Perform a thorough multi-pass code review of the bash script at '" + path + + "'. Analyze for: 1) Correctness (logic errors, edge cases), " + "2) Security (injection, unquoted vars, eval), " + "3) Portability (bashisms in #!/bin/sh), " + "4) Performance (unnecessary subshells, useless cat), " + "5) Style (ShellCheck compliance, naming)."; + } else if (name == "generate-bats-test") { + std::string path = arguments.value("path", std::string("")); + promptText = "Generate comprehensive BATS test cases for the bash script at '" + path + + "'. Cover: happy path, error cases (missing args, bad input), " + "edge cases (empty input, spaces in filenames), and exit code verification."; + } else if (name == "explain-command") { + std::string command = arguments.value("command", std::string("")); + promptText = "Explain this bash command in detail, breaking down each part: " + command; + } else if (name == "posix-check") { + std::string path = arguments.value("path", std::string("")); + promptText = "Check the bash script at '" + path + + "' for POSIX compliance. 
Flag any bashisms ([[ ]], arrays, <<<, " + "${var,,}, process substitution) and suggest portable alternatives."; + } else { + return json{ + {"description", "Unknown prompt: " + name}, + {"messages", json::array()} + }; + } + + // Execute the prompt through the agent + std::cerr << "[gaia-bash] prompts/get: " << name << std::endl; + + json result = agent_.processQuery(promptText); + std::string answer = result.value("result", std::string("")); + + return json{ + {"description", "Result of " + name}, + {"messages", json::array({ + json{{"role", "user"}, {"content", json{{"type", "text"}, {"text", promptText}}}}, + json{{"role", "assistant"}, {"content", json{{"type", "text"}, {"text", answer}}}} + })} + }; +} + +} // namespace gaia diff --git a/cpp/agents/bash/mcp_server.h b/cpp/agents/bash/mcp_server.h new file mode 100644 index 000000000..ff50ddec9 --- /dev/null +++ b/cpp/agents/bash/mcp_server.h @@ -0,0 +1,67 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// MCP stdio server that exposes an Agent's tools via JSON-RPC. +// Implements the Model Context Protocol server side for integration +// with Claude Code, OpenCode, and other MCP-compatible agents. + +#pragma once + +#include + +#include "gaia/export.h" +#include "gaia/types.h" + +namespace gaia { + +class Agent; + +/// MCP stdio server that exposes an Agent's tools via JSON-RPC 2.0. +/// +/// Reads JSON-RPC requests from stdin, processes them, writes responses +/// to stdout. 
Implements the MCP protocol: +/// - initialize: handshake with capabilities +/// - tools/list: returns registered tools as MCP tool definitions +/// - tools/call: executes a tool and returns the result +/// - prompts/list: returns available prompt templates +/// - prompts/get: returns a prompt with parameter substitution +/// +/// Usage: +/// @code +/// BashAgent agent(config); +/// McpServer server(agent); +/// server.run(); // blocking, reads stdin until EOF +/// @endcode +/// +/// Configure in Claude Code (~/.claude/settings.json): +/// @code +/// {"mcpServers": {"gaia-bash": {"command": "gaia-bash", "args": ["--mcp"]}}} +/// @endcode +class McpServer { +public: + explicit McpServer(Agent& agent); + + /// Run the server (blocking). Reads stdin line-by-line, writes to stdout. + void run(); + +private: + Agent& agent_; + + /// Process a single JSON-RPC request and return the response. + json handleRequest(const json& request); + + // Method handlers + json handleInitialize(const json& params); + json handleToolsList(const json& params); + json handleToolsCall(const json& params); + json handlePromptsList(const json& params); + json handlePromptsGet(const json& params); + + /// Convert a ToolInfo to MCP tool definition format. + static json toolInfoToMcp(const ToolInfo& tool); + + /// Convert ToolParamType to JSON Schema type string. 
+ static std::string paramTypeToJsonSchema(ToolParamType type); +}; + +} // namespace gaia diff --git a/cpp/examples/process_agent.cpp b/cpp/examples/process_agent.cpp index 39e66b78a..9bc1659c0 100644 --- a/cpp/examples/process_agent.cpp +++ b/cpp/examples/process_agent.cpp @@ -701,7 +701,8 @@ static void cleanupBalloonNotify() { // --------------------------------------------------------------------------- class ProcessConsole : public gaia::CleanConsole { public: - void printFinalAnswer(const std::string& answer) override { + void printFinalAnswer(const std::string& answer, + const gaia::UsageStats& /*usage*/ = {}) override { if (answer.empty()) return; std::string cleanAnswer = answer; diff --git a/cpp/include/gaia/agent.h b/cpp/include/gaia/agent.h index ea5b11788..550924507 100644 --- a/cpp/include/gaia/agent.h +++ b/cpp/include/gaia/agent.h @@ -111,6 +111,31 @@ class GAIA_API Agent { /// Clear conversation history (start a fresh topic). void clearHistory() { conversationHistory_.clear(); } + /// Get a snapshot of the current conversation history (for session persistence). + /// Returns a copy to avoid races with processQuery() on another thread. + std::vector history() const { + std::lock_guard lock(configMutex_); + return conversationHistory_; + } + + /// Replace conversation history (for session resume). + /// Must NOT be called while processQuery() is running (guarded by inFlight_). + void setHistory(std::vector history) { + if (inFlight_.load()) { + throw std::runtime_error("Cannot set history while processQuery() is running"); + } + conversationHistory_ = std::move(history); + } + + /// Request cancellation of the current processQuery() run. + /// The agent loop checks this flag between steps and exits early + /// with a partial result. Safe to call from any thread. + /// The flag is automatically reset at the start of the next processQuery(). + void requestCancel() { cancelled_.store(true); } + + /// Check whether a cancel has been requested. 
+ bool isCancelled() const { return cancelled_.load(); } + /// Get a mutable reference to the tool registry (for subclass tool registration). ToolRegistry& toolRegistry() { return tools_; } @@ -162,11 +187,16 @@ class GAIA_API Agent { // ---- LLM Communication ---- - /// Send messages to the LLM and get a response. + struct LlmResult { + std::string content; + UsageStats usage; + }; + + /// Send messages to the LLM and get a response with usage stats. /// Uses OpenAI-compatible chat completions API. /// @param cfg Config snapshot from the current processQuery() call. - std::string callLlm(const std::vector& messages, const std::string& systemPrompt, - const AgentConfig& cfg); + LlmResult callLlm(const std::vector& messages, const std::string& systemPrompt, + const AgentConfig& cfg); // ---- Execution Helpers ---- @@ -196,6 +226,10 @@ class GAIA_API Agent { // call on the same Agent (from any thread) throws std::runtime_error. std::atomic inFlight_{false}; + // Cancel flag — set by requestCancel(), checked between loop steps. + // Reset at the start of each processQuery(). 
+ std::atomic cancelled_{false}; + AgentState executionState_ = AgentState::PLANNING; json currentPlan_; int currentStep_ = 0; diff --git a/cpp/include/gaia/clean_console.h b/cpp/include/gaia/clean_console.h index 4b0bf1042..b122df3be 100644 --- a/cpp/include/gaia/clean_console.h +++ b/cpp/include/gaia/clean_console.h @@ -63,7 +63,8 @@ class GAIA_API CleanConsole : public OutputHandler { void printInfo(const std::string& message) override; void startProgress(const std::string& message) override; void stopProgress() override; - void printFinalAnswer(const std::string& answer) override; + void printFinalAnswer(const std::string& answer, + const UsageStats& usage = {}) override; void printCompletion(int stepsTaken, int stepsLimit) override; void printDecisionMenu(const std::vector& decisions) override; void printStreamToken(const std::string& token) override; diff --git a/cpp/include/gaia/console.h b/cpp/include/gaia/console.h index 33d5c2dfd..fde8235f6 100644 --- a/cpp/include/gaia/console.h +++ b/cpp/include/gaia/console.h @@ -50,7 +50,8 @@ class GAIA_API OutputHandler { virtual void stopProgress() = 0; // === Completion Methods === - virtual void printFinalAnswer(const std::string& answer) = 0; + virtual void printFinalAnswer(const std::string& answer, + const UsageStats& usage = {}) = 0; virtual void printCompletion(int stepsTaken, int stepsLimit) = 0; // === Optional Methods (default no-op) === @@ -92,7 +93,8 @@ class GAIA_API TerminalConsole : public OutputHandler { void printInfo(const std::string& message) override; void startProgress(const std::string& message) override; void stopProgress() override; - void printFinalAnswer(const std::string& answer) override; + void printFinalAnswer(const std::string& answer, + const UsageStats& usage = {}) override; void printCompletion(int stepsTaken, int stepsLimit) override; void printHeader(const std::string& text) override; void printSeparator(int length = 50) override; @@ -136,7 +138,8 @@ class GAIA_API SilentConsole 
: public OutputHandler { void printInfo(const std::string&) override {} void startProgress(const std::string&) override {} void stopProgress() override {} - void printFinalAnswer(const std::string& answer) override; + void printFinalAnswer(const std::string& answer, + const UsageStats& usage = {}) override; void printCompletion(int, int) override {} private: diff --git a/cpp/include/gaia/file_tools.h b/cpp/include/gaia/file_tools.h new file mode 100644 index 000000000..20704a81f --- /dev/null +++ b/cpp/include/gaia/file_tools.h @@ -0,0 +1,70 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Pre-built file I/O tool callbacks for GAIA agents. +// Provides read, write, edit, and search tools that any agent can register +// to give the LLM file manipulation capabilities. + +#pragma once + +#include + +#include "gaia/export.h" +#include "gaia/tool_registry.h" +#include "gaia/types.h" + +namespace gaia { + +/// Pre-built file I/O tool callbacks for agents. +/// Each static method returns a ToolInfo ready for ToolRegistry::registerTool(). +/// +/// Usage: +/// auto& reg = agent.toolRegistry(); +/// reg.registerTool(FileIOTools::fileRead()); +/// reg.registerTool(FileIOTools::fileWrite()); +/// reg.registerTool(FileIOTools::fileEdit()); +/// reg.registerTool(FileIOTools::fileSearch()); +/// +/// Or register all at once: +/// FileIOTools::registerAll(agent.toolRegistry()); +class GAIA_API FileIOTools { +public: + /// Register all file I/O tools with the given registry. + static void registerAll(ToolRegistry& registry); + + /// file_read: Read file contents with optional line range. + /// Args: {"path": string, "start_line"?: int, "end_line"?: int} + /// Returns: {"content": string, "lines": int, "path": string} + /// On error: {"error": string} + static ToolInfo fileRead(); + + /// file_write: Write content to a file (creates parent dirs). 
+ /// Args: {"path": string, "content": string} + /// Returns: {"success": true, "path": string, "bytes_written": int} + /// On error: {"error": string} + static ToolInfo fileWrite(); + + /// file_edit: Surgical string replacement in a file. + /// Args: {"path": string, "old_string": string, "new_string": string} + /// Returns: {"success": true, "path": string, "replacements": int} + /// On error: {"error": string} + static ToolInfo fileEdit(); + + /// file_search: Search for files by glob pattern and/or content pattern. + /// Args: {"pattern": string, "path"?: string, "content_pattern"?: string, "max_results"?: int} + /// Returns: {"matches": [{"path": string, "line"?: int, "context"?: string}], "total": int} + /// On error: {"error": string} + static ToolInfo fileSearch(); + +private: + // Implementation callbacks + static json doFileRead(const json& args); + static json doFileWrite(const json& args); + static json doFileEdit(const json& args); + static json doFileSearch(const json& args); + + /// Simple glob-style pattern matching (supports * and ? wildcards). + static bool matchGlob(const std::string& pattern, const std::string& text); +}; + +} // namespace gaia diff --git a/cpp/include/gaia/git_tools.h b/cpp/include/gaia/git_tools.h new file mode 100644 index 000000000..95900b550 --- /dev/null +++ b/cpp/include/gaia/git_tools.h @@ -0,0 +1,69 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Pre-built read-only git tool callbacks for GAIA agents. +// Provides status, diff, log, and show tools that any agent can register +// to give the LLM git inspection capabilities. + +#pragma once + +#include + +#include "gaia/export.h" +#include "gaia/tool_registry.h" +#include "gaia/types.h" + +namespace gaia { + +/// Pre-built read-only git tool callbacks for agents. +/// All tools use ALLOW policy (read-only operations). 
+/// +/// Each static method returns a ToolInfo ready for ToolRegistry::registerTool(). +/// +/// Usage: +/// auto& reg = agent.toolRegistry(); +/// reg.registerTool(GitTools::gitStatus()); +/// reg.registerTool(GitTools::gitDiff()); +/// reg.registerTool(GitTools::gitLog()); +/// reg.registerTool(GitTools::gitShow()); +/// +/// Or register all at once: +/// GitTools::registerAll(agent.toolRegistry()); +class GAIA_API GitTools { +public: + /// Register all git tools with the given registry. + static void registerAll(ToolRegistry& registry); + + /// git_status: Get working tree status. + /// Args: {} (no args) + /// Returns: {"status": string, "clean": bool} + /// On error: {"error": string} + static ToolInfo gitStatus(); + + /// git_diff: Show changes in working tree or between refs. + /// Args: {"path"?: string, "staged"?: bool, "ref"?: string} + /// Returns: {"diff": string, "files_changed": int} + /// On error: {"error": string} + static ToolInfo gitDiff(); + + /// git_log: Show recent commit history. + /// Args: {"count"?: int (default 10), "oneline"?: bool (default true), "path"?: string} + /// Returns: {"log": string, "commits": int} + /// On error: {"error": string} + static ToolInfo gitLog(); + + /// git_show: Show a specific commit or object. + /// Args: {"ref": string (default "HEAD")} + /// Returns: {"content": string, "ref": string} + /// On error: {"error": string} + static ToolInfo gitShow(); + +private: + // Implementation callbacks + static json doGitStatus(const json& args); + static json doGitDiff(const json& args); + static json doGitLog(const json& args); + static json doGitShow(const json& args); +}; + +} // namespace gaia diff --git a/cpp/include/gaia/json_event_handler.h b/cpp/include/gaia/json_event_handler.h new file mode 100644 index 000000000..ed313e9ff --- /dev/null +++ b/cpp/include/gaia/json_event_handler.h @@ -0,0 +1,75 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +// JSON-line event output handler for TUI / WebUI integration. +// +// Emits one JSON object per line to stdout, following the SSE event protocol +// defined in docs/plans/cpp-webui-integration.md. Each OutputHandler method +// maps to a single JSONL event. +// +// Usage: +// agent.setOutputHandler(std::make_unique()); +// agent.config().structuredEvents = true; +// agent.config().streaming = false; // avoid raw JSON tokens + +#pragma once + +#include +#include +#include + +#include "gaia/console.h" +#include "gaia/export.h" + +namespace gaia { + +/// Emits structured JSONL events to stdout for consumption by gaia-tui +/// or the Python CppAgentBackend subprocess bridge. +/// +/// Thread-safe: all emit() calls are serialized via mutex. +class GAIA_API JsonEventOutputHandler : public OutputHandler { +public: + // === Core Progress/State === + void printProcessingStart(const std::string& query, int maxSteps, + const std::string& modelId) override; + void printStepHeader(int stepNum, int stepLimit) override; + void printStateInfo(const std::string& message) override; + void printThought(const std::string& thought) override; + void printGoal(const std::string& goal) override; + void printPlan(const json& plan, int currentStep) override; + + // === Tool Execution === + void printToolUsage(const std::string& toolName) override; + void printToolComplete() override; + void prettyPrintJson(const json& data, const std::string& title) override; + + // === Status Messages === + void printError(const std::string& message) override; + void printWarning(const std::string& message) override; + void printInfo(const std::string& message) override; + + // === Progress Indicators === + void startProgress(const std::string& message) override; + void stopProgress() override; + + // === Completion === + void printFinalAnswer(const std::string& answer, + const UsageStats& usage = {}) override; + void printCompletion(int stepsTaken, int stepsLimit) 
override; + + // === Streaming === + void printStreamToken(const std::string& token) override; + void printStreamEnd() override; + +private: + /// Write a JSON object as a single line to stdout. + void emit(const json& event); + + std::string currentTool_; + int stepsTaken_ = 0; + int stepsLimit_ = 0; + int toolsUsed_ = 0; + std::mutex mutex_; +}; + +} // namespace gaia diff --git a/cpp/include/gaia/process.h b/cpp/include/gaia/process.h new file mode 100644 index 000000000..2eb85254b --- /dev/null +++ b/cpp/include/gaia/process.h @@ -0,0 +1,78 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Cross-platform process execution utility for the GAIA C++ agent framework. +// Replaces the ad-hoc runShell() pattern in example agents with a proper +// library function that handles timeouts, output capping, working directory, +// and environment variables. + +#pragma once + +#include +#include +#include + +#include "gaia/export.h" + +namespace gaia { + +/// Result of a process execution. +struct GAIA_API ProcessResult { + std::string stdout_output; ///< Captured stdout + std::string stderr_output; ///< Captured stderr + int exitCode = -1; ///< Process exit code (-1 if not started) + bool timedOut = false; ///< True if process was killed due to timeout +}; + +/// Cross-platform process execution utility. +/// +/// Provides static methods to run shell commands and capture their output, +/// with support for timeouts, output capping, working directory override, +/// and environment variable injection. +/// +/// @note NOT fully thread-safe when `cwd` or `env` parameters are used. +/// Working directory (chdir) and environment variables (setenv) are +/// process-wide on both POSIX and Windows. Concurrent calls with +/// different cwd/env values will interfere. Safe for concurrent use +/// only when cwd and env are both empty (the default). 
+/// +/// Example: +/// @code +/// auto result = gaia::ProcessRunner::run("echo hello", 5000); +/// if (result.exitCode == 0) { +/// std::cout << result.stdout_output; +/// } +/// @endcode +class GAIA_API ProcessRunner { +public: + /// Run a command and capture output. + /// + /// @param command Shell command string to execute + /// @param timeoutMs Timeout in milliseconds (0 = no timeout, default 30000) + /// @param cwd Working directory (empty = inherit current) + /// @param env Additional environment variables (merged with current) + /// @param maxOutputBytes Maximum bytes to capture per stream (default 64 KB) + /// @return ProcessResult with captured output and exit code + static ProcessResult run( + const std::string& command, + int timeoutMs = 30000, + const std::string& cwd = "", + const std::map& env = {}, + size_t maxOutputBytes = 65536 + ); + + /// Convenience: run and return stdout only, throw on non-zero exit. + /// + /// @param command Shell command string to execute + /// @param timeoutMs Timeout in milliseconds (0 = no timeout, default 30000) + /// @param cwd Working directory (empty = inherit current) + /// @return Captured stdout on success + /// @throws std::runtime_error on non-zero exit, timeout, or execution failure + static std::string runOrThrow( + const std::string& command, + int timeoutMs = 30000, + const std::string& cwd = "" + ); +}; + +} // namespace gaia diff --git a/cpp/include/gaia/repl.h b/cpp/include/gaia/repl.h new file mode 100644 index 000000000..b550de92e --- /dev/null +++ b/cpp/include/gaia/repl.h @@ -0,0 +1,140 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Reusable interactive REPL runner for any GAIA agent. +// Provides slash command framework, Ctrl-C cancellation, and session persistence. 
+
+#pragma once
+
+#include <functional>
+#include <map>
+#include <memory>
+#include <string>
+
+#include "gaia/export.h"
+
+namespace gaia {
+
+// Forward declarations
+class Agent;
+class SessionStore;
+
+/// Callback type for slash commands. Receives the argument string
+/// (everything after the command name, trimmed) and the Agent reference.
+using SlashCommandCallback = std::function<void(const std::string& args, Agent& agent)>;
+
+/// Reusable interactive REPL runner for any GAIA agent.
+///
+/// Provides a two-thread architecture:
+///   - Main thread: reads user input
+///   - Worker thread: runs agent.processQuery()
+///
+/// Features:
+///   - Slash command framework with built-in commands (/clear, /help, /model, /history, /exit)
+///   - Agent-registered custom commands (e.g. /lint, /review)
+///   - Ctrl-C cancels current agent run (via Agent::requestCancel()), doesn't kill process
+///   - Session persistence via SessionStore
+///   - Single-query mode (run one query, print result, exit)
+///
+/// Usage:
+/// @code
+///   Agent myAgent(config);
+///   ReplRunner repl(myAgent);
+///   repl.addCommand("/lint", "Run linter", [](const std::string& args, Agent& a) { ... });
+///   repl.run();  // blocking — runs until /exit or EOF
+/// @endcode
+class GAIA_API ReplRunner {
+public:
+    /// Construct a REPL for the given agent.
+    /// @param agent  The agent to run queries against.
+    /// @param prompt The input prompt string (default: "> ").
+    explicit ReplRunner(Agent& agent, const std::string& prompt = "> ");
+
+    /// Run the interactive REPL loop (blocking).
+    /// Returns when the user types /exit, "exit", "quit", or sends EOF (Ctrl-D).
+    void run();
+
+    /// Run a single query, print the result, and return the exit code.
+    /// @param query The query string to process.
+    /// @return 0 on success, 1 on failure.
+    int runOnce(const std::string& query);
+
+    /// Register a custom slash command.
+    /// @param name        Command name including the slash (e.g. "/lint").
+    /// @param description Help text shown by /help.
+    /// @param callback    Function to invoke when the command is used.
+    void addCommand(const std::string& name, const std::string& description,
+                    SlashCommandCallback callback);
+
+    /// Set the session store for save/load/resume.
+    /// When set, conversations are auto-saved on exit.
+    void setSessionStore(std::shared_ptr<SessionStore> store);
+
+    /// Set the session ID to resume (loads history on first run()).
+    void setResumeId(const std::string& sessionId);
+
+    /// Set whether to show the welcome banner on run().
+    void setShowBanner(bool show) { showBanner_ = show; }
+
+    /// Force TUI mode on or off. When false, uses CleanConsole even if
+    /// FTXUI is available. When not called, auto-detects based on whether
+    /// stdout is an interactive terminal (isatty).
+    void setUseTui(bool useTui) { useTui_ = useTui; tuiOverride_ = true; }
+
+    /// Check whether stdout is an interactive terminal.
+    static bool isInteractiveTerminal();
+
+    /// Try to dispatch input as a slash command.
+    /// @return true if the input was a command (handled), false if it's a query for the LLM.
+    bool tryDispatchCommand(const std::string& input);
+
+    /// Check whether a given command name is registered.
+    /// @param name Command name including the slash (e.g. "/clear").
+    /// @return true if the command is registered.
+    bool hasCommand(const std::string& name) const;
+
+    /// Get the number of registered commands.
+    size_t commandCount() const { return commands_.size(); }
+
+private:
+    Agent& agent_;
+    std::string prompt_;
+    bool showBanner_ = true;
+
+    // Slash commands: name -> {description, callback}
+    struct CommandEntry {
+        std::string description;
+        SlashCommandCallback callback;
+    };
+    std::map<std::string, CommandEntry> commands_;
+
+    // Session
+    std::shared_ptr<SessionStore> sessionStore_;
+    std::string sessionId_;
+    std::string resumeId_;
+
+    // Built-in command handlers
+    void cmdClear(const std::string& args, Agent& agent);
+    void cmdHelp(const std::string& args, Agent& agent);
+    void cmdModel(const std::string& args, Agent& agent);
+    void cmdHistory(const std::string& args, Agent& agent);
+    void cmdExit(const std::string& args, Agent& agent);
+
+    /// Register all built-in slash commands.
+    void registerBuiltinCommands();
+
+    /// Print the welcome banner.
+    void printBanner();
+
+    /// Save the current session (if store is set).
+    void saveSession();
+
+    bool exitRequested_ = false;
+    bool useTui_ = true;        // requested TUI mode; only authoritative once tuiOverride_ is set
+    bool tuiOverride_ = false;  // true after setUseTui() has been called
+
+    /// Configure the agent's output handler based on TUI availability.
+    void configureOutputHandler();
+};
+
+} // namespace gaia
diff --git a/cpp/include/gaia/session.h b/cpp/include/gaia/session.h
new file mode 100644
index 000000000..27668b3b1
--- /dev/null
+++ b/cpp/include/gaia/session.h
@@ -0,0 +1,99 @@
+// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+// Persistent session store for agent conversation history.
+// Enables save/load/resume of chat sessions to/from disk.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+#include "gaia/export.h"
+#include "gaia/types.h"
+
+namespace gaia {
+
+/// Metadata for a saved session (returned by list()).
+struct GAIA_API SessionInfo {
+    std::string id;           ///< Unique session identifier.
+    std::string timestamp;    ///< ISO 8601 creation time.
+    std::string preview;      ///< First user message (truncated to ~100 chars).
+    size_t messageCount = 0;  ///< Total messages in session.
+};
+
+/// Persistent session store for agent conversation history.
+///
+/// Sessions are stored as JSON files in a configurable directory
+/// (default: ~/.gaia/sessions/). Each file contains the full
+/// conversation history serialized as an array of Message objects.
+///
+/// Usage:
+/// @code
+///   SessionStore store;
+///   store.save("my-session", conversationHistory);
+///   auto history = store.load("my-session");
+///   auto sessions = store.list();
+/// @endcode
+class GAIA_API SessionStore {
+public:
+    /// Construct with default directory (~/.gaia/sessions/).
+    SessionStore();
+
+    /// Construct with explicit directory (for testing).
+    explicit SessionStore(const std::string& dir);
+
+    /// Save conversation history to a session file.
+    /// @param id      Session identifier (used as filename stem).
+    /// @param history The conversation messages to persist.
+    /// @throws std::runtime_error if the directory can't be created or file can't be written.
+    /// @throws std::invalid_argument if the session ID contains invalid characters.
+    void save(const std::string& id, const std::vector<Message>& history);
+
+    /// Load conversation history from a session file.
+    /// @param id Session identifier.
+    /// @return The persisted conversation messages.
+    /// @throws std::runtime_error if the session file doesn't exist or is malformed.
+    /// @throws std::invalid_argument if the session ID contains invalid characters.
+    std::vector<Message> load(const std::string& id) const;
+
+    /// Check whether a session exists.
+    /// @param id Session identifier.
+    /// @return true if a session file exists for the given ID.
+    bool exists(const std::string& id) const;
+
+    /// Delete a session file.
+    /// @param id Session identifier.
+    /// @return true if the file was deleted, false if it didn't exist.
+    bool remove(const std::string& id);
+
+    /// List all saved sessions, sorted by timestamp (newest first).
+    /// @return Vector of SessionInfo for every valid session file in the directory.
+    std::vector<SessionInfo> list() const;
+
+    /// Generate a unique session ID based on current timestamp.
+    /// Format: "session-YYYYMMDD-HHMMSS" (with disambiguation suffix if needed).
+    /// @return A unique session identifier string.
+    static std::string generateId();
+
+    /// Get the storage directory path.
+    const std::string& directory() const { return dir_; }
+
+private:
+    std::string dir_;
+
+    /// Get the file path for a session ID.
+    std::string pathForId(const std::string& id) const;
+
+    /// Parse a Message from JSON (inverse of Message::toJson()).
+    static Message messageFromJson(const json& j);
+
+    /// Validate a session ID (alphanumeric, hyphens, underscores only).
+    /// @throws std::invalid_argument if the ID is invalid.
+    static void validateId(const std::string& id);
+
+    /// Determine the default sessions directory.
+    static std::string defaultDir();
+};
+
+} // namespace gaia
diff --git a/cpp/include/gaia/tool_registry.h b/cpp/include/gaia/tool_registry.h
index e634a2a4b..c6e706b10 100644
--- a/cpp/include/gaia/tool_registry.h
+++ b/cpp/include/gaia/tool_registry.h
@@ -72,6 +72,13 @@ class GAIA_API ToolRegistry {
     /// @return Tool execution result as JSON.
     json executeTool(const std::string& name, const json& args);
 
+    /// Validate tool arguments against declared ToolParameter schemas.
+    /// Called automatically by executeTool() when no custom validateArgs is set.
+    /// Checks: required params present, types match ToolParamType, no unknown params.
+    /// @return empty string on success, error description on failure.
+    static std::string validateArgsAgainstSchema(const std::vector<ToolParameter>& params,
+                                                 const json& args);
+
     // ---- Enable / disable ----
 
     /// Enable or disable a tool by name.
diff --git a/cpp/include/gaia/tui_console.h b/cpp/include/gaia/tui_console.h
new file mode 100644
index 000000000..f89f988b2
--- /dev/null
+++ b/cpp/include/gaia/tui_console.h
@@ -0,0 +1,103 @@
+// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+// SPDX-License-Identifier: MIT +// +// FTXUI-based reactive TUI console for agents. +// Drop-in replacement for CleanConsole that provides a fullscreen terminal UI +// with markdown rendering, streaming token display, and structured chat history. +// +// Usage: +// agent.setOutputHandler(std::make_unique()); +// +// Requires GAIA_BUILD_TUI=ON (defines GAIA_HAS_TUI). + +#pragma once + +#ifdef GAIA_HAS_TUI + +#include +#include +#include + +#include +#include +#include + +#include "gaia/console.h" +#include "gaia/export.h" + +namespace gaia { + +// Forward-declare the standalone markdown renderer (defined in tui_markdown.cpp). +ftxui::Element renderMarkdown(const std::string& markdown); + +/// FTXUI-based reactive TUI console for agents. +/// Implements the OutputHandler interface with a fullscreen terminal UI. +/// +/// Layout: +/// +------------------------------------+ +/// | Chat history (scrollable) | +/// | - User messages | +/// | - Agent responses (markdown) | +/// | - Tool usage indicators | +/// +------------------------------------+ +/// | Status: model | tokens | step N/M | +/// +------------------------------------+ +class GAIA_API TuiConsole : public OutputHandler { +public: + TuiConsole(); + ~TuiConsole() override; + + // --- OutputHandler interface --- + void printProcessingStart(const std::string& query, int maxSteps, + const std::string& modelId) override; + void printStepHeader(int stepNum, int stepLimit) override; + void printStateInfo(const std::string& message) override; + void printThought(const std::string& thought) override; + void printGoal(const std::string& goal) override; + void printPlan(const json& plan, int currentStep) override; + void printToolUsage(const std::string& toolName) override; + void printToolComplete() override; + void prettyPrintJson(const json& data, const std::string& title) override; + void printError(const std::string& message) override; + void printWarning(const std::string& message) override; + void 
printInfo(const std::string& message) override;
+    void startProgress(const std::string& message) override;
+    void stopProgress() override;
+    void printFinalAnswer(const std::string& answer,
+                          const UsageStats& usage = {}) override;
+    void printCompletion(int stepsTaken, int stepsLimit) override;
+    void printDecisionMenu(const std::vector& decisions) override;
+    void printStreamToken(const std::string& token) override;
+    void printStreamEnd() override;
+
+    /// Get the accumulated chat entries as FTXUI Elements (for embedding in a larger TUI).
+    std::vector<ftxui::Element> getChatElements();
+
+    /// Get the status bar element.
+    ftxui::Element getStatusBar();
+
+private:
+    // Chat history entries
+    struct ChatEntry {
+        // NOTE(review): `ERROR` is also a macro in the Windows SDK (wingdi.h).
+        // If <windows.h> is included before this header without NOGDI, this
+        // enumerator will not compile. Renaming it needs a matching change in
+        // tui_console.cpp — confirm against the Windows build.
+        enum class Type { USER, ASSISTANT, TOOL, INFO, ERROR, WARNING };
+        Type type;
+        std::string content;
+    };
+
+    /// Append a new entry (mutex must NOT be held by caller).
+    void addEntry(ChatEntry::Type type, const std::string& content);
+
+    mutable std::mutex mutex_;
+    std::vector<ChatEntry> entries_;
+    static constexpr size_t kMaxEntries = 2000;  // evict oldest when exceeded
+    std::string currentModel_;
+    int currentStep_ = 0;
+    int maxSteps_ = 0;
+    std::string streamBuffer_;  // accumulates streaming tokens
+    bool streaming_ = false;
+    std::string progressMessage_;
+};
+
+} // namespace gaia
+
+#endif // GAIA_HAS_TUI
diff --git a/cpp/include/gaia/types.h b/cpp/include/gaia/types.h
index 4f9d8eff1..4fe9d6c19 100644
--- a/cpp/include/gaia/types.h
+++ b/cpp/include/gaia/types.h
@@ -248,6 +248,26 @@ struct ToolInfo {
     std::optional<std::string> mcpToolName;
 };
 
+// ---- LLM Usage Statistics ----
+
+/// Token counters for one LLM call, or the running total over several calls.
+/// Field names mirror the "usage" object read by extractUsage() in agent.cpp.
+struct UsageStats {
+    int promptTokens = 0;      ///< Value of "prompt_tokens".
+    int completionTokens = 0;  ///< Value of "completion_tokens".
+    int totalTokens = 0;       ///< Value of "total_tokens".
+
+    /// Add another set of counters to this one, field by field.
+    /// @return *this, so the operator composes like the built-in +=.
+    UsageStats& operator+=(const UsageStats& other) {
+        promptTokens += other.promptTokens;
+        completionTokens += other.completionTokens;
+        totalTokens += other.totalTokens;
+        return *this;
+    }
+
+    /// Serialize to a JSON object keyed by prompt_tokens / completion_tokens / total_tokens.
+    json toJson() const {
+        return {{"prompt_tokens", promptTokens},
+                {"completion_tokens", completionTokens},
+                {"total_tokens", totalTokens}};
+    }
+};
+
 // ---- Parsed LLM Response ----
 
 struct ParsedResponse {
@@ -297,6 +317,9 @@ struct AgentConfig {
     bool showPrompts = false;
     bool streaming = defaultStreaming();   // also controlled by GAIA_STREAMING=1
     bool silentMode = false;
+    bool structuredEvents = false;         // Always emit structured events (thought, goal, answer)
+                                           // even during streaming. Used by JsonEventOutputHandler
+                                           // so the TUI/WebUI gets both stream tokens AND agent events.
     double temperature = 0.7;              // LLM sampling temperature (0.0 = deterministic)
 
     /// Validate config fields; throws std::invalid_argument on violation.
diff --git a/cpp/src/agent.cpp b/cpp/src/agent.cpp
index 14fb6d4c1..407838455 100644
--- a/cpp/src/agent.cpp
+++ b/cpp/src/agent.cpp
@@ -254,8 +254,21 @@ std::string Agent::composeSystemPrompt() const {
 
 // ---- LLM Communication ----
 
-std::string Agent::callLlm(const std::vector<Message>& messages, const std::string& sysPrompt,
-                           const AgentConfig& cfg) {
+namespace {
+/// Extract token usage from a chat-completions response object.
+///
+/// Usage is informational only. A missing or non-object "usage", or a counter
+/// that is absent or not numeric (e.g. null), yields 0 for that field instead
+/// of throwing, so a valid completion is never reported as a failed LLM call.
+UsageStats extractUsage(const json& responseJson) {
+    UsageStats usage;
+    const auto usageIt = responseJson.find("usage");
+    if (usageIt == responseJson.end() || !usageIt->is_object()) {
+        return usage;
+    }
+    // json::value() throws type_error when the key exists with an incompatible
+    // type, so test the type explicitly before converting.
+    const auto readCount = [&usageIt](const char* key) {
+        const auto it = usageIt->find(key);
+        return (it != usageIt->end() && it->is_number()) ? it->get<int>() : 0;
+    };
+    usage.promptTokens = readCount("prompt_tokens");
+    usage.completionTokens = readCount("completion_tokens");
+    usage.totalTokens = readCount("total_tokens");
+    return usage;
+}
+} // namespace
+
+Agent::LlmResult Agent::callLlm(const std::vector<Message>& messages, const std::string& sysPrompt,
+                                const AgentConfig& cfg) {
     // Build OpenAI-compatible request.
     // NOTE: n_ctx is intentionally omitted — context size is set at model load
     // time via LemonadeClient::loadModel() / ensureModelLoaded(), not per-request.
@@ -295,7 +308,14 @@ std::string Agent::callLlm(const std::vector& messages, const std::stri
 
         if (!accumulated.empty()) {
             console_->printStreamEnd();
-            return accumulated;
+            // Streaming responses may include usage in the final chunk;
+            // attempt to extract from the raw bytes.
+ UsageStats usage; + try { + const json responseJson = json::parse(rawResponse); + usage = extractUsage(responseJson); + } catch (...) {} + return {accumulated, usage}; } // Fallback: server returned a non-streaming response despite "stream":true. @@ -307,7 +327,8 @@ std::string Agent::callLlm(const std::vector& messages, const std::stri const auto& choice = responseJson["choices"][0]; if (choice.contains("message") && choice["message"].contains("content") && choice["message"]["content"].is_string()) { - return choice["message"]["content"].get(); + return {choice["message"]["content"].get(), + extractUsage(responseJson)}; } } } catch (...) {} @@ -316,7 +337,7 @@ std::string Agent::callLlm(const std::vector& messages, const std::stri throw std::runtime_error("Streaming response contained no tokens"); } - // ---- Non-streaming path (unchanged) ---- + // ---- Non-streaming path ---- std::string responseBody = lemonade_.chatCompletions(requestBody); // Parse response @@ -327,7 +348,8 @@ std::string Agent::callLlm(const std::vector& messages, const std::stri auto& choice = responseJson["choices"][0]; if (choice.contains("message") && choice["message"].contains("content") && choice["message"]["content"].is_string()) { - return choice["message"]["content"].get(); + return {choice["message"]["content"].get(), + extractUsage(responseJson)}; } } // Include truncated response body in error for debugging @@ -643,6 +665,9 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m // Re-entrancy guard (RAII — releases on any exit path incl. exceptions). InFlightGuard guard(inFlight_); + // Reset cancel flag at the start of each query. + cancelled_.store(false); + // Snapshot config at start of query for thread-safe consistency throughout. 
AgentConfig cfg; { @@ -693,8 +718,16 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m std::string lastError; std::vector stepResults; std::vector> toolCallHistory; // (name, args) for loop detection + UsageStats totalUsage; while (stepsTaken < stepsLimit && finalAnswer.empty()) { + // ---- Cancel check ---- + if (cancelled_.load()) { + console_->printWarning("Cancelled by user"); + finalAnswer = "[Cancelled after " + std::to_string(stepsTaken) + " step(s)]"; + break; + } + ++stepsTaken; console_->printStepHeader(stepsTaken, stepsLimit); @@ -719,9 +752,9 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m // Call LLM (retry once on failure). // Skip progress spinner when streaming — tokens serve as live progress. if (!config_.streaming) console_->startProgress("Thinking"); - std::string response; + LlmResult llmResult; try { - response = callLlm(messages, systemPrompt(), cfg); + llmResult = callLlm(messages, systemPrompt(), cfg); } catch (const std::exception& e) { if (!config_.streaming) console_->stopProgress(); console_->printWarning(std::string("LLM call failed, retrying: ") + e.what()); @@ -729,7 +762,7 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m // Retry once if (!config_.streaming) console_->startProgress("Retrying"); try { - response = callLlm(messages, systemPrompt(), cfg); + llmResult = callLlm(messages, systemPrompt(), cfg); } catch (const std::exception& e2) { if (!config_.streaming) console_->stopProgress(); console_->printError(std::string("LLM error: ") + e2.what()); @@ -739,6 +772,9 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m } if (!config_.streaming) console_->stopProgress(); + const std::string& response = llmResult.content; + totalUsage += llmResult.usage; + // Debug: show response if (cfg.showPrompts) { console_->printResponse(response, "LLM Response"); @@ -755,7 +791,9 @@ json Agent::processQueryInternal(const std::vector& userMessages, 
int m // Display reasoning. // Skip when streaming — the raw tokens were already printed during callLlm(). - if (!config_.streaming) { + // Exception: structuredEvents mode emits both stream tokens AND structured events, + // so the TUI/WebUI gets live progress AND parsed agent activity. + if (!config_.streaming || config_.structuredEvents) { console_->printThought(parsed.thought); console_->printGoal(parsed.goal); } @@ -763,7 +801,7 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m // ---- Handle final answer ---- if (parsed.answer.has_value()) { finalAnswer = parsed.answer.value(); - if (!config_.streaming) console_->printFinalAnswer(finalAnswer); + if (!config_.streaming || config_.structuredEvents) console_->printFinalAnswer(finalAnswer, totalUsage); break; } @@ -848,7 +886,7 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m // No tool call and no answer — treat response as conversational if (!parsed.toolName.has_value() && !parsed.answer.has_value()) { finalAnswer = response; - if (!config_.streaming) console_->printFinalAnswer(finalAnswer); + if (!config_.streaming || config_.structuredEvents) console_->printFinalAnswer(finalAnswer, totalUsage); break; } } @@ -887,11 +925,15 @@ json Agent::processQueryInternal(const std::vector& userMessages, int m } conversationHistory_ = messages; - return json{ + json result = { {"result", finalAnswer}, {"steps_taken", stepsTaken}, {"steps_limit", stepsLimit} }; + if (totalUsage.totalTokens > 0) { + result["usage"] = totalUsage.toJson(); + } + return result; } } // namespace gaia diff --git a/cpp/src/clean_console.cpp b/cpp/src/clean_console.cpp index dfb1fac7d..dcfa2d433 100644 --- a/cpp/src/clean_console.cpp +++ b/cpp/src/clean_console.cpp @@ -207,7 +207,8 @@ void CleanConsole::startProgress(const std::string& /*message*/) {} void CleanConsole::stopProgress() {} -void CleanConsole::printFinalAnswer(const std::string& answer) { +void CleanConsole::printFinalAnswer(const 
std::string& answer, + const UsageStats& /*usage*/) { if (answer.empty()) return; // Extract clean text from the LLM's final response. diff --git a/cpp/src/console.cpp b/cpp/src/console.cpp index 9102c501d..05e5b71ad 100644 --- a/cpp/src/console.cpp +++ b/cpp/src/console.cpp @@ -106,7 +106,8 @@ void TerminalConsole::stopProgress() { std::cout << "\n"; } -void TerminalConsole::printFinalAnswer(const std::string& answer) { +void TerminalConsole::printFinalAnswer(const std::string& answer, + const UsageStats& /*usage*/) { std::cout << "\n" << BOLD << GREEN << "Answer:" << RESET << "\n" << answer << "\n"; } @@ -139,7 +140,8 @@ void TerminalConsole::printStreamEnd() { // ---- SilentConsole ---- -void SilentConsole::printFinalAnswer(const std::string& answer) { +void SilentConsole::printFinalAnswer(const std::string& answer, + const UsageStats& /*usage*/) { if (!silenceFinalAnswer_) { std::cout << answer << "\n"; } diff --git a/cpp/src/file_tools.cpp b/cpp/src/file_tools.cpp new file mode 100644 index 000000000..f7963d6bb --- /dev/null +++ b/cpp/src/file_tools.cpp @@ -0,0 +1,417 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
+// SPDX-License-Identifier: MIT
+
+#include "gaia/file_tools.h"
+
+#include <cstddef>
+#include <filesystem>
+#include <fstream>
+#include <sstream>
+#include <string>
+#include <system_error>
+
+namespace fs = std::filesystem;
+
+namespace gaia {
+
+// ---------------------------------------------------------------------------
+// registerAll
+// ---------------------------------------------------------------------------
+
+/// Register the four file tools (read, write, edit, search) with @p registry.
+void FileIOTools::registerAll(ToolRegistry& registry) {
+    registry.registerTool(fileRead());
+    registry.registerTool(fileWrite());
+    registry.registerTool(fileEdit());
+    registry.registerTool(fileSearch());
+}
+
+// ---------------------------------------------------------------------------
+// fileRead
+// ---------------------------------------------------------------------------
+
+/// Build the `file_read` tool descriptor (policy ALLOW, callback doFileRead).
+ToolInfo FileIOTools::fileRead() {
+    ToolInfo info;
+    info.name = "file_read";
+    info.description =
+        "Read the contents of a file. Optionally specify a line range with "
+        "start_line and end_line (1-based, inclusive).";
+    info.policy = ToolPolicy::ALLOW;
+    info.parameters = {
+        {"path", ToolParamType::STRING, /*required=*/true,
+         "Absolute or relative path to the file to read"},
+        {"start_line", ToolParamType::INTEGER, /*required=*/false,
+         "First line to read (1-based, inclusive). Omit to start from the beginning."},
+        {"end_line", ToolParamType::INTEGER, /*required=*/false,
+         "Last line to read (1-based, inclusive). Omit to read to the end."},
+    };
+    info.callback = doFileRead;
+    return info;
+}
+
+/// Read a file, optionally restricted to [start_line, end_line].
+///
+/// @param args JSON with "path" and optional "start_line" / "end_line";
+///             values <= 0 mean "no bound on that side".
+/// @return {"content", "lines", "path", "truncated"} on success, where "lines"
+///         is the total number of lines in the file (not just those returned),
+///         or {"error": ...} on failure. Never throws.
+json FileIOTools::doFileRead(const json& args) {
+    // Cap on returned content; whole lines only, so the cut is on a line boundary.
+    static constexpr size_t kMaxReadBytes = 32 * 1024;
+
+    try {
+        std::string path = args.value("path", "");
+        if (path.empty()) {
+            return json{{"error", "path is required"}};
+        }
+
+        std::ifstream file(path);
+        if (!file.is_open()) {
+            return json{{"error", "Cannot open file: " + path}};
+        }
+
+        int startLine = args.value("start_line", 0);
+        int endLine = args.value("end_line", 0);
+
+        std::string line;
+        std::ostringstream content;
+        int lineNumber = 0;
+        int linesIncluded = 0;
+        size_t bytesRead = 0;
+        bool truncated = false;
+
+        while (std::getline(file, line)) {
+            ++lineNumber;
+
+            bool inRange = true;
+            if (startLine > 0 && lineNumber < startLine) inRange = false;
+            if (endLine > 0 && lineNumber > endLine) inRange = false;
+
+            if (inRange) {
+                // +1 accounts for the '\n' separator written before every line but the first.
+                size_t lineBytes = line.size() + (linesIncluded > 0 ? 1 : 0);
+                if (bytesRead + lineBytes > kMaxReadBytes) {
+                    truncated = true;
+                    break;
+                }
+                if (linesIncluded > 0) content << '\n';
+                content << line;
+                bytesRead += lineBytes;
+                ++linesIncluded;
+            }
+
+            // Past end_line nothing more is collected; the rest of the file is
+            // only scanned so that "lines" reports the file's total line count.
+            if (endLine > 0 && lineNumber >= endLine) {
+                while (std::getline(file, line)) {
+                    ++lineNumber;
+                }
+                break;
+            }
+        }
+
+        // Count remaining lines if we truncated early
+        if (truncated) {
+            while (std::getline(file, line)) {
+                ++lineNumber;
+            }
+        }
+
+        std::string result = content.str();
+        if (truncated) {
+            result += "\n... [output truncated at 32 KB]";
+        }
+
+        return json{
+            {"content", result},
+            {"lines", lineNumber},
+            {"path", path},
+            {"truncated", truncated},
+        };
+    } catch (const std::exception& e) {
+        return json{{"error", std::string("file_read failed: ") + e.what()}};
+    }
+}
+
+// ---------------------------------------------------------------------------
+// fileWrite
+// ---------------------------------------------------------------------------
+
+/// Build the `file_write` tool descriptor (policy CONFIRM, callback doFileWrite).
+ToolInfo FileIOTools::fileWrite() {
+    ToolInfo info;
+    info.name = "file_write";
+    info.description =
+        "Write content to a file. Creates parent directories if they do not "
+        "exist. Overwrites the file if it already exists.";
+    info.policy = ToolPolicy::CONFIRM;
+    info.parameters = {
+        {"path", ToolParamType::STRING, /*required=*/true,
+         "Absolute or relative path to the file to write"},
+        {"content", ToolParamType::STRING, /*required=*/true,
+         "The text content to write to the file"},
+    };
+    info.callback = doFileWrite;
+    return info;
+}
+
+/// Write "content" to "path", creating parent directories as needed.
+///
+/// @param args JSON with string fields "path" and "content".
+/// @return {"success", "path", "bytes_written"} or {"error": ...}. Never throws.
+json FileIOTools::doFileWrite(const json& args) {
+    try {
+        std::string path = args.value("path", "");
+        if (path.empty()) {
+            return json{{"error", "path is required"}};
+        }
+
+        if (!args.contains("content") || !args["content"].is_string()) {
+            return json{{"error", "content is required and must be a string"}};
+        }
+        // Borrow the string stored inside args instead of copying it.
+        const std::string& content = args["content"].get_ref<const std::string&>();
+
+        // Create parent directories if needed
+        fs::path filePath(path);
+        if (filePath.has_parent_path()) {
+            std::error_code ec;
+            fs::create_directories(filePath.parent_path(), ec);
+            if (ec) {
+                return json{{"error", "Failed to create parent directories: " + ec.message()}};
+            }
+        }
+
+        // Binary mode: write the bytes exactly as given (no newline translation).
+        std::ofstream file(path, std::ios::binary);
+        if (!file.is_open()) {
+            return json{{"error", "Cannot open file for writing: " + path}};
+        }
+
+        file.write(content.data(), static_cast<std::streamsize>(content.size()));
+        if (!file.good()) {
+            return json{{"error", "Write failed for: " + path}};
+        }
+        file.close();
+
+        return json{
+            {"success", true},
+
{"path", path},
+            {"bytes_written", content.size()},
+        };
+    } catch (const std::exception& e) {
+        return json{{"error", std::string("file_write failed: ") + e.what()}};
+    }
+}
+
+// ---------------------------------------------------------------------------
+// fileEdit
+// ---------------------------------------------------------------------------
+
+/// Build the `file_edit` tool descriptor (policy CONFIRM, callback doFileEdit).
+ToolInfo FileIOTools::fileEdit() {
+    ToolInfo info;
+    info.name = "file_edit";
+    info.description =
+        "Perform surgical string replacement in a file. Finds all occurrences "
+        "of old_string and replaces them with new_string.";
+    info.policy = ToolPolicy::CONFIRM;
+    info.parameters = {
+        {"path", ToolParamType::STRING, /*required=*/true,
+         "Absolute or relative path to the file to edit"},
+        {"old_string", ToolParamType::STRING, /*required=*/true,
+         "The exact text to search for and replace"},
+        {"new_string", ToolParamType::STRING, /*required=*/true,
+         "The text to replace old_string with"},
+    };
+    info.callback = doFileEdit;
+    return info;
+}
+
+/// Replace every occurrence of "old_string" with "new_string" in the file at "path".
+///
+/// @param args JSON with "path", non-empty "old_string", and "new_string"
+///             (defaults to "" when absent, i.e. the matches are deleted).
+/// @return {"success", "path", "replacements"} or {"error": ...}; it is an
+///         error if old_string does not occur at all. Never throws.
+json FileIOTools::doFileEdit(const json& args) {
+    try {
+        std::string path = args.value("path", "");
+        if (path.empty()) {
+            return json{{"error", "path is required"}};
+        }
+
+        std::string oldStr = args.value("old_string", "");
+        if (oldStr.empty()) {
+            return json{{"error", "old_string is required and must not be empty"}};
+        }
+
+        std::string newStr = args.value("new_string", "");
+
+        // Read the whole file in binary mode so that the bytes written back
+        // below (also binary) are exactly the bytes read. A text-mode read
+        // translates CRLF to LF on Windows and would silently rewrite every
+        // line ending in the file.
+        std::ifstream inFile(path, std::ios::binary);
+        if (!inFile.is_open()) {
+            return json{{"error", "Cannot open file: " + path}};
+        }
+
+        std::ostringstream buffer;
+        buffer << inFile.rdbuf();
+        std::string content = buffer.str();
+        inFile.close();
+
+        // Replace all occurrences. The search resumes after the inserted text,
+        // so a new_string that contains old_string cannot loop forever.
+        int replacements = 0;
+        std::string::size_type pos = 0;
+        while ((pos = content.find(oldStr, pos)) != std::string::npos) {
+            content.replace(pos, oldStr.size(), newStr);
+            pos += newStr.size();
+            ++replacements;
+        }
+
+        if (replacements == 0) {
+            return json{{"error", "old_string not found in file: " + path}};
+        }
+
+        // Write back
+        std::ofstream outFile(path, std::ios::binary);
+        if (!outFile.is_open()) {
+            return json{{"error", "Cannot open file for writing: " + path}};
+        }
+
+        outFile.write(content.data(), static_cast<std::streamsize>(content.size()));
+        if (!outFile.good()) {
+            return json{{"error", "Write failed for: " + path}};
+        }
+        outFile.close();
+
+        return json{
+            {"success", true},
+            {"path", path},
+            {"replacements", replacements},
+        };
+    } catch (const std::exception& e) {
+        return json{{"error", std::string("file_edit failed: ") + e.what()}};
+    }
+}
+
+// ---------------------------------------------------------------------------
+// fileSearch
+// ---------------------------------------------------------------------------
+
+/// Build the `file_search` tool descriptor (policy ALLOW, callback doFileSearch).
+ToolInfo FileIOTools::fileSearch() {
+    ToolInfo info;
+    info.name = "file_search";
+    info.description =
+        "Search for files by name pattern and/or content. The pattern is matched "
+        "against file names using simple glob wildcards (* and ?). Optionally "
+        "filter by content_pattern (substring match within file contents).";
+    info.policy = ToolPolicy::ALLOW;
+    info.parameters = {
+        {"pattern", ToolParamType::STRING, /*required=*/true,
+         "Glob pattern to match file names (e.g. 
'*.cpp', 'test_*')"},
+        {"path", ToolParamType::STRING, /*required=*/false,
+         "Root directory to search in (default: current directory)"},
+        {"content_pattern", ToolParamType::STRING, /*required=*/false,
+         "Substring to search for within matched files"},
+        {"max_results", ToolParamType::INTEGER, /*required=*/false,
+         "Maximum number of results to return (default: 50)"},
+    };
+    info.callback = doFileSearch;
+    return info;
+}
+
+/// Recursively search a directory for files whose name matches a glob and,
+/// optionally, whose contents contain a substring.
+///
+/// @param args JSON with "pattern" (required), and optional "path" (default "."),
+///             "content_pattern", and "max_results" (default 50; values <= 0 use 50).
+/// @return {"matches": [...], "total": N} or {"error": ...}. "total" counts every
+///         hit, including those beyond max_results that are not listed. With a
+///         content_pattern each matching line is one hit ({"path","line","context"});
+///         without it each matching file is one hit ({"path"}). Never throws.
+json FileIOTools::doFileSearch(const json& args) {
+    try {
+        std::string pattern = args.value("pattern", "");
+        if (pattern.empty()) {
+            return json{{"error", "pattern is required"}};
+        }
+
+        std::string searchPath = args.value("path", ".");
+        std::string contentPattern = args.value("content_pattern", "");
+        int maxResults = args.value("max_results", 50);
+        if (maxResults <= 0) maxResults = 50;
+
+        if (!fs::exists(searchPath)) {
+            return json{{"error", "Search path does not exist: " + searchPath}};
+        }
+
+        if (!fs::is_directory(searchPath)) {
+            return json{{"error", "Search path is not a directory: " + searchPath}};
+        }
+
+        json matches = json::array();
+        int total = 0;
+
+        // Error-code overloads throughout: an unreadable entry is skipped
+        // rather than aborting the whole search.
+        std::error_code ec;
+        for (auto it = fs::recursive_directory_iterator(searchPath, fs::directory_options::skip_permission_denied, ec);
+             it != fs::recursive_directory_iterator(); it.increment(ec)) {
+            if (ec) {
+                ec.clear();
+                continue;
+            }
+
+            if (!it->is_regular_file(ec)) continue;
+            if (ec) { ec.clear(); continue; }
+
+            // The glob is applied to the file name only, not to the full path.
+            std::string filename = it->path().filename().string();
+
+            if (!matchGlob(pattern, filename)) {
+                continue;
+            }
+
+            // If content_pattern is specified, search within file
+            if (!contentPattern.empty()) {
+                std::ifstream file(it->path());
+                if (!file.is_open()) continue;
+
+                std::string line;
+                int lineNum = 0;
+                while (std::getline(file, line)) {
+                    ++lineNum;
+                    if (line.find(contentPattern) != std::string::npos) {
+                        ++total;
+                        if (static_cast<int>(matches.size()) < maxResults) {
+                            json match;
+                            match["path"] = it->path().generic_string();
+                            match["line"] = lineNum;
+                            // Trim context to reasonable length
+                            std::string context = line;
+                            if (context.size() > 200) {
+                                context = context.substr(0, 200) + "...";
+                            }
+                            match["context"] = context;
+                            matches.push_back(std::move(match));
+                        }
+                    }
+                }
+            } else {
+                // Name match only
+                ++total;
+                if (static_cast<int>(matches.size()) < maxResults) {
+                    json match;
+                    match["path"] = it->path().generic_string();
+                    matches.push_back(std::move(match));
+                }
+            }
+        }
+
+        return json{
+            {"matches", matches},
+            {"total", total},
+        };
+    } catch (const std::exception& e) {
+        return json{{"error", std::string("file_search failed: ") + e.what()}};
+    }
+}
+
+// ---------------------------------------------------------------------------
+// matchGlob — simple glob matching (* = any chars, ? = one char)
+// ---------------------------------------------------------------------------
+
+/// Match @p text against @p pattern, where '*' matches any run of characters
+/// (including none) and '?' matches exactly one. The whole text must match.
+///
+/// Iterative matcher with single-star backtracking: on a mismatch it returns to
+/// the most recent '*' and lets that star absorb one more character of text.
+bool FileIOTools::matchGlob(const std::string& pattern, const std::string& text) {
+    size_t pi = 0, ti = 0;
+    // Position of the last '*' seen in the pattern, and the text position it
+    // is currently assumed to match up to.
+    size_t starPi = std::string::npos, starTi = 0;
+
+    while (ti < text.size()) {
+        if (pi < pattern.size() && (pattern[pi] == '?' || pattern[pi] == text[ti])) {
+            ++pi;
+            ++ti;
+        } else if (pi < pattern.size() && pattern[pi] == '*') {
+            starPi = pi;
+            starTi = ti;
+            ++pi;
+        } else if (starPi != std::string::npos) {
+            pi = starPi + 1;
+            ++starTi;
+            ti = starTi;
+        } else {
+            return false;
+        }
+    }
+
+    // Text is exhausted: only trailing '*' may remain in the pattern.
+    while (pi < pattern.size() && pattern[pi] == '*') {
+        ++pi;
+    }
+
+    return pi == pattern.size();
+}
+
+} // namespace gaia
diff --git a/cpp/src/git_tools.cpp b/cpp/src/git_tools.cpp
new file mode 100644
index 000000000..b65152d4f
--- /dev/null
+++ b/cpp/src/git_tools.cpp
@@ -0,0 +1,281 @@
+// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+#include "gaia/git_tools.h"
+
+#include "gaia/process.h"
+#include "gaia/security.h"
+
+#include <cstddef>
+#include <sstream>
+#include <string>
+
+namespace {
+
+/// Maximum output size returned to the LLM (32 KiB).
/// Maximum output size returned to the LLM (32 KiB).
constexpr std::size_t kMaxOutputBytes = 32u * 1024u;

/// Truncate output to at most kMaxOutputBytes, appending a notice if truncated.
///
/// The cut is moved back (by at most three bytes) so that it never lands in
/// the middle of a multi-byte UTF-8 sequence; a split sequence would leave
/// invalid UTF-8 at the end of a string that is subsequently stored in JSON.
/// Input that does not look like UTF-8 at the cut point is cut at exactly
/// kMaxOutputBytes, as before.
///
/// @param output Raw command output.
/// @return `output` unchanged when it fits, otherwise the truncated prefix
///         followed by "\n... [output truncated at 32KB]".
std::string truncateOutput(const std::string& output) {
    if (output.size() <= kMaxOutputBytes) {
        return output;
    }

    // A UTF-8 continuation byte has the bit pattern 10xxxxxx.
    const auto isContinuation = [](char c) {
        return (static_cast<unsigned char>(c) & 0xC0u) == 0x80u;
    };

    // output[cut] is the first byte that will be dropped. If it is a
    // continuation byte, the character it belongs to started before the cut,
    // so step back to that character's lead byte and drop it entirely.
    std::size_t cut = kMaxOutputBytes;
    for (int back = 0; back < 3 && cut > 0 && isContinuation(output[cut]); ++back) {
        --cut;
    }
    if (isContinuation(output[cut])) {
        // More than three continuation bytes in a row: not UTF-8, keep the hard cut.
        cut = kMaxOutputBytes;
    }

    return output.substr(0, cut) + "\n... [output truncated at 32KB]";
}
+gaia::ToolParameter makeParam(const std::string& name, gaia::ToolParamType type, + bool required, const std::string& desc) { + gaia::ToolParameter p; + p.name = name; + p.type = type; + p.required = required; + p.description = desc; + return p; +} + +} // anonymous namespace + +namespace gaia { + +// --------------------------------------------------------------------------- +// registerAll +// --------------------------------------------------------------------------- + +void GitTools::registerAll(ToolRegistry& registry) { + registry.registerTool(gitStatus()); + registry.registerTool(gitDiff()); + registry.registerTool(gitLog()); + registry.registerTool(gitShow()); +} + +// --------------------------------------------------------------------------- +// gitStatus +// --------------------------------------------------------------------------- + +ToolInfo GitTools::gitStatus() { + ToolInfo info; + info.name = "git_status"; + info.description = "Get working tree status. Returns porcelain status output " + "and whether the tree is clean."; + info.callback = doGitStatus; + info.policy = ToolPolicy::ALLOW; + // No parameters + return info; +} + +json GitTools::doGitStatus(const json& /*args*/) { + std::string output = runGit("status --porcelain"); + + // Check for git errors (e.g. not a git repo) + if (output.find("fatal:") != std::string::npos) { + return json{{"error", output}}; + } + + // Trim trailing whitespace + while (!output.empty() && (output.back() == '\n' || output.back() == '\r')) { + output.pop_back(); + } + + bool clean = output.empty(); + return json{{"status", truncateOutput(output)}, {"clean", clean}}; +} + +// --------------------------------------------------------------------------- +// gitDiff +// --------------------------------------------------------------------------- + +ToolInfo GitTools::gitDiff() { + ToolInfo info; + info.name = "git_diff"; + info.description = "Show changes in working tree or between refs. 
" + "Optionally filter by path or show staged changes."; + info.callback = doGitDiff; + info.policy = ToolPolicy::ALLOW; + info.parameters = { + makeParam("path", ToolParamType::STRING, false, + "File or directory path to limit the diff to."), + makeParam("staged", ToolParamType::BOOLEAN, false, + "If true, show staged (cached) changes instead of unstaged."), + makeParam("ref", ToolParamType::STRING, false, + "Git ref to diff against (e.g. a branch name or commit hash)."), + }; + return info; +} + +json GitTools::doGitDiff(const json& args) { + std::string cmd = "diff"; + + // --staged flag + bool staged = args.value("staged", false); + if (staged) { + cmd += " --staged"; + } + + // Optional ref + if (args.contains("ref") && args["ref"].is_string()) { + std::string ref = args["ref"].get(); + if (!isSafeShellArg(ref)) { + return json{{"error", "Invalid ref argument: contains unsafe characters."}}; + } + cmd += " " + ref; + } + + // Optional path + if (args.contains("path") && args["path"].is_string()) { + std::string path = args["path"].get(); + if (!isSafeShellArg(path)) { + return json{{"error", "Invalid path argument: contains unsafe characters."}}; + } + cmd += " -- " + path; + } + + std::string diffOutput = runGit(cmd); + if (diffOutput.find("fatal:") != std::string::npos) { + return json{{"error", diffOutput}}; + } + + // Count files changed via --stat + std::string statCmd = "diff --stat"; + if (staged) { + statCmd += " --staged"; + } + if (args.contains("ref") && args["ref"].is_string()) { + statCmd += " " + args["ref"].get(); + } + if (args.contains("path") && args["path"].is_string()) { + statCmd += " -- " + args["path"].get(); + } + + std::string statOutput = runGit(statCmd); + int filesChanged = 0; + if (!statOutput.empty() && statOutput.find("fatal:") == std::string::npos) { + // Each changed file has its own line; the last line is the summary. + // Count lines that are not the summary line (which contains "changed"). 
+ int totalLines = countLines(statOutput); + filesChanged = (totalLines > 1) ? totalLines - 1 : totalLines; + } + + return json{{"diff", truncateOutput(diffOutput)}, {"files_changed", filesChanged}}; +} + +// --------------------------------------------------------------------------- +// gitLog +// --------------------------------------------------------------------------- + +ToolInfo GitTools::gitLog() { + ToolInfo info; + info.name = "git_log"; + info.description = "Show recent commit history. Returns up to N commits " + "(default 10) in oneline or full format."; + info.callback = doGitLog; + info.policy = ToolPolicy::ALLOW; + info.parameters = { + makeParam("count", ToolParamType::INTEGER, false, + "Number of commits to show (default 10, max 100)."), + makeParam("oneline", ToolParamType::BOOLEAN, false, + "If true (default), show compact one-line format."), + makeParam("path", ToolParamType::STRING, false, + "File or directory path to filter commit history."), + }; + return info; +} + +json GitTools::doGitLog(const json& args) { + int count = args.value("count", 10); + // Clamp to [1, 100] + count = std::max(1, std::min(count, 100)); + + bool oneline = args.value("oneline", true); + + std::string cmd = "log -n " + std::to_string(count); + if (oneline) { + cmd += " --oneline"; + } + + // Optional path filter + if (args.contains("path") && args["path"].is_string()) { + std::string path = args["path"].get(); + if (!isSafeShellArg(path)) { + return json{{"error", "Invalid path argument: contains unsafe characters."}}; + } + cmd += " -- " + path; + } + + std::string output = runGit(cmd); + if (output.find("fatal:") != std::string::npos) { + return json{{"error", output}}; + } + + int commits = countLines(output); + return json{{"log", truncateOutput(output)}, {"commits", commits}}; +} + +// --------------------------------------------------------------------------- +// gitShow +// --------------------------------------------------------------------------- + +ToolInfo 
GitTools::gitShow() { + ToolInfo info; + info.name = "git_show"; + info.description = "Show a specific commit or object. Defaults to HEAD."; + info.callback = doGitShow; + info.policy = ToolPolicy::ALLOW; + info.parameters = { + makeParam("ref", ToolParamType::STRING, false, + "Git ref to show (commit hash, tag, branch). Defaults to HEAD."), + }; + return info; +} + +json GitTools::doGitShow(const json& args) { + std::string ref = args.value("ref", std::string("HEAD")); + + if (!isSafeShellArg(ref)) { + return json{{"error", "Invalid ref argument: contains unsafe characters."}}; + } + + std::string output = runGit("show " + ref); + if (output.find("fatal:") != std::string::npos) { + return json{{"error", output}}; + } + + return json{{"content", truncateOutput(output)}, {"ref", ref}}; +} + +} // namespace gaia diff --git a/cpp/src/json_event_handler.cpp b/cpp/src/json_event_handler.cpp new file mode 100644 index 000000000..d00b05e34 --- /dev/null +++ b/cpp/src/json_event_handler.cpp @@ -0,0 +1,200 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "gaia/json_event_handler.h" + +#include + +namespace gaia { + +// --------------------------------------------------------------------------- +// Core emit — one JSON object per line, flushed immediately +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::emit(const json& event) { + std::lock_guard lock(mutex_); + std::cout << event.dump(-1, ' ', false, json::error_handler_t::replace) + << "\n" << std::flush; +} + +// --------------------------------------------------------------------------- +// Core Progress/State +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::printProcessingStart(const std::string& /*query*/, + int maxSteps, + const std::string& /*modelId*/) { + // Reset counters for the new query. 
+ stepsTaken_ = 0; + stepsLimit_ = maxSteps; + toolsUsed_ = 0; + currentTool_.clear(); + // No event emitted — matches Python SSEOutputHandler behavior. +} + +void JsonEventOutputHandler::printStepHeader(int stepNum, int stepLimit) { + stepsTaken_ = stepNum; + stepsLimit_ = stepLimit; + emit({{"type", "step"}, + {"step", stepNum}, + {"total", stepLimit}, + {"status", "started"}}); +} + +void JsonEventOutputHandler::printStateInfo(const std::string& message) { + emit({{"type", "status"}, + {"status", "warning"}, + {"message", message}}); +} + +void JsonEventOutputHandler::printThought(const std::string& thought) { + if (thought.empty()) return; + emit({{"type", "thinking"}, + {"content", thought}}); +} + +void JsonEventOutputHandler::printGoal(const std::string& goal) { + if (goal.empty()) return; + emit({{"type", "status"}, + {"status", "working"}, + {"message", goal}}); +} + +void JsonEventOutputHandler::printPlan(const json& plan, int currentStep) { + emit({{"type", "plan"}, + {"steps", plan}, + {"current_step", currentStep}}); +} + +// --------------------------------------------------------------------------- +// Tool Execution +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::printToolUsage(const std::string& toolName) { + currentTool_ = toolName; + ++toolsUsed_; + emit({{"type", "tool_start"}, + {"tool", toolName}}); +} + +void JsonEventOutputHandler::printToolComplete() { + emit({{"type", "tool_end"}, + {"success", true}}); +} + +void JsonEventOutputHandler::prettyPrintJson(const json& data, + const std::string& title) { + if (title == "Tool Args") { + // Emit tool_args with the full argument object. + emit({{"type", "tool_args"}, + {"tool", currentTool_}, + {"args", data}}); + } else if (title == "Tool Result") { + // Build a tool_result event from the result JSON. 
+ json event = { + {"type", "tool_result"}, + {"title", currentTool_}, + {"success", data.value("status", "success") != "error"} + }; + + // Include command_output if the tool result has stdout/stderr. + if (data.contains("stdout") || data.contains("stderr") || data.contains("output")) { + json cmdOutput; + if (data.contains("stdout")) cmdOutput["stdout"] = data["stdout"]; + if (data.contains("stderr")) cmdOutput["stderr"] = data["stderr"]; + if (data.contains("output")) cmdOutput["output"] = data["output"]; + event["command_output"] = cmdOutput; + } + + // Summary: prefer error message, then a short description. + if (data.contains("error")) { + event["summary"] = data["error"]; + } else if (data.contains("stdout") && data["stdout"].is_string()) { + const auto& out = data["stdout"].get_ref(); + event["summary"] = out.size() > 200 ? out.substr(0, 200) + "..." : out; + } else { + event["summary"] = data.value("status", "completed"); + } + + event["result_data"] = data; + emit(event); + } else { + // Generic JSON output — emit as status info. 
+ emit({{"type", "status"}, + {"status", "info"}, + {"message", data.dump()}}); + } +} + +// --------------------------------------------------------------------------- +// Status Messages +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::printError(const std::string& message) { + emit({{"type", "agent_error"}, + {"content", message}}); +} + +void JsonEventOutputHandler::printWarning(const std::string& message) { + emit({{"type", "status"}, + {"status", "warning"}, + {"message", message}}); +} + +void JsonEventOutputHandler::printInfo(const std::string& message) { + emit({{"type", "status"}, + {"status", "info"}, + {"message", message}}); +} + +// --------------------------------------------------------------------------- +// Progress Indicators +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::startProgress(const std::string& message) { + emit({{"type", "status"}, + {"status", "working"}, + {"message", message}}); +} + +void JsonEventOutputHandler::stopProgress() { + // No event — progress end is implicit when the next event arrives. 
+} + +// --------------------------------------------------------------------------- +// Completion +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::printFinalAnswer(const std::string& answer, + const UsageStats& usage) { + json event = {{"type", "answer"}, + {"content", answer}, + {"steps", stepsTaken_}, + {"tools_used", toolsUsed_}}; + if (usage.totalTokens > 0) { + event["usage"] = usage.toJson(); + } + emit(event); +} + +void JsonEventOutputHandler::printCompletion(int stepsTaken, int stepsLimit) { + emit({{"type", "status"}, + {"status", "complete"}, + {"steps", stepsTaken}, + {"total", stepsLimit}}); +} + +// --------------------------------------------------------------------------- +// Streaming +// --------------------------------------------------------------------------- + +void JsonEventOutputHandler::printStreamToken(const std::string& token) { + emit({{"type", "chunk"}, + {"content", token}}); +} + +void JsonEventOutputHandler::printStreamEnd() { + // No event — stream end is signaled by the answer event. +} + +} // namespace gaia diff --git a/cpp/src/process.cpp b/cpp/src/process.cpp new file mode 100644 index 000000000..4ffdf66ec --- /dev/null +++ b/cpp/src/process.cpp @@ -0,0 +1,595 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
/// Capture at most `maxBytes` from `stream` and consume whatever follows.
///
/// Data beyond the limit is read and thrown away so that a process writing to
/// the other end does not block on a full pipe.
///
/// @param stream   Open stream to read from; a null stream yields "".
/// @param maxBytes Upper bound on the number of bytes returned.
/// @return The captured prefix of the stream.
std::string readStream(FILE* stream, size_t maxBytes) {
    std::string captured;
    if (stream == nullptr) {
        return captured;
    }

    std::array<char, 4096> chunk;
    size_t remaining = maxBytes;

    // Phase 1: keep what fits within the caller's budget.
    while (remaining > 0) {
        const size_t want = std::min<size_t>(chunk.size(), remaining);
        const size_t got = std::fread(chunk.data(), 1, want, stream);
        if (got == 0) {
            break;
        }
        captured.append(chunk.data(), got);
        remaining -= got;
    }

    // Phase 2: discard the rest until EOF/error.
    for (;;) {
        if (std::fread(chunk.data(), 1, chunk.size(), stream) == 0) {
            break;
        }
    }

    return captured;
}
/// Set environment variables for the current process.
///
/// @param env Variables to set (name -> value).
/// @return The previous values of those variables that already existed.
///         Variables that did not exist are simply not present in the result,
///         so a variable that existed with an empty value can be told apart
///         from one that was absent.
std::map<std::string, std::string> setEnvVars(
        const std::map<std::string, std::string>& env) {
    std::map<std::string, std::string> previous;
    for (const auto& kv : env) {
        // Save previous value (getenv is used on every platform — _dupenv_s is
        // MSVC-only and unavailable in MinGW).
        if (const char* oldVal = std::getenv(kv.first.c_str())) {
            previous.emplace(kv.first, oldVal);
        }
#ifdef _WIN32
        _putenv_s(kv.first.c_str(), kv.second.c_str());
#else
        setenv(kv.first.c_str(), kv.second.c_str(), 1);
#endif
    }
    return previous;
}

/// Restore environment variables to the state captured by setEnvVars().
///
/// @param previous Map returned by setEnvVars() for the same `env`.
/// @param env      The variables that were set and must now be undone.
///
/// Every name in `env` is either reset to its saved value (including an empty
/// one) or, when it has no saved value, removed again.
void restoreEnvVars(const std::map<std::string, std::string>& previous,
                    const std::map<std::string, std::string>& env) {
    for (const auto& kv : env) {
        const auto saved = previous.find(kv.first);
        if (saved != previous.end()) {
            // Variable existed before — put its old value back.
#ifdef _WIN32
            _putenv_s(kv.first.c_str(), saved->second.c_str());
#else
            setenv(kv.first.c_str(), saved->second.c_str(), 1);
#endif
        } else {
            // Variable was not set before — unset it.
#ifdef _WIN32
            _putenv_s(kv.first.c_str(), "");
#else
            unsetenv(kv.first.c_str());
#endif
        }
    }
}
+ std::string stderrFile; + std::string fullCmd; + +#ifdef _WIN32 + // Use a temp file for stderr capture + char tmpPath[MAX_PATH]; + char tmpFile[MAX_PATH]; + GetTempPathA(MAX_PATH, tmpPath); + GetTempFileNameA(tmpPath, "gaia", 0, tmpFile); + stderrFile = tmpFile; + fullCmd = command + " 2>\"" + stderrFile + "\""; +#else + // mkstemp for safe temp file creation + char tmpTemplate[] = "/tmp/gaia_stderr_XXXXXX"; + int fd = mkstemp(tmpTemplate); + if (fd >= 0) { + close(fd); + stderrFile = tmpTemplate; + } + fullCmd = command + " 2>\"" + stderrFile + "\""; +#endif + + struct PipeCloser { + void operator()(FILE* f) const { +#ifdef _WIN32 + if (f) _pclose(f); +#else + if (f) pclose(f); +#endif + } + }; + + std::unique_ptr pipe( +#ifdef _WIN32 + _popen(fullCmd.c_str(), "r") +#else + popen(fullCmd.c_str(), "r") +#endif + ); + + if (!pipe) { + result.exitCode = -1; + result.stderr_output = "Failed to execute command: " + command; + // Clean up temp file + if (!stderrFile.empty()) std::remove(stderrFile.c_str()); + return result; + } + + // Read stdout + result.stdout_output = readStream(pipe.get(), maxOutputBytes); + + // Get exit code + int status; +#ifdef _WIN32 + status = _pclose(pipe.release()); + result.exitCode = status; +#else + status = pclose(pipe.release()); + if (WIFEXITED(status)) { + result.exitCode = WEXITSTATUS(status); + } else { + result.exitCode = -1; + } +#endif + + // Read stderr from temp file + if (!stderrFile.empty()) { + FILE* errFile = std::fopen(stderrFile.c_str(), "r"); + if (errFile) { + result.stderr_output = readStream(errFile, maxOutputBytes); + std::fclose(errFile); + } + std::remove(stderrFile.c_str()); + } + + return result; +} + +// --------------------------------------------------------------------------- +// Timeout execution via CreateProcess (Windows) / fork+exec (POSIX) +// --------------------------------------------------------------------------- + +#ifdef _WIN32 + +ProcessResult runWithTimeout(const std::string& command, + int 
timeoutMs, + size_t maxOutputBytes) { + ProcessResult result; + + // Create pipes for stdout and stderr + SECURITY_ATTRIBUTES sa; + sa.nLength = sizeof(sa); + sa.bInheritHandle = TRUE; + sa.lpSecurityDescriptor = nullptr; + + HANDLE stdoutReadH = nullptr, stdoutWriteH = nullptr; + HANDLE stderrReadH = nullptr, stderrWriteH = nullptr; + + if (!CreatePipe(&stdoutReadH, &stdoutWriteH, &sa, 0) || + !CreatePipe(&stderrReadH, &stderrWriteH, &sa, 0)) { + result.exitCode = -1; + result.stderr_output = "Failed to create pipes"; + return result; + } + + // Ensure read handles are not inherited + SetHandleInformation(stdoutReadH, HANDLE_FLAG_INHERIT, 0); + SetHandleInformation(stderrReadH, HANDLE_FLAG_INHERIT, 0); + + STARTUPINFOA si; + ZeroMemory(&si, sizeof(si)); + si.cb = sizeof(si); + si.dwFlags = STARTF_USESTDHANDLES; + si.hStdOutput = stdoutWriteH; + si.hStdError = stderrWriteH; + si.hStdInput = GetStdHandle(STD_INPUT_HANDLE); + + PROCESS_INFORMATION pi; + ZeroMemory(&pi, sizeof(pi)); + + // Build command line: cmd /c + std::string cmdLine = "cmd /c " + command; + // CreateProcessA needs a mutable char* + std::vector cmdBuf(cmdLine.begin(), cmdLine.end()); + cmdBuf.push_back('\0'); + + BOOL created = CreateProcessA( + nullptr, + cmdBuf.data(), + nullptr, + nullptr, + TRUE, // inherit handles + 0, // creation flags + nullptr, // use parent environment + nullptr, // use parent working directory + &si, + &pi + ); + + // Close the write ends of the pipes — the child owns them now + CloseHandle(stdoutWriteH); + CloseHandle(stderrWriteH); + + if (!created) { + CloseHandle(stdoutReadH); + CloseHandle(stderrReadH); + result.exitCode = -1; + result.stderr_output = "CreateProcess failed for: " + command; + return result; + } + + // Read stdout and stderr from pipes using file descriptors + // Convert HANDLEs to FILE* for readStream() + int stdoutFd = _open_osfhandle(reinterpret_cast(stdoutReadH), 0); + int stderrFd = _open_osfhandle(reinterpret_cast(stderrReadH), 0); + + FILE* 
stdoutFile = nullptr; + FILE* stderrFile = nullptr; + + if (stdoutFd >= 0) stdoutFile = _fdopen(stdoutFd, "r"); + if (stderrFd >= 0) stderrFile = _fdopen(stderrFd, "r"); + + // Read pipes in background threads while waiting for process with timeout. + // This avoids deadlock: reading before waiting blocks if child keeps stdout + // open; waiting before reading loses output if pipe buffer fills. + std::string capturedStdout, capturedStderr; + + std::thread convergentStdout([&]() { + capturedStdout = readStream(stdoutFile, maxOutputBytes); + }); + std::thread convergentStderr([&]() { + capturedStderr = readStream(stderrFile, maxOutputBytes); + }); + + // Wait for process with timeout + DWORD waitResult = WaitForSingleObject(pi.hProcess, + static_cast(timeoutMs)); + + if (waitResult == WAIT_TIMEOUT) { + result.timedOut = true; + TerminateProcess(pi.hProcess, 1); + WaitForSingleObject(pi.hProcess, 5000); // wait for termination + result.exitCode = -1; + } else { + DWORD exitCodeDw = 0; + GetExitCodeProcess(pi.hProcess, &exitCodeDw); + result.exitCode = static_cast(exitCodeDw); + } + + // Wait for reader threads to finish (process is dead, pipes will EOF) + convergentStdout.join(); + convergentStderr.join(); + + result.stdout_output = std::move(capturedStdout); + result.stderr_output = std::move(capturedStderr); + + if (stdoutFile) std::fclose(stdoutFile); + else CloseHandle(stdoutReadH); + + if (stderrFile) std::fclose(stderrFile); + else CloseHandle(stderrReadH); + + CloseHandle(pi.hProcess); + CloseHandle(pi.hThread); + + return result; +} + +#else // POSIX + +ProcessResult runWithTimeout(const std::string& command, + int timeoutMs, + size_t maxOutputBytes) { + ProcessResult result; + + // Create pipes for stdout and stderr + int stdoutPipe[2]; + int stderrPipe[2]; + + if (pipe(stdoutPipe) != 0 || pipe(stderrPipe) != 0) { + result.exitCode = -1; + result.stderr_output = "Failed to create pipes"; + return result; + } + + pid_t pid = fork(); + + if (pid < 0) { + // 
Fork failed + close(stdoutPipe[0]); close(stdoutPipe[1]); + close(stderrPipe[0]); close(stderrPipe[1]); + result.exitCode = -1; + result.stderr_output = "Fork failed: " + std::string(strerror(errno)); + return result; + } + + if (pid == 0) { + // Child process + close(stdoutPipe[0]); // close read end + close(stderrPipe[0]); // close read end + + dup2(stdoutPipe[1], STDOUT_FILENO); + dup2(stderrPipe[1], STDERR_FILENO); + + close(stdoutPipe[1]); + close(stderrPipe[1]); + + execl("/bin/sh", "sh", "-c", command.c_str(), static_cast(nullptr)); + _exit(127); // exec failed + } + + // Parent process + close(stdoutPipe[1]); // close write end + close(stderrPipe[1]); // close write end + + // Set read ends to non-blocking for timeout-aware reading + fcntl(stdoutPipe[0], F_SETFL, O_NONBLOCK); + fcntl(stderrPipe[0], F_SETFL, O_NONBLOCK); + + // Poll for output and timeout + auto startTime = std::chrono::steady_clock::now(); + bool processFinished = false; + + std::string stdoutBuf; + std::string stderrBuf; + std::array readBuf; + + while (!processFinished) { + // Check timeout + auto elapsed = std::chrono::duration_cast( + std::chrono::steady_clock::now() - startTime).count(); + + if (elapsed >= timeoutMs) { + result.timedOut = true; + kill(pid, SIGKILL); + waitpid(pid, nullptr, 0); + break; + } + + // Try reading stdout + if (stdoutBuf.size() < maxOutputBytes) { + ssize_t n = read(stdoutPipe[0], readBuf.data(), + std::min(readBuf.size(), + maxOutputBytes - stdoutBuf.size())); + if (n > 0) { + stdoutBuf.append(readBuf.data(), static_cast(n)); + } + } + + // Try reading stderr + if (stderrBuf.size() < maxOutputBytes) { + ssize_t n = read(stderrPipe[0], readBuf.data(), + std::min(readBuf.size(), + maxOutputBytes - stderrBuf.size())); + if (n > 0) { + stderrBuf.append(readBuf.data(), static_cast(n)); + } + } + + // Check if child has exited + int status = 0; + pid_t w = waitpid(pid, &status, WNOHANG); + if (w == pid) { + processFinished = true; + if (WIFEXITED(status)) { + 
result.exitCode = WEXITSTATUS(status); + } else if (WIFSIGNALED(status)) { + result.exitCode = -1; + } + } else { + // Brief sleep to avoid busy-waiting + usleep(1000); // 1ms + } + } + + // Final reads to drain any remaining data + while (true) { + ssize_t n = read(stdoutPipe[0], readBuf.data(), readBuf.size()); + if (n <= 0) break; + if (stdoutBuf.size() < maxOutputBytes) { + size_t space = maxOutputBytes - stdoutBuf.size(); + stdoutBuf.append(readBuf.data(), + std::min(static_cast(n), space)); + } + } + while (true) { + ssize_t n = read(stderrPipe[0], readBuf.data(), readBuf.size()); + if (n <= 0) break; + if (stderrBuf.size() < maxOutputBytes) { + size_t space = maxOutputBytes - stderrBuf.size(); + stderrBuf.append(readBuf.data(), + std::min(static_cast(n), space)); + } + } + + close(stdoutPipe[0]); + close(stderrPipe[0]); + + result.stdout_output = std::move(stdoutBuf); + result.stderr_output = std::move(stderrBuf); + + return result; +} + +#endif // _WIN32 + +} // anonymous namespace + +// --------------------------------------------------------------------------- +// ProcessRunner public API +// --------------------------------------------------------------------------- + +ProcessResult ProcessRunner::run( + const std::string& command, + int timeoutMs, + const std::string& cwd, + const std::map& env, + size_t maxOutputBytes) { + + // Handle empty command + if (command.empty()) { + ProcessResult result; + result.exitCode = -1; + result.stderr_output = "Empty command"; + return result; + } + + // Save and change working directory if requested + std::string originalCwd; + if (!cwd.empty()) { + originalCwd = saveCwd(); + if (!changeCwd(cwd)) { + ProcessResult result; + result.exitCode = -1; + result.stderr_output = "Failed to change to directory: " + cwd; + return result; + } + } + + // Set environment variables + std::map previousEnv; + if (!env.empty()) { + previousEnv = setEnvVars(env); + } + + // Run the command + ProcessResult result; + if (timeoutMs > 0) { 
+ result = runWithTimeout(command, timeoutMs, maxOutputBytes); + } else { + result = runSimple(command, maxOutputBytes); + } + + // Restore environment variables + if (!env.empty()) { + restoreEnvVars(previousEnv, env); + } + + // Restore working directory + if (!originalCwd.empty()) { + changeCwd(originalCwd); + } + + return result; +} + +std::string ProcessRunner::runOrThrow( + const std::string& command, + int timeoutMs, + const std::string& cwd) { + ProcessResult result = run(command, timeoutMs, cwd); + + if (result.timedOut) { + throw std::runtime_error( + "Command timed out after " + std::to_string(timeoutMs) + + "ms: " + command); + } + + if (result.exitCode != 0) { + std::string msg = "Command failed with exit code " + + std::to_string(result.exitCode) + ": " + command; + if (!result.stderr_output.empty()) { + msg += "\nstderr: " + result.stderr_output; + } + throw std::runtime_error(msg); + } + + return result.stdout_output; +} + +} // namespace gaia diff --git a/cpp/src/repl.cpp b/cpp/src/repl.cpp new file mode 100644 index 000000000..ada2b89c5 --- /dev/null +++ b/cpp/src/repl.cpp @@ -0,0 +1,384 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include "gaia/repl.h" +#include "gaia/agent.h" +#include "gaia/clean_console.h" +#include "gaia/session.h" + +#ifdef GAIA_HAS_TUI +#include "gaia/tui_console.h" +#endif + +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#define GAIA_ISATTY _isatty +#define GAIA_FILENO _fileno +#else +#include +#define GAIA_ISATTY isatty +#define GAIA_FILENO fileno +#endif + +namespace gaia { + +// --------------------------------------------------------------------------- +// Signal handling — file-scope atomic pointer for Ctrl-C cancellation +// --------------------------------------------------------------------------- + +namespace { + +/// Global pointer to the active agent, used by the SIGINT handler. 
/// Return a copy of @p s without leading or trailing blanks.
/// Blanks are space, tab, carriage return and newline; interior blanks are
/// kept. A string made only of blanks yields an empty string.
std::string trim(const std::string& s) {
    const auto isBlank = [](char ch) {
        return ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n';
    };

    std::string::size_type lo = 0;
    std::string::size_type hi = s.size();

    // Walk inward from both ends until a non-blank character is met.
    while (lo < hi && isBlank(s[lo])) {
        ++lo;
    }
    while (hi > lo && isBlank(s[hi - 1])) {
        --hi;
    }

    return s.substr(lo, hi - lo);
}
+// Built-in command handlers +// --------------------------------------------------------------------------- + +void ReplRunner::cmdClear(const std::string& /*args*/, Agent& agent) { + agent.clearHistory(); + std::cout << "Conversation history cleared." << std::endl; +} + +void ReplRunner::cmdHelp(const std::string& /*args*/, Agent& /*agent*/) { + std::cout << "\nAvailable commands:\n"; + for (const auto& [name, entry] : commands_) { + std::cout << " " << name << " - " << entry.description << "\n"; + } + std::cout << std::endl; +} + +void ReplRunner::cmdModel(const std::string& args, Agent& agent) { + std::string modelName = trim(args); + if (modelName.empty()) { + std::cout << "Current model: " << agent.config().modelId << std::endl; + } else { + agent.setModel(modelName); + std::cout << "Model set to: " << modelName << std::endl; + } +} + +void ReplRunner::cmdHistory(const std::string& /*args*/, Agent& /*agent*/) { + if (!sessionStore_) { + std::cout << "No session store configured." << std::endl; + return; + } + + auto sessions = sessionStore_->list(); + if (sessions.empty()) { + std::cout << "No saved sessions." 
<< std::endl; + return; + } + + std::cout << "\nSaved sessions:\n"; + for (const auto& info : sessions) { + std::cout << " " << info.id + << " (" << info.messageCount << " messages"; + if (!info.preview.empty()) { + std::cout << ", \"" << info.preview << "\""; + } + std::cout << ")\n"; + } + std::cout << std::endl; +} + +void ReplRunner::cmdExit(const std::string& /*args*/, Agent& /*agent*/) { + exitRequested_ = true; +} + +// --------------------------------------------------------------------------- +// Command dispatch +// --------------------------------------------------------------------------- + +bool ReplRunner::tryDispatchCommand(const std::string& input) { + if (input.empty() || input[0] != '/') { + return false; + } + + // Extract command name and args: "/model qwen3" -> name="/model", args="qwen3" + std::string::size_type spacePos = input.find(' '); + std::string cmdName; + std::string cmdArgs; + + if (spacePos == std::string::npos) { + cmdName = input; + } else { + cmdName = input.substr(0, spacePos); + cmdArgs = trim(input.substr(spacePos + 1)); + } + + auto it = commands_.find(cmdName); + if (it == commands_.end()) { + std::cout << "Unknown command: " << cmdName + << ". Type /help for available commands." 
<< std::endl; + return true; // It was a command attempt, just unknown + } + + it->second.callback(cmdArgs, agent_); + return true; +} + +// --------------------------------------------------------------------------- +// Command registration +// --------------------------------------------------------------------------- + +void ReplRunner::addCommand(const std::string& name, const std::string& description, + SlashCommandCallback callback) { + commands_[name] = CommandEntry{description, std::move(callback)}; +} + +bool ReplRunner::hasCommand(const std::string& name) const { + return commands_.find(name) != commands_.end(); +} + +// --------------------------------------------------------------------------- +// Session management +// --------------------------------------------------------------------------- + +void ReplRunner::setSessionStore(std::shared_ptr store) { + sessionStore_ = std::move(store); +} + +void ReplRunner::setResumeId(const std::string& sessionId) { + resumeId_ = sessionId; +} + +void ReplRunner::saveSession() { + if (!sessionStore_ || sessionId_.empty()) { + return; + } + const auto& history = agent_.history(); + if (history.empty()) { + return; + } + try { + sessionStore_->save(sessionId_, history); + } catch (const std::exception& e) { + std::cerr << "Warning: failed to save session: " << e.what() << std::endl; + } +} + +bool ReplRunner::isInteractiveTerminal() { + return GAIA_ISATTY(GAIA_FILENO(stdout)) != 0; +} + +void ReplRunner::configureOutputHandler() { + bool shouldUseTui = tuiOverride_ ? 
useTui_ : isInteractiveTerminal(); + +#ifdef GAIA_HAS_TUI + if (shouldUseTui) { + agent_.setOutputHandler(std::make_unique()); + return; + } +#else + (void)shouldUseTui; // suppress unused warning +#endif + // Fallback: CleanConsole for piped output or --no-tui + agent_.setOutputHandler(std::make_unique()); +} + +// --------------------------------------------------------------------------- +// Banner +// --------------------------------------------------------------------------- + +void ReplRunner::printBanner() { + std::cout << "\n"; + std::cout << "GAIA Agent | Model: " << agent_.config().modelId << "\n"; + std::cout << "Type /help for commands, /exit to quit.\n"; + std::cout << std::endl; +} + +// --------------------------------------------------------------------------- +// run() — main interactive loop +// --------------------------------------------------------------------------- + +void ReplRunner::run() { + exitRequested_ = false; + + // Configure output handler (TuiConsole vs CleanConsole) + configureOutputHandler(); + + // Print welcome banner + if (showBanner_) { + printBanner(); + } + + // Resume session if requested + if (!resumeId_.empty() && sessionStore_) { + try { + auto history = sessionStore_->load(resumeId_); + agent_.setHistory(std::move(history)); + sessionId_ = resumeId_; + std::cout << "Resumed session: " << resumeId_ << std::endl; + } catch (const std::exception& e) { + std::cout << "Failed to resume session: " << e.what() << std::endl; + } + } + + // Generate a new session ID if not resuming + if (sessionId_.empty() && sessionStore_) { + sessionId_ = SessionStore::generateId(); + } + + // Install SIGINT handler for Ctrl-C cancellation + g_activeAgent.store(&agent_); + g_previousSigintHandler = std::signal(SIGINT, sigintHandler); + + // Main input loop + std::string input; + while (!exitRequested_) { + std::cout << prompt_ << std::flush; + + if (!std::getline(std::cin, input)) { + // EOF (Ctrl-D on Unix, Ctrl-Z+Enter on Windows) + 
std::cout << std::endl; + break; + } + + input = trim(input); + if (input.empty()) { + continue; + } + + // Check for bare exit/quit + if (input == "exit" || input == "quit") { + break; + } + + // Try slash command dispatch + if (tryDispatchCommand(input)) { + continue; + } + + // Regular query — run agent in a worker thread so SIGINT can + // cancel it via requestCancel() without killing the process. + { + json result; + std::exception_ptr eptr; + + std::thread worker([&]() { + try { + result = agent_.processQuery(input); + } catch (...) { + eptr = std::current_exception(); + } + }); + + worker.join(); + + if (eptr) { + try { + std::rethrow_exception(eptr); + } catch (const std::exception& e) { + std::cout << "Error: " << e.what() << std::endl; + } + } else if (result.contains("result") && result["result"].is_string()) { + // Final answer is already printed by the console handler + // in most configurations. Only print if silent mode. + if (agent_.config().silentMode) { + std::cout << result["result"].get() << std::endl; + } + } + } + } + + // Restore previous signal handler + std::signal(SIGINT, g_previousSigintHandler); + g_activeAgent.store(nullptr); + + // Save session on exit + saveSession(); + + std::cout << "Goodbye!" << std::endl; +} + +// --------------------------------------------------------------------------- +// runOnce() — single query mode +// --------------------------------------------------------------------------- + +int ReplRunner::runOnce(const std::string& query) { + // Configure output handler before the query (TuiConsole vs CleanConsole) + configureOutputHandler(); + + try { + auto result = agent_.processQuery(query); + + if (result.contains("status") && result["status"] == "error") { + return 1; + } + + // The console handler already prints the final answer via + // printFinalAnswer() during processQuery(). Only print here + // if the agent is in silent mode (no console output). 
/// Current UTC time formatted as "YYYY-MM-DDTHH:MM:SSZ".
std::string nowIso8601() {
    const auto epochSeconds =
        std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

    // Break the timestamp down in UTC using the re-entrant platform call.
    std::tm utc{};
#ifdef _WIN32
    gmtime_s(&utc, &epochSeconds);
#else
    gmtime_r(&epochSeconds, &utc);
#endif

    std::ostringstream formatted;
    formatted << std::put_time(&utc, "%Y-%m-%dT%H:%M:%SZ");
    return formatted.str();
}
/// Current UTC time formatted as "YYYYMMDD-HHMMSS" (one-second resolution),
/// compact enough to embed in an identifier.
std::string nowIdTimestamp() {
    const auto epochSeconds =
        std::chrono::system_clock::to_time_t(std::chrono::system_clock::now());

    // Break the timestamp down in UTC using the re-entrant platform call.
    std::tm utcParts{};
#ifdef _WIN32
    gmtime_s(&utcParts, &epochSeconds);
#else
    gmtime_r(&epochSeconds, &utcParts);
#endif

    std::ostringstream idStream;
    idStream << std::put_time(&utcParts, "%Y%m%d-%H%M%S");
    return idStream.str();
}
Only alphanumeric, hyphens, and underscores are allowed."); + } + } +} + +// --------------------------------------------------------------------------- +// Path helper +// --------------------------------------------------------------------------- + +std::string SessionStore::pathForId(const std::string& id) const { + fs::path p = fs::path(dir_) / (id + ".json"); + return p.string(); +} + +// --------------------------------------------------------------------------- +// Message serialization +// --------------------------------------------------------------------------- + +Message SessionStore::messageFromJson(const json& j) { + Message m; + + // Role (required) + if (!j.contains("role") || !j["role"].is_string()) { + throw std::runtime_error("Message JSON missing 'role' string field"); + } + m.role = roleFromString(j["role"].get()); + + // Content — accept string only (parts/array content not round-tripped) + if (j.contains("content")) { + if (j["content"].is_string()) { + m.content = j["content"].get(); + } else if (j["content"].is_array()) { + // Flatten array content to text-only for simplicity + std::string combined; + for (const auto& part : j["content"]) { + if (part.is_object() && part.value("type", "") == "text" && + part.contains("text") && part["text"].is_string()) { + if (!combined.empty()) combined += "\n"; + combined += part["text"].get(); + } + } + m.content = combined; + } + } + + // Optional fields + if (j.contains("name") && j["name"].is_string()) { + m.name = j["name"].get(); + } + if (j.contains("tool_call_id") && j["tool_call_id"].is_string()) { + m.toolCallId = j["tool_call_id"].get(); + } + + return m; +} + +// --------------------------------------------------------------------------- +// save +// --------------------------------------------------------------------------- + +void SessionStore::save(const std::string& id, const std::vector& history) { + validateId(id); + + // Ensure directory exists + std::error_code ec; + 
fs::create_directories(dir_, ec); + if (ec) { + throw std::runtime_error( + "Failed to create session directory '" + dir_ + "': " + ec.message()); + } + + // Build JSON envelope + json j; + j["version"] = 1; + j["id"] = id; + j["timestamp"] = nowIso8601(); + + json messages = json::array(); + for (const auto& msg : history) { + messages.push_back(msg.toJson()); + } + j["messages"] = messages; + + // Write atomically-ish: write to file directly (no temp-rename on Windows + // for simplicity, matching the AllowedToolsStore pattern) + std::string path = pathForId(id); + std::ofstream f(path); + if (!f.is_open()) { + throw std::runtime_error("Failed to open session file for writing: " + path); + } + f << j.dump(2) << "\n"; + if (!f.good()) { + throw std::runtime_error("Failed to write session file: " + path); + } +} + +// --------------------------------------------------------------------------- +// load +// --------------------------------------------------------------------------- + +std::vector SessionStore::load(const std::string& id) const { + validateId(id); + + std::string path = pathForId(id); + std::ifstream f(path); + if (!f.is_open()) { + throw std::runtime_error("Session not found: " + id); + } + + json j; + try { + f >> j; + } catch (const json::parse_error& e) { + throw std::runtime_error( + "Failed to parse session file '" + path + "': " + e.what()); + } + + if (!j.contains("messages") || !j["messages"].is_array()) { + throw std::runtime_error( + "Session file '" + path + "' is malformed: missing 'messages' array"); + } + + std::vector history; + history.reserve(j["messages"].size()); + for (const auto& msgJson : j["messages"]) { + history.push_back(messageFromJson(msgJson)); + } + return history; +} + +// --------------------------------------------------------------------------- +// exists +// --------------------------------------------------------------------------- + +bool SessionStore::exists(const std::string& id) const { + validateId(id); + 
return fs::exists(pathForId(id)); +} + +// --------------------------------------------------------------------------- +// remove +// --------------------------------------------------------------------------- + +bool SessionStore::remove(const std::string& id) { + validateId(id); + std::error_code ec; + return fs::remove(pathForId(id), ec); +} + +// --------------------------------------------------------------------------- +// list +// --------------------------------------------------------------------------- + +std::vector SessionStore::list() const { + std::vector sessions; + + if (!fs::exists(dir_) || !fs::is_directory(dir_)) { + return sessions; + } + + for (const auto& entry : fs::directory_iterator(dir_)) { + if (!entry.is_regular_file()) continue; + if (entry.path().extension() != ".json") continue; + + try { + std::ifstream f(entry.path()); + if (!f.is_open()) continue; + + json j = json::parse(f); + + SessionInfo info; + info.id = j.value("id", entry.path().stem().string()); + info.timestamp = j.value("timestamp", ""); + + // Message count + if (j.contains("messages") && j["messages"].is_array()) { + info.messageCount = j["messages"].size(); + + // Preview: first user message, truncated + for (const auto& msg : j["messages"]) { + if (msg.value("role", "") == "user") { + std::string content; + if (msg.contains("content") && msg["content"].is_string()) { + content = msg["content"].get(); + } + if (content.size() > 100) { + content = content.substr(0, 97) + "..."; + } + info.preview = content; + break; + } + } + } + + sessions.push_back(std::move(info)); + } catch (...) 
{ + // Skip malformed session files + continue; + } + } + + // Sort by timestamp, newest first + std::sort(sessions.begin(), sessions.end(), + [](const SessionInfo& a, const SessionInfo& b) { + return a.timestamp > b.timestamp; + }); + + return sessions; +} + +// --------------------------------------------------------------------------- +// generateId +// --------------------------------------------------------------------------- + +std::string SessionStore::generateId() { + std::string base = "session-" + nowIdTimestamp(); + + // Check for collision — append a suffix if needed + // This handles the case where generateId() is called twice within the same second + static int counter = 0; + static std::string lastTimestamp; + + std::string ts = nowIdTimestamp(); + if (ts == lastTimestamp) { + ++counter; + lastTimestamp = ts; + return base + "-" + std::to_string(counter); + } + + lastTimestamp = ts; + counter = 0; + return base; +} + +} // namespace gaia diff --git a/cpp/src/tool_registry.cpp b/cpp/src/tool_registry.cpp index 648ece11f..098f115a7 100644 --- a/cpp/src/tool_registry.cpp +++ b/cpp/src/tool_registry.cpp @@ -169,6 +169,12 @@ json ToolRegistry::executeTool(const std::string& name, const json& args) { } catch (const std::invalid_argument& e) { return json{{"status", "error"}, {"error", std::string("Argument validation failed: ") + e.what()}}; } + } else if (!tool->parameters.empty()) { + // Auto-validate against declared parameter schema + std::string validationError = validateArgsAgainstSchema(tool->parameters, effectiveArgs); + if (!validationError.empty()) { + return json{{"status", "error"}, {"error", "Invalid arguments for '" + resolvedName + "': " + validationError}}; + } } // 3. 
CONFIRM check @@ -225,4 +231,56 @@ std::string ToolRegistry::toLower(const std::string& s) { return result; } +std::string ToolRegistry::validateArgsAgainstSchema( + const std::vector& params, const json& args) { + // args should be an object (or null/missing treated as empty object) + json effectiveArgs = (args.is_null() || args.is_discarded()) ? json::object() : args; + if (!effectiveArgs.is_object()) { + return "expected object, got " + std::string(effectiveArgs.type_name()); + } + + // Check required parameters are present + for (const auto& param : params) { + if (param.required && !effectiveArgs.contains(param.name)) { + return "missing required parameter '" + param.name + "'"; + } + + // Type-check if the parameter is present + if (effectiveArgs.contains(param.name)) { + const auto& val = effectiveArgs[param.name]; + bool typeOk = false; + switch (param.type) { + case ToolParamType::STRING: + typeOk = val.is_string(); + break; + case ToolParamType::INTEGER: + typeOk = val.is_number_integer(); + break; + case ToolParamType::NUMBER: + typeOk = val.is_number(); + break; + case ToolParamType::BOOLEAN: + typeOk = val.is_boolean(); + break; + case ToolParamType::ARRAY: + typeOk = val.is_array(); + break; + case ToolParamType::OBJECT: + typeOk = val.is_object(); + break; + case ToolParamType::UNKNOWN: + typeOk = true; // accept anything + break; + } + if (!typeOk) { + return "parameter '" + param.name + "' should be " + + paramTypeToString(param.type) + ", got " + + std::string(val.type_name()); + } + } + } + + return ""; // valid +} + } // namespace gaia diff --git a/cpp/src/tui_console.cpp b/cpp/src/tui_console.cpp new file mode 100644 index 000000000..c04c657a3 --- /dev/null +++ b/cpp/src/tui_console.cpp @@ -0,0 +1,271 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// FTXUI-based reactive TUI console implementation. +// Each print*() method appends a ChatEntry to the internal history. 
+// getChatElements() and getStatusBar() convert the history to FTXUI Elements +// for embedding in a larger TUI layout. + +#ifdef GAIA_HAS_TUI + +#include "gaia/tui_console.h" + +#include + +namespace gaia { + +using namespace ftxui; + +// --------------------------------------------------------------------------- +// Construction / destruction +// --------------------------------------------------------------------------- + +TuiConsole::TuiConsole() = default; +TuiConsole::~TuiConsole() = default; + +// --------------------------------------------------------------------------- +// Internal helpers +// --------------------------------------------------------------------------- + +void TuiConsole::addEntry(ChatEntry::Type type, const std::string& content) { + std::lock_guard lock(mutex_); + entries_.push_back({type, content}); + // Evict oldest entries to prevent unbounded memory growth + if (entries_.size() > kMaxEntries) { + entries_.erase(entries_.begin(), + entries_.begin() + static_cast(entries_.size() - kMaxEntries)); + } +} + +// --------------------------------------------------------------------------- +// OutputHandler: core progress / state +// --------------------------------------------------------------------------- + +void TuiConsole::printProcessingStart(const std::string& query, int maxSteps, + const std::string& modelId) { + std::lock_guard lock(mutex_); + currentModel_ = modelId; + currentStep_ = 0; + maxSteps_ = maxSteps; + streamBuffer_.clear(); + streaming_ = false; + progressMessage_.clear(); + entries_.push_back({ChatEntry::Type::USER, query}); +} + +void TuiConsole::printStepHeader(int stepNum, int stepLimit) { + std::lock_guard lock(mutex_); + currentStep_ = stepNum; + maxSteps_ = stepLimit; +} + +void TuiConsole::printStateInfo(const std::string& message) { + if (message.empty()) return; + addEntry(ChatEntry::Type::INFO, message); +} + +void TuiConsole::printThought(const std::string& thought) { + if (thought.empty()) return; + 
addEntry(ChatEntry::Type::INFO, "Thinking: " + thought); +} + +void TuiConsole::printGoal(const std::string& goal) { + if (goal.empty()) return; + addEntry(ChatEntry::Type::INFO, "Goal: " + goal); +} + +void TuiConsole::printPlan(const json& plan, int currentStep) { + if (!plan.is_array() || plan.empty()) return; + + std::ostringstream oss; + oss << "Plan (" << plan.size() << " steps):"; + int idx = 0; + for (const auto& step : plan) { + std::string marker = (idx == currentStep) ? " >> " : " "; + std::string toolName = step.value("tool", "???"); + oss << "\n" << marker << (idx + 1) << ". " << toolName; + ++idx; + } + addEntry(ChatEntry::Type::INFO, oss.str()); +} + +// --------------------------------------------------------------------------- +// OutputHandler: tool execution +// --------------------------------------------------------------------------- + +void TuiConsole::printToolUsage(const std::string& toolName) { + addEntry(ChatEntry::Type::TOOL, "Using tool: " + toolName + "..."); +} + +void TuiConsole::printToolComplete() { + addEntry(ChatEntry::Type::TOOL, "Tool completed"); +} + +void TuiConsole::prettyPrintJson(const json& data, const std::string& title) { + if (data.empty()) return; + std::ostringstream oss; + if (!title.empty()) { + oss << title << ": "; + } + oss << data.dump(2); + addEntry(ChatEntry::Type::INFO, oss.str()); +} + +// --------------------------------------------------------------------------- +// OutputHandler: status messages +// --------------------------------------------------------------------------- + +void TuiConsole::printError(const std::string& message) { + if (message.empty()) return; + addEntry(ChatEntry::Type::ERROR, message); +} + +void TuiConsole::printWarning(const std::string& message) { + if (message.empty()) return; + addEntry(ChatEntry::Type::WARNING, message); +} + +void TuiConsole::printInfo(const std::string& message) { + if (message.empty()) return; + addEntry(ChatEntry::Type::INFO, message); +} + +// 
--------------------------------------------------------------------------- +// OutputHandler: progress indicators +// --------------------------------------------------------------------------- + +void TuiConsole::startProgress(const std::string& message) { + std::lock_guard lock(mutex_); + progressMessage_ = message; +} + +void TuiConsole::stopProgress() { + std::lock_guard lock(mutex_); + progressMessage_.clear(); +} + +// --------------------------------------------------------------------------- +// OutputHandler: completion +// --------------------------------------------------------------------------- + +void TuiConsole::printFinalAnswer(const std::string& answer, + const UsageStats& /*usage*/) { + if (answer.empty()) return; + addEntry(ChatEntry::Type::ASSISTANT, answer); +} + +void TuiConsole::printCompletion(int stepsTaken, int stepsLimit) { + std::ostringstream oss; + oss << "Completed in " << stepsTaken << "/" << stepsLimit << " steps"; + addEntry(ChatEntry::Type::INFO, oss.str()); +} + +void TuiConsole::printDecisionMenu(const std::vector& decisions) { + if (decisions.empty()) return; + + std::ostringstream oss; + oss << "Choose an option:"; + for (size_t i = 0; i < decisions.size(); ++i) { + oss << "\n [" << (i + 1) << "] " << decisions[i].label; + if (!decisions[i].description.empty()) { + oss << " - " << decisions[i].description; + } + } + addEntry(ChatEntry::Type::INFO, oss.str()); +} + +// --------------------------------------------------------------------------- +// OutputHandler: streaming +// --------------------------------------------------------------------------- + +void TuiConsole::printStreamToken(const std::string& token) { + std::lock_guard lock(mutex_); + if (!streaming_) { + // Start a new assistant entry for streaming + entries_.push_back({ChatEntry::Type::ASSISTANT, ""}); + streaming_ = true; + streamBuffer_.clear(); + } + streamBuffer_ += token; + // Update the last entry's content with accumulated tokens + if (!entries_.empty()) 
{ + entries_.back().content = streamBuffer_; + } +} + +void TuiConsole::printStreamEnd() { + std::lock_guard lock(mutex_); + if (streaming_ && !entries_.empty()) { + entries_.back().content = streamBuffer_; + } + streaming_ = false; + streamBuffer_.clear(); +} + +// --------------------------------------------------------------------------- +// FTXUI element accessors +// --------------------------------------------------------------------------- + +std::vector TuiConsole::getChatElements() { + std::lock_guard lock(mutex_); + std::vector elements; + elements.reserve(entries_.size()); + + for (const auto& entry : entries_) { + switch (entry.type) { + case ChatEntry::Type::USER: + elements.push_back( + hbox(text("> ") | bold, paragraph(entry.content)) + ); + break; + + case ChatEntry::Type::ASSISTANT: + elements.push_back(renderMarkdown(entry.content)); + break; + + case ChatEntry::Type::TOOL: + elements.push_back(text(entry.content) | dim); + break; + + case ChatEntry::Type::INFO: + elements.push_back( + text(entry.content) | color(Color::Blue) + ); + break; + + case ChatEntry::Type::ERROR: + elements.push_back( + text("Error: " + entry.content) | color(Color::Red) | bold + ); + break; + + case ChatEntry::Type::WARNING: + elements.push_back( + text("Warning: " + entry.content) | color(Color::Yellow) + ); + break; + } + } + + // Append progress indicator if active + if (!progressMessage_.empty()) { + elements.push_back( + text(progressMessage_ + "...") | dim | blink + ); + } + + return elements; +} + +Element TuiConsole::getStatusBar() { + std::lock_guard lock(mutex_); + return hbox( + text(currentModel_.empty() ? 
"model" : currentModel_) | bold, + separator(), + text("step " + std::to_string(currentStep_) + "/" + std::to_string(maxSteps_)) + ); +} + +} // namespace gaia + +#endif // GAIA_HAS_TUI diff --git a/cpp/src/tui_markdown.cpp b/cpp/src/tui_markdown.cpp new file mode 100644 index 000000000..4b0e23433 --- /dev/null +++ b/cpp/src/tui_markdown.cpp @@ -0,0 +1,290 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Standalone markdown-to-FTXUI element renderer. +// +// Supported syntax (C++17 parser -- no external deps): +// - # Headings (H1-H3: bold + color) +// - **bold** text +// - `inline code` (dim/inverted) +// - ```fenced code blocks``` (bordered, with optional language label) +// - - Bullet lists (indented) +// - > Blockquotes (dim + border) +// - Regular paragraphs (word-wrapped) +// +// Unsupported syntax is rendered as plain text (graceful degradation). + +#ifdef GAIA_HAS_TUI + +#include +#include + +#include + +namespace gaia { + +using namespace ftxui; + +// --------------------------------------------------------------------------- +// Inline formatting: scan a single line for **bold** and `inline code` +// --------------------------------------------------------------------------- +namespace { + +/// Parse inline formatting within a single line and return an hbox of Elements. 
/// Split text into lines on '\n'.
///
/// A '\r' immediately before the line break (Windows CRLF), or at the very
/// end of the text, is removed so callers never see stray carriage returns.
/// A '\r' elsewhere inside a line is left untouched.
///
/// The segment after the last '\n' is always included, so the result is never
/// empty: "" yields {""} and "a\n" yields {"a", ""}.
std::vector<std::string> splitLines(const std::string& s) {
    std::vector<std::string> result;

    std::string::size_type begin = 0;
    while (true) {
        const std::string::size_type newline = s.find('\n', begin);
        const std::string::size_type end =
            (newline == std::string::npos) ? s.size() : newline;

        std::string::size_type length = end - begin;
        if (length > 0 && s[end - 1] == '\r') {
            --length;  // drop the CR of a CRLF pair
        }
        result.emplace_back(s, begin, length);

        if (newline == std::string::npos) {
            break;
        }
        begin = newline + 1;
    }

    return result;
}
+bool startsWith(const std::string& s, const std::string& prefix) { + return s.size() >= prefix.size() && s.compare(0, prefix.size(), prefix) == 0; +} + +} // anonymous namespace + +// --------------------------------------------------------------------------- +// renderMarkdown — public entry point +// --------------------------------------------------------------------------- + +Element renderMarkdown(const std::string& markdown) { + if (markdown.empty()) { + return text(""); + } + + auto lines = splitLines(markdown); + Elements blocks; + + enum class State { NORMAL, IN_CODE_BLOCK, IN_BLOCKQUOTE }; + State state = State::NORMAL; + + std::string codeLang; + Elements codeLines; + Elements quoteLines; + + auto flushCodeBlock = [&]() { + Element codeContent; + if (codeLines.empty()) { + codeContent = text(""); + } else { + codeContent = vbox(std::move(codeLines)); + } + + Elements codeBox; + if (!codeLang.empty()) { + codeBox.push_back(text(" " + codeLang + " ") | dim | bold); + } + codeBox.push_back(codeContent | dim); + + blocks.push_back(vbox(std::move(codeBox)) | borderLight); + codeLines.clear(); + codeLang.clear(); + }; + + auto flushBlockquote = [&]() { + if (quoteLines.empty()) return; + Element content = vbox(std::move(quoteLines)); + blocks.push_back( + hbox(text(" ") | dim, separatorLight(), text(" "), content) | dim + ); + quoteLines.clear(); + }; + + // Process a single line in NORMAL state. Extracted so that the blockquote + // exit path can re-process the current line without goto. 
+ auto processNormal = [&](const std::string& rawLine) { + std::string trimmed = ltrim(rawLine); + + // Empty line: paragraph break + if (trimmed.empty()) { + blocks.push_back(text("")); + return; + } + + // Fenced code block start + if (startsWith(trimmed, "```")) { + codeLang = trimmed.substr(3); + // Trim the language tag + size_t end = codeLang.find_first_of(" \t\n\r"); + if (end != std::string::npos) { + codeLang = codeLang.substr(0, end); + } + state = State::IN_CODE_BLOCK; + return; + } + + // Blockquote + if (startsWith(rawLine, "> ") || rawLine == ">") { + state = State::IN_BLOCKQUOTE; + if (startsWith(rawLine, "> ")) { + quoteLines.push_back(parseInline(rawLine.substr(2))); + } else { + quoteLines.push_back(text("")); + } + return; + } + + // Headings (check longest prefix first to avoid false matches) + if (startsWith(trimmed, "### ")) { + std::string heading = trimmed.substr(4); + blocks.push_back(text(heading) | bold); + return; + } + if (startsWith(trimmed, "## ")) { + std::string heading = trimmed.substr(3); + blocks.push_back( + text(heading) | bold | color(Color::Blue) + ); + return; + } + if (startsWith(trimmed, "# ")) { + std::string heading = trimmed.substr(2); + blocks.push_back(text(heading) | bold | underlined); + return; + } + + // Bullet list item + if (startsWith(trimmed, "- ")) { + std::string item = trimmed.substr(2); + blocks.push_back( + hbox(text(" * ") | bold, parseInline(item)) + ); + return; + } + + // Regular paragraph line with inline formatting + blocks.push_back(parseInline(trimmed)); + }; + + for (const auto& rawLine : lines) { + switch (state) { + case State::IN_CODE_BLOCK: { + if (startsWith(ltrim(rawLine), "```")) { + flushCodeBlock(); + state = State::NORMAL; + } else { + codeLines.push_back(text(rawLine)); + } + break; + } + + case State::IN_BLOCKQUOTE: { + if (startsWith(rawLine, "> ")) { + quoteLines.push_back(parseInline(rawLine.substr(2))); + } else if (rawLine == ">") { + quoteLines.push_back(text("")); + } else { + 
// End of blockquote — re-process line in NORMAL state + flushBlockquote(); + state = State::NORMAL; + processNormal(rawLine); + } + break; + } + + case State::NORMAL: { + processNormal(rawLine); + break; + } + } + } + + // Flush any unclosed blocks (graceful degradation) + if (state == State::IN_CODE_BLOCK) { + flushCodeBlock(); + } + if (state == State::IN_BLOCKQUOTE) { + flushBlockquote(); + } + + if (blocks.empty()) { + return text(""); + } + return vbox(std::move(blocks)); +} + +} // namespace gaia + +#endif // GAIA_HAS_TUI diff --git a/cpp/src/types.cpp b/cpp/src/types.cpp index f48cbef55..dc14192aa 100644 --- a/cpp/src/types.cpp +++ b/cpp/src/types.cpp @@ -155,6 +155,7 @@ AgentConfig AgentConfig::fromJson(const json& j) { c.showPrompts = j.value("showPrompts", c.showPrompts); c.streaming = j.value("streaming", c.streaming); c.silentMode = j.value("silentMode", c.silentMode); + c.structuredEvents = j.value("structuredEvents", c.structuredEvents); c.temperature = j.value("temperature", c.temperature); c.validate(); return c; @@ -189,6 +190,7 @@ json AgentConfig::toJson() const { {"showPrompts", showPrompts}, {"streaming", streaming}, {"silentMode", silentMode}, + {"structuredEvents", structuredEvents}, {"temperature", temperature} }; } diff --git a/cpp/tests/test_file_tools.cpp b/cpp/tests/test_file_tools.cpp new file mode 100644 index 000000000..0120f9aff --- /dev/null +++ b/cpp/tests/test_file_tools.cpp @@ -0,0 +1,352 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
+// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include +#include +#include + +namespace fs = std::filesystem; +using namespace gaia; + +class FileToolsTest : public ::testing::Test { +protected: + fs::path tempDir_; + + void SetUp() override { + tempDir_ = fs::temp_directory_path() / "gaia_file_tools_test"; + fs::create_directories(tempDir_); + } + + void TearDown() override { + std::error_code ec; + fs::remove_all(tempDir_, ec); + } + + /// Write a helper file and return its path. + std::string writeFile(const std::string& name, const std::string& content) { + fs::path p = tempDir_ / name; + if (p.has_parent_path()) { + fs::create_directories(p.parent_path()); + } + std::ofstream f(p, std::ios::binary); + f << content; + f.close(); + return p.string(); + } + + /// Read a file back for verification. + std::string readFile(const std::string& path) { + std::ifstream f(path); + std::ostringstream buf; + buf << f.rdbuf(); + return buf.str(); + } +}; + +// --------------------------------------------------------------------------- +// file_read tests +// --------------------------------------------------------------------------- + +TEST_F(FileToolsTest, FileRead_BasicContent) { + std::string path = writeFile("hello.txt", "line1\nline2\nline3\n"); + + ToolInfo tool = FileIOTools::fileRead(); + ASSERT_TRUE(tool.callback); + + json result = tool.callback({{"path", path}}); + EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["path"], path); + EXPECT_EQ(result["lines"], 3); + // Content should contain all three lines + std::string content = result["content"].get(); + EXPECT_TRUE(content.find("line1") != std::string::npos); + EXPECT_TRUE(content.find("line2") != std::string::npos); + EXPECT_TRUE(content.find("line3") != std::string::npos); +} + +TEST_F(FileToolsTest, FileRead_WithLineRange) { + std::string path = writeFile("lines.txt", "AAA\nBBB\nCCC\nDDD\nEEE\n"); + + ToolInfo tool = FileIOTools::fileRead(); + json result = 
tool.callback({{"path", path}, {"start_line", 2}, {"end_line", 4}}); + + EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["lines"], 5); + + std::string content = result["content"].get(); + EXPECT_TRUE(content.find("BBB") != std::string::npos); + EXPECT_TRUE(content.find("CCC") != std::string::npos); + EXPECT_TRUE(content.find("DDD") != std::string::npos); + EXPECT_TRUE(content.find("AAA") == std::string::npos); + EXPECT_TRUE(content.find("EEE") == std::string::npos); +} + +TEST_F(FileToolsTest, FileRead_MissingFile) { + ToolInfo tool = FileIOTools::fileRead(); + json result = tool.callback({{"path", (tempDir_ / "nonexistent.txt").string()}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("Cannot open") != std::string::npos); +} + +TEST_F(FileToolsTest, FileRead_EmptyPath) { + ToolInfo tool = FileIOTools::fileRead(); + json result = tool.callback({{"path", ""}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("required") != std::string::npos); +} + +// --------------------------------------------------------------------------- +// file_write tests +// --------------------------------------------------------------------------- + +TEST_F(FileToolsTest, FileWrite_BasicWrite) { + std::string path = (tempDir_ / "output.txt").string(); + + ToolInfo tool = FileIOTools::fileWrite(); + ASSERT_TRUE(tool.callback); + + json result = tool.callback({{"path", path}, {"content", "Hello, world!"}}); + EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["success"], true); + EXPECT_EQ(result["path"], path); + EXPECT_EQ(result["bytes_written"], 13); + + // Verify on disk + EXPECT_EQ(readFile(path), "Hello, world!"); +} + +TEST_F(FileToolsTest, FileWrite_CreatesParentDirs) { + std::string path = (tempDir_ / "sub" / "dir" / "nested.txt").string(); + + ToolInfo tool = FileIOTools::fileWrite(); + json result = tool.callback({{"path", path}, {"content", "nested content"}}); + + 
EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["success"], true); + EXPECT_TRUE(fs::exists(path)); + EXPECT_EQ(readFile(path), "nested content"); +} + +TEST_F(FileToolsTest, FileWrite_EmptyPath) { + ToolInfo tool = FileIOTools::fileWrite(); + json result = tool.callback({{"path", ""}, {"content", "data"}}); + + EXPECT_TRUE(result.contains("error")); +} + +TEST_F(FileToolsTest, FileWrite_MissingContent) { + std::string path = (tempDir_ / "no_content.txt").string(); + + ToolInfo tool = FileIOTools::fileWrite(); + json result = tool.callback({{"path", path}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("content") != std::string::npos); +} + +// --------------------------------------------------------------------------- +// file_edit tests +// --------------------------------------------------------------------------- + +TEST_F(FileToolsTest, FileEdit_BasicReplacement) { + std::string path = writeFile("edit_me.txt", "foo bar baz foo"); + + ToolInfo tool = FileIOTools::fileEdit(); + ASSERT_TRUE(tool.callback); + + json result = tool.callback({{"path", path}, {"old_string", "foo"}, {"new_string", "qux"}}); + EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["success"], true); + EXPECT_EQ(result["replacements"], 2); + EXPECT_EQ(result["path"], path); + + EXPECT_EQ(readFile(path), "qux bar baz qux"); +} + +TEST_F(FileToolsTest, FileEdit_StringNotFound) { + std::string path = writeFile("no_match.txt", "hello world"); + + ToolInfo tool = FileIOTools::fileEdit(); + json result = tool.callback({{"path", path}, {"old_string", "xyz"}, {"new_string", "abc"}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("not found") != std::string::npos); +} + +TEST_F(FileToolsTest, FileEdit_MissingFile) { + ToolInfo tool = FileIOTools::fileEdit(); + json result = tool.callback({ + {"path", (tempDir_ / "gone.txt").string()}, + {"old_string", "a"}, + {"new_string", "b"}, + }); + + 
EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("Cannot open") != std::string::npos); +} + +TEST_F(FileToolsTest, FileEdit_EmptyOldString) { + std::string path = writeFile("empty_old.txt", "data"); + + ToolInfo tool = FileIOTools::fileEdit(); + json result = tool.callback({{"path", path}, {"old_string", ""}, {"new_string", "x"}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("old_string") != std::string::npos); +} + +// --------------------------------------------------------------------------- +// file_search tests +// --------------------------------------------------------------------------- + +TEST_F(FileToolsTest, FileSearch_ByNamePattern) { + writeFile("alpha.cpp", "int main() {}"); + writeFile("beta.cpp", "void foo() {}"); + writeFile("gamma.h", "#pragma once"); + + ToolInfo tool = FileIOTools::fileSearch(); + ASSERT_TRUE(tool.callback); + + json result = tool.callback({{"pattern", "*.cpp"}, {"path", tempDir_.string()}}); + EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["total"], 2); + EXPECT_EQ(result["matches"].size(), 2u); +} + +TEST_F(FileToolsTest, FileSearch_WithContentPattern) { + writeFile("a.txt", "hello world\ngoodbye world\n"); + writeFile("b.txt", "nothing here\n"); + writeFile("c.txt", "hello again\n"); + + ToolInfo tool = FileIOTools::fileSearch(); + json result = tool.callback({ + {"pattern", "*.txt"}, + {"path", tempDir_.string()}, + {"content_pattern", "hello"}, + }); + + EXPECT_FALSE(result.contains("error")); + // a.txt has "hello" on line 1, c.txt has "hello" on line 1 => 2 matches + EXPECT_EQ(result["total"], 2); + + // Each match should have line and context + for (const auto& m : result["matches"]) { + EXPECT_TRUE(m.contains("line")); + EXPECT_TRUE(m.contains("context")); + std::string ctx = m["context"].get(); + EXPECT_TRUE(ctx.find("hello") != std::string::npos); + } +} + +TEST_F(FileToolsTest, FileSearch_NonexistentPath) { + ToolInfo tool = 
FileIOTools::fileSearch(); + json result = tool.callback({{"pattern", "*"}, {"path", (tempDir_ / "nope").string()}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("does not exist") != std::string::npos); +} + +TEST_F(FileToolsTest, FileSearch_EmptyPattern) { + ToolInfo tool = FileIOTools::fileSearch(); + json result = tool.callback({{"pattern", ""}, {"path", tempDir_.string()}}); + + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].get().find("required") != std::string::npos); +} + +TEST_F(FileToolsTest, FileSearch_MaxResults) { + // Create more files than max_results + for (int i = 0; i < 10; ++i) { + writeFile("file" + std::to_string(i) + ".txt", "content"); + } + + ToolInfo tool = FileIOTools::fileSearch(); + json result = tool.callback({ + {"pattern", "*.txt"}, + {"path", tempDir_.string()}, + {"max_results", 3}, + }); + + EXPECT_FALSE(result.contains("error")); + EXPECT_EQ(result["total"], 10); + EXPECT_EQ(result["matches"].size(), 3u); +} + +// --------------------------------------------------------------------------- +// registerAll +// --------------------------------------------------------------------------- + +TEST_F(FileToolsTest, RegisterAll_RegistersAllTools) { + ToolRegistry registry; + FileIOTools::registerAll(registry); + + EXPECT_EQ(registry.size(), 4u); + EXPECT_TRUE(registry.hasTool("file_read")); + EXPECT_TRUE(registry.hasTool("file_write")); + EXPECT_TRUE(registry.hasTool("file_edit")); + EXPECT_TRUE(registry.hasTool("file_search")); +} + +// --------------------------------------------------------------------------- +// ToolInfo structure validation +// --------------------------------------------------------------------------- + +TEST_F(FileToolsTest, ToolInfo_FileReadParams) { + ToolInfo info = FileIOTools::fileRead(); + EXPECT_EQ(info.name, "file_read"); + EXPECT_EQ(info.policy, ToolPolicy::ALLOW); + EXPECT_EQ(info.parameters.size(), 3u); + // First param: path (required) + 
EXPECT_EQ(info.parameters[0].name, "path"); + EXPECT_TRUE(info.parameters[0].required); + // Second/third params: optional + EXPECT_EQ(info.parameters[1].name, "start_line"); + EXPECT_FALSE(info.parameters[1].required); + EXPECT_EQ(info.parameters[2].name, "end_line"); + EXPECT_FALSE(info.parameters[2].required); +} + +TEST_F(FileToolsTest, ToolInfo_FileWriteParams) { + ToolInfo info = FileIOTools::fileWrite(); + EXPECT_EQ(info.name, "file_write"); + EXPECT_EQ(info.policy, ToolPolicy::CONFIRM); + EXPECT_EQ(info.parameters.size(), 2u); + EXPECT_EQ(info.parameters[0].name, "path"); + EXPECT_TRUE(info.parameters[0].required); + EXPECT_EQ(info.parameters[1].name, "content"); + EXPECT_TRUE(info.parameters[1].required); +} + +TEST_F(FileToolsTest, ToolInfo_FileEditParams) { + ToolInfo info = FileIOTools::fileEdit(); + EXPECT_EQ(info.name, "file_edit"); + EXPECT_EQ(info.policy, ToolPolicy::CONFIRM); + EXPECT_EQ(info.parameters.size(), 3u); + EXPECT_EQ(info.parameters[0].name, "path"); + EXPECT_EQ(info.parameters[1].name, "old_string"); + EXPECT_EQ(info.parameters[2].name, "new_string"); + for (const auto& p : info.parameters) { + EXPECT_TRUE(p.required); + } +} + +TEST_F(FileToolsTest, ToolInfo_FileSearchParams) { + ToolInfo info = FileIOTools::fileSearch(); + EXPECT_EQ(info.name, "file_search"); + EXPECT_EQ(info.policy, ToolPolicy::ALLOW); + EXPECT_EQ(info.parameters.size(), 4u); + EXPECT_EQ(info.parameters[0].name, "pattern"); + EXPECT_TRUE(info.parameters[0].required); + EXPECT_EQ(info.parameters[1].name, "path"); + EXPECT_FALSE(info.parameters[1].required); + EXPECT_EQ(info.parameters[2].name, "content_pattern"); + EXPECT_FALSE(info.parameters[2].required); + EXPECT_EQ(info.parameters[3].name, "max_results"); + EXPECT_FALSE(info.parameters[3].required); +} diff --git a/cpp/tests/test_git_tools.cpp b/cpp/tests/test_git_tools.cpp new file mode 100644 index 000000000..e366a8e04 --- /dev/null +++ b/cpp/tests/test_git_tools.cpp @@ -0,0 +1,237 @@ +// Copyright(C) 2025-2026 
Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include + +using namespace gaia; + +// --------------------------------------------------------------------------- +// git_status +// --------------------------------------------------------------------------- + +TEST(GitToolsTest, StatusReturnsExpectedKeys) { + ToolInfo info = GitTools::gitStatus(); + ASSERT_TRUE(info.callback); + + json result = info.callback(json::object()); + + // Must have either "status"+"clean" keys or "error" key + if (result.contains("error")) { + EXPECT_TRUE(result["error"].is_string()); + } else { + EXPECT_TRUE(result.contains("status")); + EXPECT_TRUE(result.contains("clean")); + EXPECT_TRUE(result["clean"].is_boolean()); + } +} + +TEST(GitToolsTest, StatusToolInfo) { + ToolInfo info = GitTools::gitStatus(); + EXPECT_EQ(info.name, "git_status"); + EXPECT_FALSE(info.description.empty()); + EXPECT_EQ(info.policy, ToolPolicy::ALLOW); + EXPECT_TRUE(info.parameters.empty()); +} + +// --------------------------------------------------------------------------- +// git_diff +// --------------------------------------------------------------------------- + +TEST(GitToolsTest, DiffReturnsExpectedKeys) { + ToolInfo info = GitTools::gitDiff(); + ASSERT_TRUE(info.callback); + + json result = info.callback(json::object()); + + if (result.contains("error")) { + EXPECT_TRUE(result["error"].is_string()); + } else { + EXPECT_TRUE(result.contains("diff")); + EXPECT_TRUE(result.contains("files_changed")); + EXPECT_TRUE(result["files_changed"].is_number_integer()); + } +} + +TEST(GitToolsTest, DiffToolInfo) { + ToolInfo info = GitTools::gitDiff(); + EXPECT_EQ(info.name, "git_diff"); + EXPECT_FALSE(info.description.empty()); + EXPECT_EQ(info.policy, ToolPolicy::ALLOW); + EXPECT_EQ(info.parameters.size(), 3u); +} + +// --------------------------------------------------------------------------- +// git_log +// 
--------------------------------------------------------------------------- + +TEST(GitToolsTest, LogReturnsExpectedKeys) { + ToolInfo info = GitTools::gitLog(); + ASSERT_TRUE(info.callback); + + json result = info.callback(json::object()); + + if (result.contains("error")) { + EXPECT_TRUE(result["error"].is_string()); + } else { + EXPECT_TRUE(result.contains("log")); + EXPECT_TRUE(result.contains("commits")); + EXPECT_TRUE(result["commits"].is_number_integer()); + } +} + +TEST(GitToolsTest, LogDefaultCount) { + ToolInfo info = GitTools::gitLog(); + + // Default count is 10 — verify we get at most 10 commits + json result = info.callback(json::object()); + + if (!result.contains("error")) { + EXPECT_LE(result["commits"].get(), 10); + EXPECT_GT(result["commits"].get(), 0); + } +} + +TEST(GitToolsTest, LogRespectsCount) { + ToolInfo info = GitTools::gitLog(); + + json args = {{"count", 3}}; + json result = info.callback(args); + + if (!result.contains("error")) { + EXPECT_LE(result["commits"].get(), 3); + EXPECT_GT(result["commits"].get(), 0); + } +} + +TEST(GitToolsTest, LogToolInfo) { + ToolInfo info = GitTools::gitLog(); + EXPECT_EQ(info.name, "git_log"); + EXPECT_FALSE(info.description.empty()); + EXPECT_EQ(info.policy, ToolPolicy::ALLOW); + EXPECT_EQ(info.parameters.size(), 3u); + + // Verify parameter names + EXPECT_EQ(info.parameters[0].name, "count"); + EXPECT_EQ(info.parameters[0].type, ToolParamType::INTEGER); + EXPECT_FALSE(info.parameters[0].required); + + EXPECT_EQ(info.parameters[1].name, "oneline"); + EXPECT_EQ(info.parameters[1].type, ToolParamType::BOOLEAN); + EXPECT_FALSE(info.parameters[1].required); + + EXPECT_EQ(info.parameters[2].name, "path"); + EXPECT_EQ(info.parameters[2].type, ToolParamType::STRING); + EXPECT_FALSE(info.parameters[2].required); +} + +// --------------------------------------------------------------------------- +// git_show +// --------------------------------------------------------------------------- + +TEST(GitToolsTest, 
ShowReturnsContentForHEAD) { + ToolInfo info = GitTools::gitShow(); + ASSERT_TRUE(info.callback); + + json result = info.callback(json::object()); + + if (result.contains("error")) { + EXPECT_TRUE(result["error"].is_string()); + } else { + EXPECT_TRUE(result.contains("content")); + EXPECT_TRUE(result.contains("ref")); + EXPECT_EQ(result["ref"].get(), "HEAD"); + EXPECT_FALSE(result["content"].get().empty()); + } +} + +TEST(GitToolsTest, ShowWithBadRefReturnsError) { + ToolInfo info = GitTools::gitShow(); + ASSERT_TRUE(info.callback); + + json args = {{"ref", "nonexistent_ref_abc123xyz"}}; + json result = info.callback(args); + + // Should return an error for a ref that doesn't exist + EXPECT_TRUE(result.contains("error")); + EXPECT_TRUE(result["error"].is_string()); +} + +TEST(GitToolsTest, ShowToolInfo) { + ToolInfo info = GitTools::gitShow(); + EXPECT_EQ(info.name, "git_show"); + EXPECT_FALSE(info.description.empty()); + EXPECT_EQ(info.policy, ToolPolicy::ALLOW); + EXPECT_EQ(info.parameters.size(), 1u); + EXPECT_EQ(info.parameters[0].name, "ref"); + EXPECT_EQ(info.parameters[0].type, ToolParamType::STRING); + EXPECT_FALSE(info.parameters[0].required); +} + +// --------------------------------------------------------------------------- +// registerAll +// --------------------------------------------------------------------------- + +TEST(GitToolsTest, RegisterAllAddsAllTools) { + ToolRegistry registry; + + GitTools::registerAll(registry); + + EXPECT_EQ(registry.size(), 4u); + EXPECT_TRUE(registry.hasTool("git_status")); + EXPECT_TRUE(registry.hasTool("git_diff")); + EXPECT_TRUE(registry.hasTool("git_log")); + EXPECT_TRUE(registry.hasTool("git_show")); +} + +// --------------------------------------------------------------------------- +// Security: shell metacharacter rejection +// --------------------------------------------------------------------------- + +TEST(GitToolsTest, ShowRejectsUnsafeRef) { + ToolInfo info = GitTools::gitShow(); + + // Semicolon 
injection + json args1 = {{"ref", "HEAD; rm -rf /"}}; + json result1 = info.callback(args1); + EXPECT_TRUE(result1.contains("error")); + EXPECT_NE(result1["error"].get().find("unsafe"), std::string::npos); + + // Pipe injection + json args2 = {{"ref", "HEAD | cat /etc/passwd"}}; + json result2 = info.callback(args2); + EXPECT_TRUE(result2.contains("error")); + + // Backtick injection + json args3 = {{"ref", "`whoami`"}}; + json result3 = info.callback(args3); + EXPECT_TRUE(result3.contains("error")); +} + +TEST(GitToolsTest, DiffRejectsUnsafePath) { + ToolInfo info = GitTools::gitDiff(); + + json args = {{"path", "file.txt; cat /etc/shadow"}}; + json result = info.callback(args); + EXPECT_TRUE(result.contains("error")); + EXPECT_NE(result["error"].get().find("unsafe"), std::string::npos); +} + +TEST(GitToolsTest, DiffRejectsUnsafeRef) { + ToolInfo info = GitTools::gitDiff(); + + json args = {{"ref", "main && whoami"}}; + json result = info.callback(args); + EXPECT_TRUE(result.contains("error")); +} + +TEST(GitToolsTest, LogRejectsUnsafePath) { + ToolInfo info = GitTools::gitLog(); + + json args = {{"path", "$(evil)"}}; + json result = info.callback(args); + EXPECT_TRUE(result.contains("error")); +} diff --git a/cpp/tests/test_json_event_handler.cpp b/cpp/tests/test_json_event_handler.cpp new file mode 100644 index 000000000..0786fda15 --- /dev/null +++ b/cpp/tests/test_json_event_handler.cpp @@ -0,0 +1,417 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include +#include + +#include + +using namespace gaia; +using json = nlohmann::json; + +// --------------------------------------------------------------------------- +// RAII helper to capture stdout into an ostringstream. 
+// --------------------------------------------------------------------------- +class CoutCapture { +public: + CoutCapture() : captured_(), oldBuf_(std::cout.rdbuf(captured_.rdbuf())) {} + ~CoutCapture() { std::cout.rdbuf(oldBuf_); } + + std::string str() const { return captured_.str(); } + + /// Parse the captured output as one or more JSONL lines. + /// Returns a vector of parsed JSON objects. + std::vector lines() const { + std::vector result; + std::istringstream iss(captured_.str()); + std::string line; + while (std::getline(iss, line)) { + if (!line.empty()) { + result.push_back(json::parse(line)); + } + } + return result; + } + + /// Parse the first (and usually only) JSONL line. + json first() const { + auto l = lines(); + EXPECT_FALSE(l.empty()) << "Expected at least one JSONL line, got none"; + return l.empty() ? json{} : l[0]; + } + +private: + std::ostringstream captured_; + std::streambuf* oldBuf_; +}; + +// =========================================================================== +// Step Events +// =========================================================================== + +TEST(JsonEventHandlerTest, StepHeader) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printStepHeader(3, 10); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "step"); + EXPECT_EQ(ev["step"], 3); + EXPECT_EQ(ev["total"], 10); + EXPECT_EQ(ev["status"], "started"); +} + +// =========================================================================== +// Thinking Events +// =========================================================================== + +TEST(JsonEventHandlerTest, Thought) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printThought("Analyzing the request..."); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "thinking"); + EXPECT_EQ(ev["content"], "Analyzing the request..."); +} + +TEST(JsonEventHandlerTest, EmptyThoughtSkipped) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printThought(""); + 
EXPECT_TRUE(cap.str().empty()); +} + +// =========================================================================== +// Goal / Status Events +// =========================================================================== + +TEST(JsonEventHandlerTest, Goal) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printGoal("Check network status"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "working"); + EXPECT_EQ(ev["message"], "Check network status"); +} + +TEST(JsonEventHandlerTest, EmptyGoalSkipped) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printGoal(""); + EXPECT_TRUE(cap.str().empty()); +} + +TEST(JsonEventHandlerTest, StateInfo) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printStateInfo("ERROR RECOVERY"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "warning"); + EXPECT_EQ(ev["message"], "ERROR RECOVERY"); +} + +// =========================================================================== +// Plan Events +// =========================================================================== + +TEST(JsonEventHandlerTest, Plan) { + JsonEventOutputHandler handler; + json plan = json::array({{{"tool", "bash_execute"}, {"args", "ls"}}, + {{"tool", "read_file"}, {"args", "foo.txt"}}}); + CoutCapture cap; + handler.printPlan(plan, 1); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "plan"); + EXPECT_EQ(ev["steps"].size(), 2); + EXPECT_EQ(ev["current_step"], 1); +} + +// =========================================================================== +// Tool Events +// =========================================================================== + +TEST(JsonEventHandlerTest, ToolLifecycle) { + JsonEventOutputHandler handler; + CoutCapture cap; + + handler.printToolUsage("bash_execute"); + handler.prettyPrintJson({{"command", "ls -la"}}, "Tool Args"); + handler.printToolComplete(); + handler.prettyPrintJson({{"status", "success"}, {"stdout", 
"file1\nfile2"}}, "Tool Result"); + + auto events = cap.lines(); + ASSERT_EQ(events.size(), 4); + + // tool_start + EXPECT_EQ(events[0]["type"], "tool_start"); + EXPECT_EQ(events[0]["tool"], "bash_execute"); + + // tool_args + EXPECT_EQ(events[1]["type"], "tool_args"); + EXPECT_EQ(events[1]["tool"], "bash_execute"); + EXPECT_EQ(events[1]["args"]["command"], "ls -la"); + + // tool_end + EXPECT_EQ(events[2]["type"], "tool_end"); + EXPECT_EQ(events[2]["success"], true); + + // tool_result + EXPECT_EQ(events[3]["type"], "tool_result"); + EXPECT_EQ(events[3]["title"], "bash_execute"); + EXPECT_EQ(events[3]["success"], true); + EXPECT_TRUE(events[3].contains("command_output")); + EXPECT_EQ(events[3]["command_output"]["stdout"], "file1\nfile2"); +} + +TEST(JsonEventHandlerTest, ToolResultError) { + JsonEventOutputHandler handler; + CoutCapture cap; + + handler.printToolUsage("bash_execute"); + handler.prettyPrintJson({{"status", "error"}, {"error", "command not found"}}, "Tool Result"); + + auto events = cap.lines(); + ASSERT_GE(events.size(), 2); + auto result = events[1]; + EXPECT_EQ(result["type"], "tool_result"); + EXPECT_EQ(result["success"], false); + EXPECT_EQ(result["summary"], "command not found"); +} + +// =========================================================================== +// Status Message Events +// =========================================================================== + +TEST(JsonEventHandlerTest, Error) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printError("Something went wrong"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "agent_error"); + EXPECT_EQ(ev["content"], "Something went wrong"); +} + +TEST(JsonEventHandlerTest, Warning) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printWarning("Running low on context"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "warning"); + EXPECT_EQ(ev["message"], "Running low on context"); +} + +TEST(JsonEventHandlerTest, 
Info) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printInfo("Model loaded successfully"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "info"); + EXPECT_EQ(ev["message"], "Model loaded successfully"); +} + +// =========================================================================== +// Progress Events +// =========================================================================== + +TEST(JsonEventHandlerTest, StartProgress) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.startProgress("Executing bash_execute"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "working"); + EXPECT_EQ(ev["message"], "Executing bash_execute"); +} + +TEST(JsonEventHandlerTest, StopProgressNoEvent) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.stopProgress(); + EXPECT_TRUE(cap.str().empty()); +} + +// =========================================================================== +// Answer / Completion Events +// =========================================================================== + +TEST(JsonEventHandlerTest, FinalAnswer) { + JsonEventOutputHandler handler; + + // Simulate some steps and tools + { + CoutCapture cap; + handler.printProcessingStart("test query", 10, "model"); + handler.printStepHeader(1, 10); + handler.printToolUsage("bash_execute"); + handler.printToolComplete(); + handler.printStepHeader(2, 10); + } + + CoutCapture cap; + handler.printFinalAnswer("Your WiFi is working correctly."); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "answer"); + EXPECT_EQ(ev["content"], "Your WiFi is working correctly."); + EXPECT_EQ(ev["steps"], 2); + EXPECT_EQ(ev["tools_used"], 1); + // No usage object when UsageStats is default (zero) + EXPECT_FALSE(ev.contains("usage")); +} + +TEST(JsonEventHandlerTest, FinalAnswerWithUsage) { + JsonEventOutputHandler handler; + + { + CoutCapture cap; + handler.printProcessingStart("test query", 10, "model"); + 
handler.printStepHeader(1, 10); + } + + UsageStats usage; + usage.promptTokens = 150; + usage.completionTokens = 45; + usage.totalTokens = 195; + + CoutCapture cap; + handler.printFinalAnswer("The answer is 42.", usage); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "answer"); + EXPECT_EQ(ev["content"], "The answer is 42."); + EXPECT_EQ(ev["steps"], 1); + ASSERT_TRUE(ev.contains("usage")); + EXPECT_EQ(ev["usage"]["prompt_tokens"], 150); + EXPECT_EQ(ev["usage"]["completion_tokens"], 45); + EXPECT_EQ(ev["usage"]["total_tokens"], 195); +} + +TEST(JsonEventHandlerTest, FinalAnswerZeroUsageOmitted) { + JsonEventOutputHandler handler; + + CoutCapture cap; + handler.printFinalAnswer("Result", UsageStats{}); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "answer"); + EXPECT_FALSE(ev.contains("usage")); +} + +TEST(JsonEventHandlerTest, Completion) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printCompletion(5, 10); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "complete"); + EXPECT_EQ(ev["steps"], 5); + EXPECT_EQ(ev["total"], 10); +} + +// =========================================================================== +// Streaming Events +// =========================================================================== + +TEST(JsonEventHandlerTest, StreamToken) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printStreamToken("Hello"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "chunk"); + EXPECT_EQ(ev["content"], "Hello"); +} + +TEST(JsonEventHandlerTest, StreamEndNoEvent) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.printStreamEnd(); + EXPECT_TRUE(cap.str().empty()); +} + +// =========================================================================== +// Processing Reset +// =========================================================================== + +TEST(JsonEventHandlerTest, ProcessingStartResetsCounters) { + JsonEventOutputHandler handler; + + // Run some steps/tools 
to accumulate counts + { + CoutCapture cap; + handler.printStepHeader(1, 10); + handler.printToolUsage("bash_execute"); + handler.printToolComplete(); + handler.printStepHeader(2, 10); + } + + // Reset + { + CoutCapture cap; + handler.printProcessingStart("new query", 20, "model"); + // No event emitted for processingStart + EXPECT_TRUE(cap.str().empty()); + } + + // Final answer should have reset counters + CoutCapture cap; + handler.printFinalAnswer("Result"); + auto ev = cap.first(); + EXPECT_EQ(ev["steps"], 0); + EXPECT_EQ(ev["tools_used"], 0); +} + +// =========================================================================== +// Generic prettyPrintJson (not Tool Args or Tool Result) +// =========================================================================== + +TEST(JsonEventHandlerTest, GenericPrettyPrintJson) { + JsonEventOutputHandler handler; + CoutCapture cap; + handler.prettyPrintJson({{"key", "value"}}, "Custom"); + auto ev = cap.first(); + EXPECT_EQ(ev["type"], "status"); + EXPECT_EQ(ev["status"], "info"); +} + +// =========================================================================== +// Full Query Simulation +// =========================================================================== + +TEST(JsonEventHandlerTest, FullQueryFlow) { + JsonEventOutputHandler handler; + CoutCapture cap; + + handler.printProcessingStart("list files", 10, "Qwen3-4B"); + handler.printStepHeader(1, 10); + handler.printThought("I need to list the files in the current directory."); + handler.printGoal("List files"); + handler.printToolUsage("bash_execute"); + handler.prettyPrintJson({{"command", "ls"}}, "Tool Args"); + handler.startProgress("Executing bash_execute"); + handler.stopProgress(); + handler.printToolComplete(); + handler.prettyPrintJson({{"status", "success"}, {"stdout", "file1.txt\nfile2.txt"}}, "Tool Result"); + handler.printStepHeader(2, 10); + handler.printFinalAnswer("The directory contains file1.txt and file2.txt."); + handler.printCompletion(2, 
10); + + auto events = cap.lines(); + + // Count event types + int steps = 0, thinking = 0, tool_starts = 0, answers = 0; + for (const auto& ev : events) { + if (ev["type"] == "step") ++steps; + if (ev["type"] == "thinking") ++thinking; + if (ev["type"] == "tool_start") ++tool_starts; + if (ev["type"] == "answer") ++answers; + } + + EXPECT_EQ(steps, 2); + EXPECT_EQ(thinking, 1); + EXPECT_EQ(tool_starts, 1); + EXPECT_EQ(answers, 1); + + // Verify last event is completion + EXPECT_EQ(events.back()["type"], "status"); + EXPECT_EQ(events.back()["status"], "complete"); +} diff --git a/cpp/tests/test_process.cpp b/cpp/tests/test_process.cpp new file mode 100644 index 000000000..d167cf3e2 --- /dev/null +++ b/cpp/tests/test_process.cpp @@ -0,0 +1,162 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include +#include + +#include +#include + +using namespace gaia; + +// --------------------------------------------------------------------------- +// Helper: platform-appropriate commands +// --------------------------------------------------------------------------- + +#ifdef _WIN32 +static const char* ECHO_HELLO = "cmd /c echo hello"; +static const char* FAIL_CMD = "cmd /c exit 1"; +static const char* STDERR_CMD = "cmd /c echo error_msg 1>&2"; +static const char* LARGE_OUTPUT = "cmd /c \"for /L %i in (1,1,5000) do @echo line_%i\""; +#else +static const char* ECHO_HELLO = "echo hello"; +static const char* FAIL_CMD = "false"; +static const char* STDERR_CMD = "echo error_msg >&2"; +static const char* LARGE_OUTPUT = "seq 1 5000 | while read i; do echo \"line_$i\"; done"; +#endif + +// --------------------------------------------------------------------------- +// ProcessRunner::run +// --------------------------------------------------------------------------- + +TEST(ProcessRunnerTest, EchoHello) { + auto result = ProcessRunner::run(ECHO_HELLO, 10000); + + EXPECT_EQ(result.exitCode, 0); + 
EXPECT_FALSE(result.timedOut); + // stdout should contain "hello" (may have trailing newline / \r\n) + EXPECT_NE(result.stdout_output.find("hello"), std::string::npos); +} + +TEST(ProcessRunnerTest, FailingCommand) { + auto result = ProcessRunner::run(FAIL_CMD, 10000); + + EXPECT_NE(result.exitCode, 0); + EXPECT_FALSE(result.timedOut); +} + +TEST(ProcessRunnerTest, StderrCapture) { + auto result = ProcessRunner::run(STDERR_CMD, 10000); + + // stderr should contain "error_msg" + EXPECT_NE(result.stderr_output.find("error_msg"), std::string::npos); +} + +TEST(ProcessRunnerTest, OutputCapping) { + // Run a command that produces many lines, cap at 256 bytes + const size_t capBytes = 256; + auto result = ProcessRunner::run(LARGE_OUTPUT, 30000, "", {}, capBytes); + + EXPECT_EQ(result.exitCode, 0); + EXPECT_FALSE(result.timedOut); + // stdout should be capped at or near the limit + EXPECT_LE(result.stdout_output.size(), capBytes); + // Should have captured at least something + EXPECT_FALSE(result.stdout_output.empty()); +} + +TEST(ProcessRunnerTest, EmptyCommand) { + auto result = ProcessRunner::run("", 10000); + + // Empty command should fail gracefully + EXPECT_EQ(result.exitCode, -1); + EXPECT_FALSE(result.stderr_output.empty()); +} + +// --------------------------------------------------------------------------- +// ProcessRunner::runOrThrow +// --------------------------------------------------------------------------- + +TEST(ProcessRunnerTest, RunOrThrowSuccess) { + std::string output = ProcessRunner::runOrThrow(ECHO_HELLO, 10000); + + EXPECT_NE(output.find("hello"), std::string::npos); +} + +TEST(ProcessRunnerTest, RunOrThrowFailure) { + EXPECT_THROW( + ProcessRunner::runOrThrow(FAIL_CMD, 10000), + std::runtime_error + ); +} + +// --------------------------------------------------------------------------- +// Timeout behavior +// --------------------------------------------------------------------------- + +TEST(ProcessRunnerTest, TimeoutKillsProcess) { + // Run a 
command that sleeps forever, with a short timeout +#ifdef _WIN32 + const char* sleepCmd = "cmd /c ping -n 60 127.0.0.1 >nul"; +#else + const char* sleepCmd = "sleep 60"; +#endif + + auto result = ProcessRunner::run(sleepCmd, 1000); // 1 second timeout + + EXPECT_TRUE(result.timedOut); +} + +// --------------------------------------------------------------------------- +// Working directory +// --------------------------------------------------------------------------- + +TEST(ProcessRunnerTest, WorkingDirectory) { + // Use temp directory as cwd +#ifdef _WIN32 + const char* pwdCmd = "cmd /c cd"; + const char* testDir = "C:\\"; +#else + const char* pwdCmd = "pwd"; + const char* testDir = "/tmp"; +#endif + + auto result = ProcessRunner::run(pwdCmd, 10000, testDir); + + EXPECT_EQ(result.exitCode, 0); + // Output should contain the directory we specified + EXPECT_NE(result.stdout_output.find(testDir), std::string::npos); +} + +// --------------------------------------------------------------------------- +// Environment variables +// --------------------------------------------------------------------------- + +TEST(ProcessRunnerTest, EnvironmentVariables) { + std::map env = { + {"GAIA_TEST_VAR", "test_value_12345"} + }; + +#ifdef _WIN32 + const char* printEnvCmd = "cmd /c echo %GAIA_TEST_VAR%"; +#else + const char* printEnvCmd = "echo $GAIA_TEST_VAR"; +#endif + + auto result = ProcessRunner::run(printEnvCmd, 10000, "", env); + + EXPECT_EQ(result.exitCode, 0); + EXPECT_NE(result.stdout_output.find("test_value_12345"), std::string::npos); +} + +// --------------------------------------------------------------------------- +// No-timeout mode (timeoutMs = 0) +// --------------------------------------------------------------------------- + +TEST(ProcessRunnerTest, NoTimeoutMode) { + auto result = ProcessRunner::run(ECHO_HELLO, 0); + + EXPECT_EQ(result.exitCode, 0); + EXPECT_FALSE(result.timedOut); + EXPECT_NE(result.stdout_output.find("hello"), std::string::npos); +} diff 
--git a/cpp/tests/test_repl.cpp b/cpp/tests/test_repl.cpp new file mode 100644 index 000000000..2ead047e9 --- /dev/null +++ b/cpp/tests/test_repl.cpp @@ -0,0 +1,342 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT + +#include +#include +#include +#include + +#include +#include +#include + +using namespace gaia; +namespace fs = std::filesystem; + +// --------------------------------------------------------------------------- +// Mock Agent — minimal subclass for REPL testing (no real LLM calls) +// --------------------------------------------------------------------------- + +class ReplMockAgent : public Agent { +public: + explicit ReplMockAgent(const AgentConfig& config = {}) : Agent(config) { + init(); + } + + // Currently unused: no test in this file reads or writes this flag. + bool historyClearCalled = false; + + // clearHistory() is non-virtual, so it cannot be overridden here to record + // calls. The /clear tests below therefore only verify that the command is + // dispatched. + +protected: + void registerTools() override { + // Register a simple echo tool for testing + toolRegistry().registerTool("echo", "Echo the input", + [](const json& args) -> json { + return json{{"echoed", args.value("message", "")}}; + }, + {}); + } + + std::string getSystemPrompt() const override { + return "You are a test agent for REPL testing."; + } +}; + +// --------------------------------------------------------------------------- +// Test fixture +// --------------------------------------------------------------------------- + +class ReplRunnerTest : public ::testing::Test { +protected: + AgentConfig config; + std::unique_ptr agent; + std::unique_ptr repl; + + void SetUp() override { + config.silentMode = true; + agent = std::make_unique(config); + repl = std::make_unique(*agent); + } +}; + +// --------------------------------------------------------------------------- +// 1.
Built-in commands are registered on construction +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, BuiltinCommandsRegistered) { + EXPECT_TRUE(repl->hasCommand("/clear")); + EXPECT_TRUE(repl->hasCommand("/help")); + EXPECT_TRUE(repl->hasCommand("/model")); + EXPECT_TRUE(repl->hasCommand("/history")); + EXPECT_TRUE(repl->hasCommand("/exit")); + EXPECT_EQ(repl->commandCount(), 5u); +} + +// --------------------------------------------------------------------------- +// 2. addCommand registers a custom command (registration only; the handler is not dispatched, so `called` is never asserted) +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, AddCustomCommand) { + EXPECT_FALSE(repl->hasCommand("/lint")); + + bool called = false; + repl->addCommand("/lint", "Run linter", + [&called](const std::string& /*args*/, Agent& /*agent*/) { + called = true; + }); + + EXPECT_TRUE(repl->hasCommand("/lint")); + EXPECT_EQ(repl->commandCount(), 6u); // 5 built-in + 1 custom +} + +// --------------------------------------------------------------------------- +// 3. tryDispatchCommand — slash command is dispatched +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, DispatchSlashCommand) { + bool called = false; + std::string receivedArgs; + + repl->addCommand("/test", "Test command", + [&](const std::string& args, Agent& /*agent*/) { + called = true; + receivedArgs = args; + }); + + bool dispatched = repl->tryDispatchCommand("/test hello world"); + EXPECT_TRUE(dispatched); + EXPECT_TRUE(called); + EXPECT_EQ(receivedArgs, "hello world"); +} + +// --------------------------------------------------------------------------- +// 4.
tryDispatchCommand — non-command returns false +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, DispatchNonCommand) { + bool dispatched = repl->tryDispatchCommand("What is the weather?"); + EXPECT_FALSE(dispatched); +} + +// --------------------------------------------------------------------------- +// 5. tryDispatchCommand — empty input returns false +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, DispatchEmptyInput) { + bool dispatched = repl->tryDispatchCommand(""); + EXPECT_FALSE(dispatched); +} + +// --------------------------------------------------------------------------- +// 6. tryDispatchCommand — unknown command handled gracefully (returns true) +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, DispatchUnknownCommand) { + // Unknown commands are still recognized as command attempts (starts with /) + // but print a message. They return true to prevent sending to LLM. + bool dispatched = repl->tryDispatchCommand("/foobar"); + EXPECT_TRUE(dispatched); +} + +// --------------------------------------------------------------------------- +// 7. tryDispatchCommand — /clear is dispatched (smoke test; the clearHistory() call itself is not observed) +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, ClearCommandDispatch) { + // /clear should not throw and should complete without error + bool dispatched = repl->tryDispatchCommand("/clear"); + EXPECT_TRUE(dispatched); +} + +// --------------------------------------------------------------------------- +// 8.
tryDispatchCommand — /model with no args shows current model +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, ModelCommandNoArgs) { + bool dispatched = repl->tryDispatchCommand("/model"); + EXPECT_TRUE(dispatched); +} + +// --------------------------------------------------------------------------- +// 9. tryDispatchCommand — /model with arg changes model +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, ModelCommandWithArgs) { + bool dispatched = repl->tryDispatchCommand("/model Qwen3-8B-GGUF"); + EXPECT_TRUE(dispatched); + + // Verify the model was changed + EXPECT_EQ(agent->config().modelId, "Qwen3-8B-GGUF"); +} + +// --------------------------------------------------------------------------- +// 10. tryDispatchCommand — /help lists commands (smoke test) +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, HelpCommandDispatch) { + bool dispatched = repl->tryDispatchCommand("/help"); + EXPECT_TRUE(dispatched); +} + +// --------------------------------------------------------------------------- +// 11. tryDispatchCommand — /history without store prints message +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, HistoryWithoutStore) { + // Should not throw even without a session store + bool dispatched = repl->tryDispatchCommand("/history"); + EXPECT_TRUE(dispatched); +} + +// --------------------------------------------------------------------------- +// 12. 
tryDispatchCommand — /history with store lists sessions +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, HistoryWithStore) { + fs::path storeDir = fs::temp_directory_path() / "gaia_repl_test_history"; + fs::remove_all(storeDir); + + auto store = std::make_shared(storeDir.string()); + repl->setSessionStore(store); + + bool dispatched = repl->tryDispatchCommand("/history"); + EXPECT_TRUE(dispatched); + + fs::remove_all(storeDir); +} + +// --------------------------------------------------------------------------- +// 13. tryDispatchCommand — /exit sets exit flag +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, ExitCommandDispatch) { + bool dispatched = repl->tryDispatchCommand("/exit"); + EXPECT_TRUE(dispatched); + // exitRequested_ is private, but we verify the command was dispatched +} + +// --------------------------------------------------------------------------- +// 14. addCommand — overwrite an existing command +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, OverwriteExistingCommand) { + bool customCalled = false; + + repl->addCommand("/clear", "Custom clear", + [&customCalled](const std::string& /*args*/, Agent& /*agent*/) { + customCalled = true; + }); + + repl->tryDispatchCommand("/clear"); + EXPECT_TRUE(customCalled); + // Command count should not increase (overwrite, not add) + EXPECT_EQ(repl->commandCount(), 5u); +} + +// --------------------------------------------------------------------------- +// 15. 
Command args are trimmed +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, CommandArgsTrimmed) { + std::string receivedArgs; + + repl->addCommand("/test", "Test", + [&receivedArgs](const std::string& args, Agent& /*agent*/) { + receivedArgs = args; + }); + + repl->tryDispatchCommand("/test padded args "); + EXPECT_EQ(receivedArgs, "padded args"); +} + +// --------------------------------------------------------------------------- +// 16. Command with no args passes empty string +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, CommandNoArgsPasses) { + std::string receivedArgs = "NOT_CALLED"; + + repl->addCommand("/test", "Test", + [&receivedArgs](const std::string& args, Agent& /*agent*/) { + receivedArgs = args; + }); + + repl->tryDispatchCommand("/test"); + // When there's no space after the command name, the callback is still called + // with an empty string for args. + EXPECT_EQ(receivedArgs, ""); +} + +// --------------------------------------------------------------------------- +// 17. setSessionStore and setResumeId — basic setter smoke test +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, SessionStoreSetters) { + fs::path storeDir = fs::temp_directory_path() / "gaia_repl_test_setters"; + fs::remove_all(storeDir); + + auto store = std::make_shared(storeDir.string()); + repl->setSessionStore(store); + repl->setResumeId("test-session-123"); + + // No crash — setters work + SUCCEED(); + + fs::remove_all(storeDir); +} + +// --------------------------------------------------------------------------- +// 18. 
setShowBanner — setter works +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, SetShowBanner) { + repl->setShowBanner(false); + repl->setShowBanner(true); + SUCCEED(); // No crash +} + +// --------------------------------------------------------------------------- +// 19. Custom prompt in constructor +// --------------------------------------------------------------------------- + +TEST(ReplRunnerStandaloneTest, CustomPrompt) { + AgentConfig config; + config.silentMode = true; + ReplMockAgent agent(config); + ReplRunner repl(agent, ">> "); + + // Verify built-in commands still registered with custom prompt + EXPECT_TRUE(repl.hasCommand("/clear")); + EXPECT_EQ(repl.commandCount(), 5u); +} + +// --------------------------------------------------------------------------- +// 20. Multiple custom commands +// --------------------------------------------------------------------------- + +TEST_F(ReplRunnerTest, MultipleCustomCommands) { + int lintCalls = 0; + int reviewCalls = 0; + int deployCalls = 0; + + repl->addCommand("/lint", "Run linter", + [&lintCalls](const std::string&, Agent&) { ++lintCalls; }); + repl->addCommand("/review", "Code review", + [&reviewCalls](const std::string&, Agent&) { ++reviewCalls; }); + repl->addCommand("/deploy", "Deploy", + [&deployCalls](const std::string&, Agent&) { ++deployCalls; }); + + EXPECT_EQ(repl->commandCount(), 8u); // 5 built-in + 3 custom + + repl->tryDispatchCommand("/lint"); + repl->tryDispatchCommand("/review"); + repl->tryDispatchCommand("/deploy"); + repl->tryDispatchCommand("/lint"); + + EXPECT_EQ(lintCalls, 2); + EXPECT_EQ(reviewCalls, 1); + EXPECT_EQ(deployCalls, 1); +} diff --git a/cpp/tests/test_session.cpp b/cpp/tests/test_session.cpp new file mode 100644 index 000000000..b5911c3aa --- /dev/null +++ b/cpp/tests/test_session.cpp @@ -0,0 +1,324 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. 
+// SPDX-License-Identifier: MIT + +#include +#include +#include + +#include +#include +#include + +#include + +using json = nlohmann::json; + +using namespace gaia; +namespace fs = std::filesystem; + +// --------------------------------------------------------------------------- +// Test fixture — uses a temp directory, cleaned up after each test +// --------------------------------------------------------------------------- + +class SessionStoreTest : public ::testing::Test { +protected: + fs::path storeDir; + std::unique_ptr store; + + void SetUp() override { + storeDir = fs::temp_directory_path() / "gaia_session_test"; + fs::remove_all(storeDir); + store = std::make_unique(storeDir.string()); + } + + void TearDown() override { + fs::remove_all(storeDir); + } + + /// Helper: create a simple conversation history with mixed roles. + static std::vector makeSampleHistory() { + std::vector history; + + Message sys; + sys.role = MessageRole::SYSTEM; + sys.content = "You are a helpful assistant."; + history.push_back(sys); + + Message user; + user.role = MessageRole::USER; + user.content = "What is the capital of France?"; + history.push_back(user); + + Message asst; + asst.role = MessageRole::ASSISTANT; + asst.content = "The capital of France is Paris."; + history.push_back(asst); + + return history; + } + + /// Helper: create a history with TOOL messages. 
+ static std::vector makeToolHistory() { + std::vector history; + + Message user; + user.role = MessageRole::USER; + user.content = "Search for information about AMD."; + history.push_back(user); + + Message asst; + asst.role = MessageRole::ASSISTANT; + asst.content = "I'll search for that."; + history.push_back(asst); + + Message tool; + tool.role = MessageRole::TOOL; + tool.content = "{\"result\": \"AMD makes processors.\"}"; + tool.name = "web_search"; + tool.toolCallId = "call_12345"; + history.push_back(tool); + + Message asst2; + asst2.role = MessageRole::ASSISTANT; + asst2.content = "AMD is a semiconductor company that makes processors."; + history.push_back(asst2); + + return history; + } +}; + +// --------------------------------------------------------------------------- +// 1. Save and load round-trip +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, SaveAndLoadRoundTrip) { + auto history = makeSampleHistory(); + store->save("test-session", history); + + auto loaded = store->load("test-session"); + ASSERT_EQ(loaded.size(), history.size()); + + for (size_t i = 0; i < history.size(); ++i) { + EXPECT_EQ(roleToString(loaded[i].role), roleToString(history[i].role)); + EXPECT_EQ(loaded[i].content, history[i].content); + } +} + +// --------------------------------------------------------------------------- +// 2. 
Save with multiple message roles (USER, ASSISTANT, TOOL) +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, MultipleRolesRoundTrip) { + auto history = makeToolHistory(); + store->save("tool-session", history); + + auto loaded = store->load("tool-session"); + ASSERT_EQ(loaded.size(), 4u); + + // User message + EXPECT_EQ(loaded[0].role, MessageRole::USER); + EXPECT_EQ(loaded[0].content, "Search for information about AMD."); + + // Assistant message + EXPECT_EQ(loaded[1].role, MessageRole::ASSISTANT); + EXPECT_EQ(loaded[1].content, "I'll search for that."); + + // Tool message — verify name and toolCallId + EXPECT_EQ(loaded[2].role, MessageRole::TOOL); + EXPECT_EQ(loaded[2].content, "{\"result\": \"AMD makes processors.\"}"); + ASSERT_TRUE(loaded[2].name.has_value()); + EXPECT_EQ(loaded[2].name.value(), "web_search"); + ASSERT_TRUE(loaded[2].toolCallId.has_value()); + EXPECT_EQ(loaded[2].toolCallId.value(), "call_12345"); + + // Final assistant message + EXPECT_EQ(loaded[3].role, MessageRole::ASSISTANT); +} + +// --------------------------------------------------------------------------- +// 3. Load non-existent session throws +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, LoadNonExistentThrows) { + EXPECT_THROW(store->load("nonexistent-session"), std::runtime_error); +} + +// --------------------------------------------------------------------------- +// 4. exists() returns true after save, false before +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, ExistsBeforeAndAfterSave) { + EXPECT_FALSE(store->exists("check-session")); + + auto history = makeSampleHistory(); + store->save("check-session", history); + + EXPECT_TRUE(store->exists("check-session")); +} + +// --------------------------------------------------------------------------- +// 5. 
remove() — verify file deleted, subsequent load throws +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, RemoveDeletesSession) { + auto history = makeSampleHistory(); + store->save("remove-me", history); + ASSERT_TRUE(store->exists("remove-me")); + + bool removed = store->remove("remove-me"); + EXPECT_TRUE(removed); + EXPECT_FALSE(store->exists("remove-me")); + + // Subsequent load should throw + EXPECT_THROW(store->load("remove-me"), std::runtime_error); +} + +TEST_F(SessionStoreTest, RemoveNonExistentReturnsFalse) { + bool removed = store->remove("never-existed"); + EXPECT_FALSE(removed); +} + +// --------------------------------------------------------------------------- +// 6. list() — save multiple sessions, verify all returned, sorted by timestamp +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, ListMultipleSessions) { + // Write session files directly with known timestamps to ensure deterministic + // ordering (avoids relying on sub-second timing in CI). 
+ auto writeSession = [&](const std::string& id, const std::string& timestamp) { + auto history = makeSampleHistory(); + // Save normally first to create the file + store->save(id, history); + // Then overwrite with a controlled timestamp + fs::path filePath = fs::path(store->directory()) / (id + ".json"); + std::ifstream fin(filePath); + json j = json::parse(fin); + fin.close(); + j["timestamp"] = timestamp; + std::ofstream fout(filePath); + fout << j.dump(2) << "\n"; + }; + + writeSession("session-a", "2026-01-01T10:00:00Z"); + writeSession("session-b", "2026-01-01T11:00:00Z"); + writeSession("session-c", "2026-01-01T12:00:00Z"); + + auto sessions = store->list(); + ASSERT_EQ(sessions.size(), 3u); + + // Newest first — session-c should be first + EXPECT_EQ(sessions[0].id, "session-c"); + EXPECT_EQ(sessions[1].id, "session-b"); + EXPECT_EQ(sessions[2].id, "session-a"); + + // Verify metadata + for (const auto& info : sessions) { + EXPECT_FALSE(info.timestamp.empty()); + EXPECT_EQ(info.messageCount, 3u); + EXPECT_EQ(info.preview, "What is the capital of France?"); + } +} + +// --------------------------------------------------------------------------- +// 7. list() on empty directory — verify empty vector +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, ListEmptyDirectory) { + auto sessions = store->list(); + EXPECT_TRUE(sessions.empty()); +} + +TEST_F(SessionStoreTest, ListNonExistentDirectory) { + SessionStore nonExistent((storeDir / "does_not_exist").string()); + auto sessions = nonExistent.list(); + EXPECT_TRUE(sessions.empty()); +} + +// --------------------------------------------------------------------------- +// 8. 
generateId() — verify format and uniqueness +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, GenerateIdFormat) { + std::string id = SessionStore::generateId(); + + // Must start with "session-" + EXPECT_EQ(id.substr(0, 8), "session-"); + + // Must contain only valid characters (alphanumeric, hyphens, underscores) + for (char c : id) { + EXPECT_TRUE(std::isalnum(static_cast(c)) || c == '-' || c == '_'); + } +} + +TEST_F(SessionStoreTest, GenerateIdUniqueness) { + std::string id1 = SessionStore::generateId(); + std::string id2 = SessionStore::generateId(); + + // Two rapid calls should produce different IDs + EXPECT_NE(id1, id2); +} + +// --------------------------------------------------------------------------- +// 9. Invalid session ID (path separator, dot, or empty) — verify rejected +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, InvalidIdPathSeparator) { + auto history = makeSampleHistory(); + + EXPECT_THROW(store->save("../escape", history), std::invalid_argument); + EXPECT_THROW(store->save("sub/dir", history), std::invalid_argument); + EXPECT_THROW(store->save("back\\slash", history), std::invalid_argument); + EXPECT_THROW(store->load("../escape"), std::invalid_argument); + EXPECT_THROW(store->exists("sub/dir"), std::invalid_argument); + EXPECT_THROW(store->remove("has.dot"), std::invalid_argument); +} + +TEST_F(SessionStoreTest, InvalidIdDot) { + auto history = makeSampleHistory(); + + EXPECT_THROW(store->save("has.dot", history), std::invalid_argument); + EXPECT_THROW(store->save(".hidden", history), std::invalid_argument); + EXPECT_THROW(store->save("..", history), std::invalid_argument); +} + +TEST_F(SessionStoreTest, InvalidIdEmpty) { + auto history = makeSampleHistory(); + EXPECT_THROW(store->save("", history), std::invalid_argument); + EXPECT_THROW(store->load(""), std::invalid_argument); +} + +// 
--------------------------------------------------------------------------- +// Additional: save overwrites existing session +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, SaveOverwritesExisting) { + auto history1 = makeSampleHistory(); + store->save("overwrite-test", history1); + + auto history2 = makeToolHistory(); + store->save("overwrite-test", history2); + + auto loaded = store->load("overwrite-test"); + ASSERT_EQ(loaded.size(), history2.size()); + EXPECT_EQ(loaded[0].content, "Search for information about AMD."); +} + +// --------------------------------------------------------------------------- +// Additional: directory is returned correctly +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, DirectoryAccessor) { + EXPECT_EQ(store->directory(), storeDir.string()); +} + +// --------------------------------------------------------------------------- +// Additional: empty history saves and loads correctly +// --------------------------------------------------------------------------- + +TEST_F(SessionStoreTest, EmptyHistory) { + std::vector empty; + store->save("empty-session", empty); + + auto loaded = store->load("empty-session"); + EXPECT_TRUE(loaded.empty()); +} diff --git a/cpp/tests/test_tool_integration.cpp b/cpp/tests/test_tool_integration.cpp index 2bcd378f4..fbe642768 100644 --- a/cpp/tests/test_tool_integration.cpp +++ b/cpp/tests/test_tool_integration.cpp @@ -6,6 +6,7 @@ // (see ANONYMIZATION section below). No real shell commands are executed. 
#include +#include #include #include #include @@ -505,7 +506,8 @@ TEST_F(WiFiToolsTest, PingHostMissingArgReturnsError) { // Empty host json result = agent_->tools().executeTool("ping_host", json::object()); EXPECT_TRUE(result.contains("error")); - EXPECT_EQ(result["error"], "host parameter is required"); + EXPECT_THAT(result["error"].get<std::string>(), + ::testing::HasSubstr("missing required parameter")); EXPECT_FALSE(result.contains("tool")); } @@ -520,13 +522,15 @@ TEST_F(WiFiToolsTest, SetDnsServersMissingArgsReturnsError) { // No arguments at all json result = agent_->tools().executeTool("set_dns_servers", json::object()); EXPECT_TRUE(result.contains("error")); - EXPECT_EQ(result["error"], "adapter_name and primary_dns are required"); + EXPECT_THAT(result["error"].get<std::string>(), + ::testing::HasSubstr("missing required parameter")); // Only adapter, no primary_dns result = agent_->tools().executeTool( "set_dns_servers", {{"adapter_name", "Wi-Fi"}}); EXPECT_TRUE(result.contains("error")); - EXPECT_EQ(result["error"], "adapter_name and primary_dns are required"); + EXPECT_THAT(result["error"].get<std::string>(), + ::testing::HasSubstr("missing required parameter")); } TEST_F(WiFiToolsTest, SetDnsServersReturnsExpectedFormat) { @@ -557,13 +561,15 @@ TEST_F(WiFiToolsTest, RenewDhcpLeaseReturnsStatus) { TEST_F(WiFiToolsTest, RestartWiFiAdapterMissingArgReturnsError) { json result = agent_->tools().executeTool("restart_wifi_adapter", json::object()); EXPECT_TRUE(result.contains("error")); - EXPECT_EQ(result["error"], "adapter_name is required"); + EXPECT_THAT(result["error"].get<std::string>(), + ::testing::HasSubstr("missing required parameter")); } TEST_F(WiFiToolsTest, EnableWiFiAdapterMissingArgReturnsError) { json result = agent_->tools().executeTool("enable_wifi_adapter", json::object()); EXPECT_TRUE(result.contains("error")); - EXPECT_EQ(result["error"], "adapter_name is required"); + EXPECT_THAT(result["error"].get<std::string>(), + ::testing::HasSubstr("missing required parameter")); } TEST_F(WiFiToolsTest, 
ToggleWiFiRadioDefaultsToOn) { diff --git a/cpp/tests/test_tui_console.cpp b/cpp/tests/test_tui_console.cpp new file mode 100644 index 000000000..bcaf01a5e --- /dev/null +++ b/cpp/tests/test_tui_console.cpp @@ -0,0 +1,173 @@ +// Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved. +// SPDX-License-Identifier: MIT +// +// Unit tests for the TUI markdown renderer (renderMarkdown). +// Tests the markdown parser only, not FTXUI screen rendering. +// Wrapped in GAIA_HAS_TUI so it compiles away when FTXUI is unavailable. + +#ifdef GAIA_HAS_TUI + +#include +#include + +// Declare the function (defined in tui_markdown.cpp). +namespace gaia { +ftxui::Element renderMarkdown(const std::string& markdown); +} + +// ---- Basic rendering ---- + +TEST(TuiMarkdown, PlainText) { + auto elem = gaia::renderMarkdown("Hello world"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, EmptyString) { + auto elem = gaia::renderMarkdown(""); + ASSERT_TRUE(elem); // Should not crash +} + +TEST(TuiMarkdown, WhitespaceOnly) { + auto elem = gaia::renderMarkdown(" \n\n "); + ASSERT_TRUE(elem); +} + +// ---- Headings ---- + +TEST(TuiMarkdown, HeadingH1) { + auto elem = gaia::renderMarkdown("# Title\n\nBody text"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, HeadingH2) { + auto elem = gaia::renderMarkdown("## Subtitle"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, HeadingH3) { + auto elem = gaia::renderMarkdown("### Minor heading"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, MultipleHeadings) { + auto elem = gaia::renderMarkdown("# One\n## Two\n### Three"); + ASSERT_TRUE(elem); +} + +// ---- Code blocks ---- + +TEST(TuiMarkdown, CodeBlock) { + auto elem = gaia::renderMarkdown("```bash\necho hello\n```"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, CodeBlockNoLanguage) { + auto elem = gaia::renderMarkdown("```\nsome code\n```"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, CodeBlockMultipleLines) { + std::string md = "```python\ndef hello():\n print('hello')\n```"; + auto elem = 
gaia::renderMarkdown(md); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, UnclosedCodeBlock) { + // Graceful degradation: unclosed code block should not crash + auto elem = gaia::renderMarkdown("```\nsome code without closing"); + ASSERT_TRUE(elem); +} + +// ---- Bullet lists ---- + +TEST(TuiMarkdown, BulletList) { + auto elem = gaia::renderMarkdown("- item 1\n- item 2\n- item 3"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, SingleBullet) { + auto elem = gaia::renderMarkdown("- just one item"); + ASSERT_TRUE(elem); +} + +// ---- Blockquotes ---- + +TEST(TuiMarkdown, Blockquote) { + auto elem = gaia::renderMarkdown("> This is a quote"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, MultilineBlockquote) { + auto elem = gaia::renderMarkdown("> Line one\n> Line two\n> Line three"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, BlockquoteFollowedByText) { + auto elem = gaia::renderMarkdown("> A quote\n\nRegular text after"); + ASSERT_TRUE(elem); +} + +// ---- Inline formatting ---- + +TEST(TuiMarkdown, BoldText) { + auto elem = gaia::renderMarkdown("Some **bold** text"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, InlineCode) { + auto elem = gaia::renderMarkdown("Use the `printf` function"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, UnclosedBold) { + // Graceful degradation: unclosed ** treated as literal + auto elem = gaia::renderMarkdown("This is **unclosed bold"); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, UnclosedInlineCode) { + // Graceful degradation: unclosed ` treated as literal + auto elem = gaia::renderMarkdown("This is `unclosed code"); + ASSERT_TRUE(elem); +} + +// ---- Mixed content ---- + +TEST(TuiMarkdown, MixedContent) { + std::string md = + "# Header\n" + "\n" + "Some **bold** text and `code`.\n" + "\n" + "```\n" + "code block\n" + "```\n" + "\n" + "- list item\n" + "- another item\n" + "\n" + "> A blockquote"; + auto elem = gaia::renderMarkdown(md); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, LongDocument) { + // Stress test: many lines of mixed 
content + std::string md; + for (int i = 0; i < 50; ++i) { + md += "## Section " + std::to_string(i) + "\n"; + md += "Some text with **bold** and `code`.\n"; + md += "- bullet " + std::to_string(i) + "\n"; + md += "\n"; + } + auto elem = gaia::renderMarkdown(md); + ASSERT_TRUE(elem); +} + +TEST(TuiMarkdown, NoMarkdown) { + // Plain text with no markdown syntax should still render + auto elem = gaia::renderMarkdown("Just a plain sentence with no special formatting."); + ASSERT_TRUE(elem); +} + +#endif // GAIA_HAS_TUI diff --git a/docs/cpp/bash-agent.mdx b/docs/cpp/bash-agent.mdx new file mode 100644 index 000000000..d31fd963e --- /dev/null +++ b/docs/cpp/bash-agent.mdx @@ -0,0 +1,525 @@ +--- +title: "Bash Coding Agent" +description: "AI-powered bash scripting assistant — write, review, test, and debug shell scripts locally on AMD hardware" +icon: "terminal" +--- + + + **First time here?** Complete the [C++ Framework Setup](/cpp/setup) guide first to build the `gaia_core` library. + + + + **Native binary.** `gaia-bash` is a compiled C++ binary built on the GAIA C++ framework (`gaia_core`). No Python runtime required for the agent itself — just Lemonade Server for LLM inference. + + +## Overview + +`gaia-bash` is a domain-specialized coding agent for bash/shell scripting. 
It provides: + +- **Interactive TUI** — Claude Code-style terminal interface with markdown rendering, streaming, and split-pane layout +- **Bash expertise** — system prompt tuned for POSIX compliance, `shellcheck` integration, BATS test generation +- **16 built-in tools** — file I/O, git, process execution, linting, testing, and more +- **REST API server** — OpenAI-compatible endpoint for external tool integration +- **MCP server** — stdio transport for Claude Code, OpenCode, and other MCP-compatible agents +- **Session persistence** — save and resume conversations across runs +- **100% local** — runs entirely on AMD hardware via Lemonade Server, no cloud dependency + +--- + +## Quick Start + + + + ```bash + cd cpp + cmake -B build -DGAIA_BUILD_TUI=ON + cmake --build build --target gaia-bash + ``` + + + + ```bash + lemonade-server serve + ``` + + Ensure a coding model is loaded (Qwen3-Coder-Next recommended): + ```bash + gaia download Qwen3-Coder-Next-GGUF + ``` + + + + ```bash + # Interactive TUI mode + ./build/gaia-bash + + # Single query + ./build/gaia-bash "write a script that finds duplicate files by checksum" + + # Pipe-friendly (no TUI) + ./build/gaia-bash --print "explain what set -euo pipefail does" + ``` + + + +--- + +## Modes of Operation + +### Interactive TUI + +The default mode. Launches a fullscreen terminal UI with: + +- **Chat history** — scrollable, markdown-rendered responses with syntax highlighting +- **Status bar** — current model, token count, step counter +- **Input area** — multi-line input with history (up/down arrows) +- **Tool approval** — modal dialog for destructive operations + +```bash +./build/gaia-bash +``` + +### Single Query + +Run one query, print the result, and exit: + +```bash +./build/gaia-bash "write a cron job that rotates logs daily" +``` + +### Pipe Mode + +No TUI — streams plain text to stdout. 
Ideal for scripting and CI: + +```bash +echo "review this script for security issues" | ./build/gaia-bash --print +./build/gaia-bash --print "generate a BATS test for backup.sh" > test_backup.bats +``` + +### API Server + +Expose the agent as an HTTP REST API: + +```bash +./build/gaia-bash --serve --port 8200 +``` + +See [API Server](#api-server) below. + +### MCP Server + +Run as an MCP tool server for external agents: + +```bash +./build/gaia-bash --mcp +``` + +See [MCP Server](#mcp-server) below. + +### Session Resume + +Resume a previous conversation: + +```bash +# List saved sessions +./build/gaia-bash --list-sessions + +# Resume a specific session +./build/gaia-bash --resume session-20260506-143045 +``` + +--- + +## Slash Commands + +Built-in commands available in interactive mode: + +| Command | Description | +|---|---| +| `/help` | Show all available commands | +| `/clear` | Clear conversation history | +| `/model [name]` | Show or switch the LLM model | +| `/history` | Browse saved sessions | +| `/exit` | Exit the REPL | + +Bash-specific commands: + +| Command | Description | +|---|---| +| `/run <command>` | Execute a bash command directly (bypass LLM) | +| `/env` | Show shell environment info | + + +The following slash commands are planned but not yet available. 
+ + +| Command | Description | +|---|---| +| `/lint [file]` | Run ShellCheck on a script | +| `/test [file]` | Generate and run BATS tests | +| `/review [file]` | Multi-pass code review | +| `/edit <file>` | Open file in `$EDITOR` | + +--- + +## Built-in Tools + +### Framework Tools (shared with all C++ agents) + +| Tool | Policy | Description | +|---|---|---| +| `file_read` | ALLOW | Read file contents with optional line range | +| `file_write` | CONFIRM | Write/create files (creates parent dirs) | +| `file_edit` | CONFIRM | Surgical string replacement in files | +| `file_search` | ALLOW | Search files by glob pattern and content | +| `git_status` | ALLOW | Git status, diff, log, show | + +### Bash-Specific Tools + +| Tool | Policy | Description | +|---|---|---| +| `bash_execute` | CONFIRM | Run bash commands with timeout and output capture | +| `env_inspect` | ALLOW | Shell version, PATH, installed tools | + + +The following tools are planned but not yet available in this release. + + +| Tool | Policy | Description | +|---|---|---| +| `script_lint` | ALLOW | ShellCheck integration with structured diagnostics | +| `script_test` | CONFIRM | BATS test runner in sandboxed temp directory | +| `man_lookup` | ALLOW | Query man pages or `--help` output | +| `git_commit` | CONFIRM | Stage + commit with AI-generated message | +| `bash_background` | CONFIRM | Background process execution with PID tracking | +| `process_list` | ALLOW | List running processes | +| `clipboard_copy` | ALLOW | Copy text to system clipboard | + +--- + +## API Server + +The API server exposes the bash agent via an OpenAI-compatible HTTP REST API, enabling integration with any tool that speaks the OpenAI protocol. + +### Start the server + +```bash +./build/gaia-bash --serve --port 8200 +``` + +### Endpoints + +#### `POST /v1/chat/completions` + +Main endpoint — send messages, get agent responses with tool calls. 
+ +```bash +curl http://localhost:8200/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + {"role": "user", "content": "write a script that monitors disk usage"} + ], + "stream": false + }' +``` + +**Streaming mode:** + +```bash +curl http://localhost:8200/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{ + "messages": [ + {"role": "user", "content": "explain awk column processing"} + ], + "stream": true + }' +``` + +#### `GET /v1/tools` + +List all registered tools with their parameter schemas: + +```bash +curl http://localhost:8200/v1/tools | jq '.tools[].name' +``` + +#### `POST /v1/tools/{name}` + +Execute a specific tool directly (bypass the LLM): + +```bash +curl http://localhost:8200/v1/tools/bash_execute \ + -H "Content-Type: application/json" \ + -d '{"command": "df -h", "timeoutMs": 5000}' + +curl http://localhost:8200/v1/tools/file_read \ + -H "Content-Type: application/json" \ + -d '{"path": "/etc/hostname"}' +``` + +#### `GET /health` + +Health check: + +```bash +curl http://localhost:8200/health +# {"status":"ok","model":"Qwen3-Coder-Next","tools":16} +``` + +#### `GET /sessions`, `POST /sessions`, `DELETE /sessions/{id}` + +Session management: + +```bash +# List sessions +curl http://localhost:8200/sessions + +# Delete a session +curl -X DELETE http://localhost:8200/sessions/session-20260506-143045 +``` + +--- + +## MCP Server + +The MCP server exposes the bash agent as a tool server over the [Model Context Protocol](https://modelcontextprotocol.io/), enabling any MCP-compatible agent to use bash tools. 
+ +### Configure in Claude Code + +Add to `~/.claude/settings.json`: + +```json +{ + "mcpServers": { + "gaia-bash": { + "command": "/path/to/gaia-bash", + "args": ["--mcp"] + } + } +} +``` + +Now Claude Code can use bash agent tools: + +``` +> Use gaia-bash to run shellcheck on my deploy.sh script +> Use gaia-bash to write a BATS test for my backup.sh +``` + +### Configure in OpenCode + +Add to OpenCode's MCP config: + +```json +{ + "mcpServers": { + "gaia-bash": { + "command": "gaia-bash", + "args": ["--mcp"], + "env": { + "LEMONADE_BASE_URL": "http://localhost:13305/api/v1" + } + } + } +} +``` + +### MCP Capabilities + +| Capability | Description | +|---|---| +| `tools/list` | Returns all bash agent tools as MCP tool definitions | +| `tools/call` | Executes a tool call and returns structured results | +| `prompts/list` | Exposes prompt templates: `review-script`, `generate-bats-test`, `explain-command`, `posix-check` | +| `prompts/get` | Returns the prompt template with parameter substitution | + +### Test the MCP server + +```bash +# Start MCP server manually and send a tools/list request +echo '{"jsonrpc":"2.0","id":1,"method":"tools/list"}' | ./build/gaia-bash --mcp +``` + +--- + +## Demo Walkthrough + +An illustrative demo of the key features of `gaia-bash` (steps that rely on `script_lint`, `/review`, or `/test` show planned functionality that is not yet available): + +### 1. Write a Script + +``` +> Write a script that finds all .log files older than 30 days and compresses them + + #!/usr/bin/env bash + set -euo pipefail + + LOG_DIR="${1:-.}" + DAYS="${2:-30}" + + find "$LOG_DIR" -name '*.log' -mtime +"$DAYS" -print0 | + while IFS= read -r -d '' file; do + gzip --best "$file" + echo "Compressed: ${file}.gz" + done + + Using tool: file_write → compress_old_logs.sh + Using tool: script_lint → 0 warnings +``` + +### 2. 
Review the Script + +``` +> /review compress_old_logs.sh + + ## Code Review: compress_old_logs.sh + + Correctness: Handles filenames with spaces/newlines via -print0 + Security: No injection risks, all variables quoted + Portability: gzip --best is GNU-specific (use gzip -9 for broader compat) + Performance: Streaming via pipe, no temp files + Style: ShellCheck clean, well-quoted +``` + +### 3. Generate Tests + +``` +> /test compress_old_logs.sh + + Running: bats test_compress_old_logs.bats + + 4/4 tests passed + compresses .log files older than 30 days + skips .log files newer than threshold + handles filenames with spaces + respects custom directory argument +``` + +### 4. Use via API + +```bash +# From another terminal +curl -s http://localhost:8200/v1/tools/script_lint \ + -d '{"path":"compress_old_logs.sh"}' | jq . +``` + +### 5. Use via MCP from Claude Code + +``` +Claude Code> Use gaia-bash to check my deploy.sh for POSIX compliance + + gaia-bash: Running POSIX compliance check on deploy.sh... 
+ Found 3 bashisms in #!/bin/sh script: + Line 12: [[ ]] → use [ ] for POSIX + Line 25: ${var,,} → use tr '[:upper:]' '[:lower:]' + Line 41: arrays → use positional parameters or files +``` + +--- + +## Configuration + +### Environment Variables + +| Variable | Default | Description | +|---|---|---| +| `LEMONADE_BASE_URL` | `http://localhost:8000/api/v1` | Lemonade Server URL | +| `LEMONADE_MODEL` | `Qwen3-Coder-Next-GGUF` | Model to load | +| `GAIA_CPP_CTX_SIZE` | `16384` | Context window size (tokens) | +| `GAIA_STREAMING` | `0` | Enable streaming (`1` = on) | +| `GAIA_DEBUG` | unset | Enable debug logging | + +### CLI Flags + +| Flag | Description | +|---|---| +| `--serve [--port N]` | Start REST API server (default port 8200) | +| `--mcp` | Start as MCP stdio server | +| `--print` | Pipe-friendly output (no TUI) | +| `--no-tui` | Force CleanConsole even on interactive terminal | +| `--resume <id>` | Resume a saved session | +| `--list-sessions` | List saved sessions and exit | +| `--model <name>` | Override the LLM model | +| `--debug` | Enable debug logging | + +--- + +## Building from Source + +### Prerequisites + +- CMake 3.14+ +- C++17 compiler (MSVC 2019+, GCC 9+, Clang 10+) +- Lemonade Server running with a coding model loaded + +### Build + +```bash +cd cpp +cmake -B build \ + -DGAIA_BUILD_TUI=ON \ + -DGAIA_BUILD_TESTS=ON \ + -DGAIA_BUILD_EXAMPLES=ON + +cmake --build build --target gaia-bash +cmake --build build --target tests_mock +``` + +### Run Tests + +```bash +cd build +ctest --output-on-failure +``` + +### Build Options + +| Option | Default | Description | +|---|---|---| +| `GAIA_BUILD_TUI` | `ON` | Build FTXUI-based TUI console | +| `GAIA_BUILD_TESTS` | `ON` | Build unit tests | +| `GAIA_BUILD_EXAMPLES` | `ON` | Build example agents | +| `GAIA_BUILD_INTEGRATION_TESTS` | `OFF` | Build LLM integration tests | + +--- + +## Architecture + +``` +┌─────────────────────────────────────────────────────────┐ +│ gaia-bash binary │ +│ │ +│ ┌──────────────┐ 
┌──────────────┐ ┌───────────────┐ │ +│ │ TUI Console │ │ API Server │ │ MCP Server │ │ +│ │ (FTXUI) │ │ (cpp-httplib)│ │ (stdio) │ │ +│ │ --default-- │ │ --serve │ │ --mcp │ │ +│ └──────┬───────┘ └──────┬───────┘ └──────┬────────┘ │ +│ │ │ │ │ +│ └──────────┬───────┴──────────────────┘ │ +│ │ │ +│ ┌──────────┴──────────┐ │ +│ │ BashAgent : Agent │ │ +│ │ 16 bash tools │ │ +│ │ Session persistence │ │ +│ └──────────┬──────────┘ │ +│ │ │ +├────────────────────┼─────────────────────────────────────┤ +│ gaia_core library │ │ +│ ┌──────────┴──────────┐ │ +│ │ Agent loop │ │ +│ │ ToolRegistry │ │ +│ │ LemonadeClient │ │ +│ │ ProcessRunner │ │ +│ │ FileIOTools/GitTools│ │ +│ │ SessionStore │ │ +│ │ ReplRunner │ │ +│ └──────────┬──────────┘ │ +│ │ HTTP (SSE) │ +│ ┌──────────┴──────────┐ │ +│ │ Lemonade Server │ │ +│ │ (AMD NPU/GPU/CPU) │ │ +│ └─────────────────────┘ │ +└─────────────────────────────────────────────────────────┘ +``` diff --git a/docs/docs.json b/docs/docs.json index 28b2ac92e..25315bbd2 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -42,6 +42,7 @@ { "group": "User Guides", "pages": [ + "cpp/bash-agent", "cpp/health-agent", "cpp/wifi-agent", "cpp/process-agent", diff --git a/docs/plans/bash-agent.mdx b/docs/plans/bash-agent.mdx new file mode 100644 index 000000000..80297c080 --- /dev/null +++ b/docs/plans/bash-agent.mdx @@ -0,0 +1,697 @@ +--- +title: Bash Coding Agent Plan +description: Native C++ CLI agent specialized for bash/shell scripting with a Claude Code-style TUI, running locally on AMD hardware via Lemonade Server +--- + +# Bash Coding Agent (`gaia-bash`) - Implementation Plan + + +**Status:** Implemented +**Priority:** Medium +**Target:** v0.22.0+ + + +--- + +## Executive Summary + +A native binary CLI tool (`gaia-bash`) built on the GAIA C++ framework (`gaia_core`), providing a Claude Code-style TUI experience specialized for bash/shell scripting. 
It runs entirely locally on AMD hardware via Lemonade Server, targeting sub-second startup, streaming token output, and a reactive terminal interface with markdown rendering, syntax highlighting, and inline script execution. + +**The pitch:** Claude Code is excellent but cloud-only and language-agnostic. `gaia-bash` is local-first, offline-capable, AMD-accelerated, and domain-specialized — it knows bash idioms, ShellCheck rules, POSIX portability traps, and BATS testing patterns out of the box. + +--- + +## The Problem + +GAIA's existing coding capabilities are Python-centric and web-centric: + +| Current Limitation | Impact | +|---|---| +| Python CodeAgent is the only code agent | No first-class support for shell/DevOps/sysadmin workflows | +| CodeAgent runs via Python runtime | ~3s startup, heavy memory footprint, Python dependency chain | +| Shell tools (`ShellToolsMixin`) are read-only whitelist | Cannot run arbitrary commands, no piping/redirection, no script execution | +| No ShellCheck / BATS integration | No bash-specific linting or testing | +| No native binary coding agent exists | C++ framework has example agents but no production CLI tool | +| No Claude Code-style TUI in C++ | `CleanConsole` is basic ANSI print-to-stdout, not reactive/interactive | + +Meanwhile, the GAIA C++ framework (`cpp/`) already provides a production-grade agent runtime with LLM client, tool registry, MCP integration, security, and streaming — but no one has built a real CLI product on it yet. + +--- + +## The Solution + +Build `gaia-bash` as the **first production native binary agent** on `gaia_core`, proving out the C++ framework while delivering a tool that DevOps engineers and sysadmins actually want. + +### Why Bash Specifically + +1. **Constrained domain** — shell scripting has clear idioms, a finite set of built-ins, and well-defined linting (ShellCheck). This makes it easier for smaller local models to excel compared to general-purpose coding. +2. 
**Tool-heavy workflow** — bash work is inherently about running commands and inspecting output. The agent loop (think → tool → observe → think) maps directly to how developers actually work in a terminal. +3. **Local-first is natural** — sysadmins running scripts on servers often can't send code to cloud APIs. A local agent with zero network dependency is a genuine differentiator. +4. **Proves the C++ framework** — building a real product on `gaia_core` surfaces missing capabilities and validates the architecture before more agents follow. + +--- + +## Recommended LLM + +### Primary: Qwen3-Coder-Next (GGUF) + +| Property | Value | +|---|---| +| Architecture | MoE — 80B total, 3B active per forward pass | +| Context window | 256K tokens | +| SWE-bench Verified | 58.7–70.6% (scaffolding-dependent) | +| Hardware | Single 24GB GPU (RTX 4090 / AMD equivalent) | +| Throughput | ~18–22 tok/s GPU, ~10 tok/s CPU-only (96GB RAM) | +| Format | GGUF via llama.cpp (Lemonade backend) | + +**Why this model:** +- Best overall local coding LLM in 2026 — trained on 800K real GitHub PR scenarios +- Non-reasoning model: no thinking blocks, direct fast answers — ideal for a scripting assistant where latency matters +- Ultra-sparse MoE: consumer-grade hardware delivers Sonnet-class coding quality +- Evaluated on Terminal-Bench (shell scripting, DevOps, system-level programming) +- Strong agentic tool-calling performance with recovery from execution failures + +### Tiered Hardware Options + +| Tier | Model | VRAM | Speed | Quality | +|---|---|---|---|---| +| High | Qwen3-Coder-Next Q4_K_XL | 24GB | ~20 tok/s | Excellent | +| Medium | Qwen 2.5 Coder 32B | 24GB | ~25 tok/s | Very good | +| Budget | Qwen 2.5 Coder 14B | 12GB | ~40 tok/s | Good for simple scripts | +| Minimal | Qwen 2.5 Coder 7B | 8GB | ~60 tok/s | Autocomplete quality | + + +**Model quality vs. 
tool reliability trade-off:** The C++ framework currently uses text-based tool calling (tools described in the system prompt, LLM returns JSON), not OpenAI-native function calling schemas. This means smaller models (7B, 14B) are more likely to hallucinate tool names or produce malformed JSON. The plan includes mitigations — tool argument validation in M1-P1 and optional native function calling in M1-P4 — but Qwen3-Coder-Next at 3B active params is the minimum for reliable multi-step tool orchestration. + + +--- + +## Goals + +- **Sub-second startup** — native binary, no runtime interpreter +- **Offline-capable** — runs entirely on local hardware via Lemonade Server +- **Bash-specialized** — system prompt, tools, and analysis tuned for shell scripting +- **Claude Code-like TUI** — reactive terminal UI with markdown rendering, streaming tokens, split panes +- **Safe by default** — destructive commands require explicit approval, path validation on all file ops +- **Proves `gaia_core`** — first production CLI agent built on the C++ framework, surfacing and fixing framework gaps + +## Non-Goals + +- **Replacing the Python CodeAgent.** `gaia-code` continues to serve general-purpose coding (Python, TS, web). `gaia-bash` is a complementary domain agent. +- **Supporting every shell.** M2-P1 targets `bash` and `sh`. PowerShell, zsh, fish are future extensions — not blockers. +- **Building a general-purpose terminal emulator.** `gaia-bash` runs inside your existing terminal. It's a TUI application, not a terminal replacement. +- **Remote execution (SSH).** Local-only in this plan. Remote execution via MCP server is a natural follow-up but out of scope. +- **Embedding llama.cpp directly.** Inference goes through Lemonade Server's HTTP API. Direct `libllama` embedding adds complexity without clear benefit (Lemonade handles model lifecycle, GPU scheduling, quantization). 
+- **Windows PowerShell specialization.** The agent runs on Windows (WSL/Git Bash) but the domain expertise is bash/POSIX, not PowerShell. A separate PowerShell agent would be a different project. + +--- + +## Critical Framework Gaps + +The C++ framework exploration revealed five issues that must be addressed before or during implementation. These are not blockers — they're known work items. + +### Gap 1: Text-Based Tool Calling (No Native Function Calling) + +**Current state:** Tools are formatted as text in the system prompt (`==== AVAILABLE TOOLS ====`), and the agent parses LLM responses for `"tool"` / `"tool_args"` JSON fields with regex fallback. The OpenAI-compatible API request does NOT include a `"tools"` parameter. + +**Risk:** Higher tool-name hallucination with smaller models. No schema validation at the API level. + +**Mitigation:** +1. **M1-P1:** Add JSON schema validation for tool arguments before execution +2. **M1-P4 (optional):** Add native function calling via the OpenAI `"tools"` parameter in `LemonadeClient`, gated behind a capability check (Lemonade Server version / model support) + +### Gap 2: No File I/O Tools + +**Current state:** The C++ framework has no built-in `file_read`, `file_write`, or `file_edit` tools. The Python CodeAgent has `FileIOToolsMixin` but nothing equivalent in C++. + +**Resolution:** Implement as framework-level tools in M1-P1. The `ToolRegistry` mechanism is ready — only the tool callbacks need writing. Path validation already exists in `security.cpp`. + +### Gap 3: Basic TUI (No Reactive UI) + +**Current state:** `CleanConsole` outputs ANSI-colored text via `std::cout`. No cursor control, no screen regions, no interactive widgets. Progress indicators (`startProgress`/`stopProgress`) are no-ops. + +**Resolution:** Build `TuiConsole` on FTXUI in M1-P3. `CleanConsole` becomes the fallback for `--no-tui` / piped output modes. 
+ +### Gap 4: No REPL Architecture + +**Current state:** Existing examples (`wifi_agent`, `health_agent`) use blocking `std::getline()` loops that call `processQuery()` once per input. No session continuity model beyond conversation history. + +**Resolution:** Build `ReplRunner` in M1-P2: +- Two-thread architecture: input thread + agent thread +- Maintains conversation history across queries (already supported by `Agent`) +- Slash command framework with built-in commands + agent-registered extensions +- Ctrl-C cancels current agent run without killing the process + +### Gap 5: No Session Persistence + +**Current state:** Conversation history is in-memory only. Everything is lost when the process exits. + +**Resolution:** Build `SessionStore` in M1-P2. Serialize `conversationHistory_` to `~/.gaia/sessions/<id>.json` on exit, reload on startup with `--resume`. + +--- + +## Architecture + +The system has two layers. The bottom layer is the `gaia_core` framework (Milestone 1) — reusable by any future C++ agent. The top layer is the `gaia-bash` agent (Milestone 2) — bash-domain-specific code that plugs into the framework. 
+ +``` +┌─────────────────────────────────────────────────────────┐ +│ gaia-bash binary (Milestone 2 — agent-specific) │ +│ │ +│ ┌────────────────────────────────────────────────────┐ │ +│ │ BashAgent : Agent │ │ +│ │ registerTools(): │ │ +│ │ bash_execute, script_lint, script_test, │ │ +│ │ env_inspect, man_lookup, bash_background │ │ +│ │ getSystemPrompt(): │ │ +│ │ Bash expert persona (POSIX, ShellCheck, BATS) │ │ +│ │ Slash commands: │ │ +│ │ /run, /lint, /test, /review, /edit │ │ +│ └────────────────────────────────────────────────────┘ │ +│ │ │ +├────────────────────────────┼──────────────────────────────┤ +│ gaia_core library (Milestone 1 — shared framework) │ +│ │ │ +│ ┌──────────────┐ ┌──────┴───────┐ ┌────────────────┐ │ +│ │ TuiConsole │ │ Agent loop │ │ ReplRunner │ │ +│ │ (FTXUI) │ │ + tools │ │ (2-thread, │ │ +│ │ Markdown │ │ + security │ │ slash cmds, │ │ +│ │ Streaming │ │ + MCP │ │ cancel) │ │ +│ └──────────────┘ └──────────────┘ └────────────────┘ │ +│ ┌──────────────┐ ┌──────────────┐ ┌────────────────┐ │ +│ │ FileIOTools │ │ ProcessRunner│ │ SessionStore │ │ +│ │ GitTools │ │ (cross-plat) │ │ (JSON persist) │ │ +│ └──────────────┘ └──────────────┘ └────────────────┘ │ +│ │ │ +│ ┌─────────────┴─────────────┐ │ +│ │ LemonadeClient │ │ +│ │ (HTTP, streaming SSE) │ │ +│ └─────────────┬─────────────┘ │ +└────────────────────────────┼──────────────────────────────┘ + │ + ┌─────────────┴─────────────┐ + │ Lemonade Server │ + │ Qwen3-Coder-Next GGUF │ + │ (AMD NPU / GPU / CPU) │ + └────────────────────────────┘ +``` + +### Build Integration + +Framework additions in `cpp/CMakeLists.txt` (Milestone 1): + +```cmake +# FTXUI — optional TUI dependency, gated behind GAIA_BUILD_TUI +option(GAIA_BUILD_TUI "Build FTXUI-based TUI console" ON) +if(GAIA_BUILD_TUI) + FetchContent_Declare(ftxui + GIT_REPOSITORY https://github.com/ArthurSonzogni/FTXUI + GIT_TAG v6.1.9 + ) + FetchContent_MakeAvailable(ftxui) +endif() + +# New source files added to gaia_core 
+add_library(gaia_core + # ... existing sources ... + src/process.cpp + src/file_tools.cpp + src/git_tools.cpp + src/repl.cpp + src/session.cpp +) +if(GAIA_BUILD_TUI) + target_sources(gaia_core PRIVATE + src/tui_console.cpp + src/tui_markdown.cpp + ) + target_link_libraries(gaia_core PRIVATE + ftxui::component ftxui::dom ftxui::screen + ) +endif() +``` + +Agent binary target (Milestone 2): + +```cmake +add_executable(gaia-bash + agents/bash/main.cpp + agents/bash/bash_agent.cpp + agents/bash/bash_tools.cpp +) +target_link_libraries(gaia-bash PRIVATE gaia::gaia_core) +``` + +--- + +## Tool Inventory + +### Framework Tools (M1-P1 — shared by all agents) + +| Tool | Description | Security Policy | +|---|---|---| +| `file_read` | Read file contents with optional line range (`--start`, `--end`) | ALLOW | +| `file_write` | Write/create file (creates parent dirs). Shows diff preview for existing files. | CONFIRM | +| `file_edit` | Surgical old_string → new_string replacement (Claude Code-style Edit tool) | CONFIRM | +| `file_search` | Glob + grep across directory tree. Returns paths + context lines. | ALLOW | +| `git_status` | Read-only git operations: status, diff, log, branch, show | ALLOW | + +### Bash Agent Core Tools (M2-P1) + +| Tool | Description | Security Policy | +|---|---|---| +| `bash_execute` | Run bash/sh command with configurable timeout, capture stdout + stderr + exit code. Supports pipes and redirection. 
| CONFIRM | +| `env_inspect` | Shell version, PATH entries, OS info, key env vars | ALLOW | + +### Bash Agent Developer Tools (M2-P2) + +| Tool | Description | Security Policy | +|---|---|---| +| `script_lint` | Run ShellCheck, return structured diagnostics (severity + line + SC code + fix suggestion) | ALLOW | +| `script_test` | Execute script in temp sandbox, capture results, cleanup | CONFIRM | +| `man_lookup` | Query man pages or `--help` for command documentation | ALLOW | +| `git_commit` | Stage + commit with LLM-generated message (shows preview first) | CONFIRM | + +### Bash Agent Advanced Tools (M2-P3) + +| Tool | Description | Security Policy | +|---|---|---| +| `bash_background` | Run long-running command in background, return PID, poll for output | CONFIRM | +| `process_list` | List running processes with filtering | ALLOW | +| `cron_inspect` | Read crontab entries, systemd timers | ALLOW | +| `network_check` | ping, curl, dig diagnostics | CONFIRM | +| `clipboard_copy` | Copy text to system clipboard | ALLOW | + +--- + +## TUI Design + +### Component Stack (M1-P3 — framework-level) + +The TUI is a framework component (`TuiConsole`) that any agent gets for free. Bash-specific customizations (syntax highlighting for shell keywords, bash slash commands) are registered by the agent at startup. + +| Layer | Library | Milestone | Purpose | +|---|---|---|---| +| Layout & widgets | [FTXUI v6.1.9](https://github.com/ArthurSonzogni/FTXUI) | M1-P3 | Reactive fullscreen TUI — input, scrollable output, status bar, split panes | +| Markdown rendering | Built-in C++17 parser | M1-P3 | Render LLM responses with headings, bold, code blocks, lists. [MarkdownFTXUI](https://github.com/zvasilev/MarkdownFTXUI) (C++20) as future upgrade. 
| +| Syntax highlighting | Agent-registered color map | M2-P1 | Bash agent registers bash keywords (`if/then/fi`, pipes, redirects) for code block coloring | +| Token streaming | Existing `sse_parser.cpp` + FTXUI refresh | M1-P3 | Feed SSE tokens into text element, trigger incremental re-render | + +### Modes (M1-P2 — framework-level) + +These modes are provided by `ReplRunner` and work for any agent, not just `gaia-bash`: + +| Mode | Behavior | +|---|---| +| `<agent>` (default) | Interactive FTXUI TUI with full split-pane layout | +| `<agent> "query"` | Single-query mode: run query, print result, exit | +| `<agent> --print` | Pipe-friendly: no TUI, streaming plain text to stdout | +| `<agent> --resume <id>` | Resume a previous session from `~/.gaia/sessions/` | + +### Slash Commands + +**Framework built-ins** (M1-P2 — available to all agents): + +| Command | Action | +|---|---| +| `/clear` | Clear conversation history | +| `/model [name]` | Switch LLM model | +| `/help` | Show available commands | +| `/history` | Browse session history | + +**Bash-specific** (M2-P2 — registered by `BashAgent`): + +| Command | Action | +|---|---| +| `/run <command>` | Execute bash command directly (bypass LLM) | +| `/edit <file>` | Open file in `$EDITOR` | +| `/review [file]` | Multi-pass code review of a script | +| `/test [file]` | Generate and run BATS tests | +| `/lint [file]` | Run ShellCheck | + +--- + +## Intelligence Layer + +### Bash-Specialized System Prompt + +The system prompt encodes bash domain expertise: + +- Default to POSIX-compatible scripts; use bashisms only when shebang is `#!/bin/bash` +- Always include `set -euo pipefail` in non-trivial scripts +- Quote all variable expansions unless word splitting is intentional +- Use `"$@"` not `$@`, `"${var}"` not `$var` +- Prefer built-in shell features over external tools when equivalent +- For destructive operations (`rm -rf`, `dd`, `mkfs`), always confirm with user +- Provide ShellCheck codes (SC2086, SC2046, etc.) 
when explaining fixes +- Use BATS format for generated tests +- Include man page references for non-obvious flags + +### Code Review Engine (M2-P3) + +Multi-pass analysis pipeline: + +1. **Correctness** — logic errors, edge cases, exit code handling +2. **Security** — injection risks, unquoted expansions, `eval` usage, temp file races +3. **Portability** — bashisms in `#!/bin/sh` scripts, GNU-specific flags +4. **Performance** — unnecessary subshells, useless `cat`, fork-heavy patterns +5. **Style** — ShellCheck compliance, naming conventions, comments + +### Test Generation (M2-P3) + +Given a script, generate [BATS](https://github.com/bats-core/bats-core) test cases covering: +- Happy path with expected output +- Error cases (missing args, bad input, missing dependencies) +- Edge cases (empty input, filenames with spaces/newlines) +- Exit code verification + +--- + +## Phased Delivery + +Work is split into two milestones. **Milestone 1** upgrades the shared `gaia_core` C++ library — every capability added here benefits all future C++ agents (CUA, Docker, PowerShell, etc.), not just `gaia-bash`. **Milestone 2** builds the bash-specific agent on top of the upgraded framework. 
+ +| Phase | Scope | Location | Effort | +|---|---|---|---| +| **M1-P1** | ProcessRunner, FileIOTools, GitTools, tool arg validation | `cpp/include/gaia/`, `cpp/src/` | 2–3 weeks | +| **M1-P2** | ReplRunner, slash command framework, SessionStore, agent cancel | `cpp/include/gaia/`, `cpp/src/` | 2–3 weeks | +| **M1-P3** | TuiConsole (FTXUI), markdown rendering, streaming, `--no-tui` | `cpp/include/gaia/`, `cpp/src/` | 3–4 weeks | +| **M1-P4** | Native function calling (optional) | `cpp/include/gaia/`, `cpp/src/` | 2–3 weeks | +| **M2-P1** | BashAgent, `bash_execute`, system prompt, CLI entry point | `cpp/agents/bash/` | 2–3 weeks | +| **M2-P2** | ShellCheck, BATS, man_lookup, bash slash commands, clipboard | `cpp/agents/bash/` | 2–3 weeks | +| **M2-P3** | Code review engine, test generation, POSIX checker, advanced tools | `cpp/agents/bash/` | 3–4 weeks | +| **M2-P4** | Static linking, docs, CI build matrix, Python bridge | `cpp/agents/bash/`, `docs/` | 2–3 weeks | +| **M2-P5** | REST API server, MCP server, `--serve` / `--mcp` flags | `cpp/agents/bash/` | 3–4 weeks | +| **M2-P6** | Eval scenarios, eval adapter, ground truth, CI integration | `cpp/agents/bash/`, `src/gaia/eval/` | 2–3 weeks | + +**Total estimated effort:** M1: 9–13 weeks, M2: 14–20 weeks. M1-P4 is optional and can be parallelized with M2. M2-P1 can start as soon as M1-P1 through M1-P3 are complete. M2-P5 can be parallelized with M2-P2/P3. + +--- + +### Milestone 1: C++ Framework Upgrades (`gaia_core`) + +Everything in this milestone ships as library code in `cpp/include/gaia/` and `cpp/src/`. No bash-specific logic. The validation target is: an existing example agent (e.g. `wifi_agent`) can be refactored to use these new primitives and gain interactive TUI, file I/O, and session persistence for free. + +#### M1-P1: Process Runner + File I/O Tools + +**Scope:** Extract the ad-hoc `runShell()` pattern into a proper library utility and add reusable file I/O tool callbacks. 
+ +**Deliverables:** + +| Component | Files | Description | +|---|---|---| +| `ProcessRunner` | `include/gaia/process.h`, `src/process.cpp` | `run(cmd, timeout, cwd, env)` → `{stdout, stderr, exitCode}`. Cross-platform (popen on POSIX, `_popen`/CreateProcess on Windows). Configurable timeout with SIGKILL/TerminateProcess fallback. Output size cap (default 64KB). | +| `FileIOTools` | `include/gaia/file_tools.h`, `src/file_tools.cpp` | Pre-built tool callbacks: `file_read(path, start?, end?)`, `file_write(path, content)`, `file_edit(path, old_string, new_string)`, `file_search(pattern, path?, content_pattern?)`. All use `validatePath()` from `security.cpp`. Register via `ToolRegistry::registerFileTools()`. | +| `GitTools` | `include/gaia/git_tools.h`, `src/git_tools.cpp` | Read-only git tool callbacks: `git_status()`, `git_diff(path?)`, `git_log(n?)`, `git_show(ref)`. Delegates to `ProcessRunner`. Register via `ToolRegistry::registerGitTools()`. | +| Tool argument validation | `tool_registry.h/cpp` | Validate JSON args against declared `ToolParameter` types (string, int, bool, required/optional) before invoking callback. Reject malformed args with actionable error instead of passing garbage to the tool. | +| Unit tests | `tests/test_process.cpp`, `tests/test_file_tools.cpp`, `tests/test_git_tools.cpp` | Mock filesystem and process execution. | + +**Estimated effort:** 2–3 weeks + +#### M1-P2: REPL Runner + Session Persistence + +**Scope:** A reusable interactive loop class that any agent can plug into, replacing the blocking `std::getline()` pattern in examples. + +**Deliverables:** + +| Component | Files | Description | +|---|---|---| +| `ReplRunner` | `include/gaia/repl.h`, `src/repl.cpp` | Two-thread architecture: input thread (reads user input) + agent thread (runs `processQuery()`). Input accepted while agent is thinking. Ctrl-C cancels current agent run (sets a `cancel_` flag checked in the agent loop), doesn't kill the process. 
| +| Slash command framework | Built into `ReplRunner` | Register slash commands via `repl.addCommand("/name", callback)`. Built-in commands: `/clear` (reset history), `/help` (list commands), `/model <name>` (switch model), `/history` (show session). Agent-specific commands (e.g. `/lint`) registered by the agent, not the framework. | +| `SessionStore` | `include/gaia/session.h`, `src/session.cpp` | Serialize/deserialize `conversationHistory_` to `~/.gaia/sessions/<id>.json`. `save(id, history)`, `load(id) → history`, `list() → [{id, timestamp, preview}]`. Supports `--resume <id>` CLI pattern. | +| Agent cancel support | `agent.h/cpp` | Add `requestCancel()` method and `cancelled_` atomic flag. The agent loop checks this flag between steps and exits early with a partial result. | +| Unit tests | `tests/test_repl.cpp`, `tests/test_session.cpp` | Test command dispatch, session round-trip serialization, cancel semantics. | + +**Estimated effort:** 2–3 weeks + +#### M1-P3: FTXUI Console + Markdown Rendering + +**Scope:** A reactive `OutputHandler` subclass built on FTXUI that any agent can use as a drop-in replacement for `CleanConsole`. + +**Deliverables:** + +| Component | Files | Description | +|---|---|---| +| FTXUI dependency | `CMakeLists.txt` | FetchContent for FTXUI v6.1.9. Optional — `CleanConsole` remains the default when FTXUI is not available (piped output, `--no-tui`). Build gate: `option(GAIA_BUILD_TUI "Build FTXUI-based TUI console" ON)`. | +| `TuiConsole` : `OutputHandler` | `include/gaia/tui_console.h`, `src/tui_console.cpp` | FTXUI fullscreen app implementing the `OutputHandler` interface. Layout: scrollable chat history (top), status bar (model name, token count, step N/M), input area (bottom). Streaming tokens update the chat panel in real-time. Tool approval shows a modal dialog ([Y]es / [N]o / [A]lways). 
| +| Markdown rendering | `src/tui_markdown.cpp` | Render LLM markdown responses inside FTXUI elements: headings (bold + color), `**bold**`, `` `code` ``, fenced code blocks (bordered, syntax-colored for bash keywords), bullet lists, blockquotes. Built in C++17 — lightweight parser, no cmark dependency. MarkdownFTXUI (C++20) is a future upgrade path, not an M1 dependency. | +| Input component | Integrated in `TuiConsole` | Multi-line input with Enter to submit (Shift+Enter or Ctrl+Enter for newline). Up/down arrow for input history. Paste support. | +| Streaming integration | `tui_console.cpp` | `printStreamToken()` appends to the current chat bubble and triggers FTXUI re-render. `printStreamEnd()` finalizes the bubble. | +| `--no-tui` fallback | `repl.h` | ReplRunner auto-detects piped stdout and falls back to `CleanConsole`. Explicit `--no-tui` flag also forces fallback. | +| Unit tests | `tests/test_tui_console.cpp` | Test rendering logic (markdown parser), not FTXUI screen output. | + +**Estimated effort:** 3–4 weeks + +#### M1-P4: Native Function Calling (Optional) + +**Scope:** Send tools as OpenAI-compatible `"tools"` array in the API request instead of embedding them as text in the system prompt. Gated behind a capability check. + +**Deliverables:** + +| Component | Files | Description | +|---|---|---| +| Tool schema export | `tool_registry.h/cpp` | `formatAsOpenAITools() → json` — export registered tools as an OpenAI `tools` array with JSON Schema parameter definitions. | +| LemonadeClient upgrade | `lemonade_client.h/cpp` | Accept optional `tools` JSON in `chatCompletions()` and `chatCompletionsStreaming()`. Include in request body when non-empty. | +| Response parsing | `json_utils.h/cpp` | Parse OpenAI-style `tool_calls` array in assistant messages (alongside existing text-based fallback). | +| Agent integration | `agent.cpp` | Config flag `useNativeFunctionCalling` (default: false). When enabled, uses schema export + native parsing. 
Falls back to text-based on error. | +| Capability detection | `lemonade_client.cpp` | Probe Lemonade Server `/health` or `/v1/models` for function calling support. Auto-enable when available. | +| Integration test | `tests/integration/test_function_calling.cpp` | Test with real Lemonade Server + Qwen model. | + + +This phase is **optional** — the text-based approach works and is what ships first. Native function calling is a reliability upgrade that reduces tool hallucinations, especially with smaller models (7B–14B). It can be deferred or parallelized with Milestone 2 work. + + +**Estimated effort:** 2–3 weeks + +--- + +### Milestone 2: Bash Coding Agent (`gaia-bash`) + +Everything in this milestone is bash-domain-specific code. It lives in `cpp/agents/bash/` (or a similar directory), links against `gaia_core`, and produces a standalone `gaia-bash` binary. Framework components from Milestone 1 are consumed, not modified. + +#### M2-P1: Core Agent + Basic TUI + +**Scope:** Functional bash agent with domain tools, FTXUI shell, streaming — the "hello world" that proves M1 works end-to-end. + +**Deliverables:** + +| Component | Description | +|---|---| +| `BashAgent : Agent` | Subclass with `registerTools()` and `getSystemPrompt()`. Registers framework tools (file I/O, git) plus bash-specific tools. | +| Bash system prompt | Expert persona: POSIX-first, `set -euo pipefail`, quoting rules, ShellCheck awareness. | +| `bash_execute` tool | Wraps `ProcessRunner` with bash-specific concerns: shell detection (bash/sh/WSL), shebang injection, output truncation heuristics. CONFIRM policy. | +| `env_inspect` tool | Shell version, PATH, OS info, installed tools detection (`jq`, `shellcheck`, `bats`). ALLOW policy. | +| CLI entry point (`main.cpp`) | Argument parsing: `gaia-bash "query"` (single-shot), `gaia-bash` (interactive), `gaia-bash --print` (piped), `gaia-bash --resume <id>`. | +| REPL integration | Plugs `BashAgent` into `ReplRunner` with `TuiConsole`. 
| +| Cross-platform shell detection | Detect available shell (bash, sh, WSL, Git Bash) and configure `bash_execute` accordingly. | +| CMake target | `add_executable(gaia-bash ...)` linking `gaia_core`, FTXUI. | +| Unit + integration tests | Mock-LLM tests for all bash tools. Integration test with real Lemonade. | + +**Estimated effort:** 2–3 weeks + +#### M2-P2: Developer Tools + Slash Commands + +**Scope:** ShellCheck, BATS, man pages, and the slash command UX that makes bash work feel native. + +**Deliverables:** + +| Component | Description | +|---|---| +| `script_lint` tool | Run ShellCheck with `-f json1`, parse structured output → severity + line + SC code + fix suggestion. Graceful degradation if ShellCheck not installed. ALLOW policy. | +| `script_test` tool | Create temp sandbox dir, write script + generated BATS test, execute `bats`, capture TAP output, cleanup. CONFIRM policy. | +| `man_lookup` tool | Run `man <command>` or `<command> --help`, capture and truncate output. ALLOW policy. | +| `git_commit` tool | Stage files + LLM-generated commit message with preview. CONFIRM policy. | +| Bash slash commands | `/run <command>` (direct execution), `/lint [file]`, `/test [file]`, `/review [file]`, `/edit <file>` (open in `$EDITOR`). Registered via `ReplRunner::addCommand()`. | +| Clipboard integration | `clipboard_copy` tool — xclip/pbcopy/wl-copy/win32 `SetClipboardData`. ALLOW policy. | + +**Estimated effort:** 2–3 weeks + +#### M2-P3: Intelligence Layer + +**Scope:** The domain expertise that makes `gaia-bash` more than a wrapper around a generic LLM. + +**Deliverables:** + +| Component | Description | +|---|---| +| Code review engine | Multi-pass LLM analysis: correctness → security → portability → performance → style. Triggered by `/review` or `review this script`. | +| POSIX compliance checker | Flag bashisms (`[[ ]]`, `<<<`, `${var,,}`, arrays) when shebang is `#!/bin/sh`. Suggest portable alternatives. 
| +| BATS test generation | Given a script, generate test cases covering happy path, error cases, edge cases, exit codes. | +| Dependency detection | Scan `#!/bin/bash` scripts for external tool usage (`jq`, `yq`, `curl`, `awk`, `sed`, `docker`, `kubectl`), check which are on PATH, warn on missing. | +| `bash_background` tool | Run command in background, return PID, `poll(pid)` for output, `kill(pid)` to stop. CONFIRM policy. | +| `process_list` tool | `ps aux` / `tasklist` with grep filtering. ALLOW policy. | +| `cron_inspect` tool | Read `crontab -l`, parse systemd timers. ALLOW policy. | +| `network_check` tool | `ping`, `curl -I`, `dig` with structured output. CONFIRM policy. | +| Snippet templates | Built-in prompt templates for common patterns: argument parsing (`getopts`), error handling (`trap`), logging, parallel execution (`xargs -P`, GNU parallel). | +| Auto-mode | Classify tools as safe (read-only → auto-approve) vs. dangerous (writes, network → require confirmation). Config-driven override. | + +**Estimated effort:** 3–4 weeks + +#### M2-P4: Polish + Distribution + +**Scope:** Production readiness, packaging, docs, CI. + +**Deliverables:** + +| Component | Description | +|---|---| +| Static linking | Single-binary distribution — no shared library dependencies beyond libc/kernel32. | +| `gaia init` integration | Model download for Qwen3-Coder-Next via Lemonade API from C++. First-run setup flow. | +| Performance benchmarks | Startup latency (<100ms target), token throughput, memory footprint. Tracked in CI. | +| Integration tests | End-to-end tests against real Lemonade Server in CI. | +| Documentation | `docs/guides/bash.mdx` — quickstart, tool reference, examples. Update `docs/docs.json`. | +| CI/CD build matrix | Windows MSVC, Linux GCC/Clang, macOS AppleClang. Artifact upload to GitHub Releases. | +| Python bridge | `gaia bash` subcommand in `cli.py` that delegates to the native binary if found on PATH. 
| + +**Estimated effort:** 2–3 weeks + +#### M2-P5: API Server + MCP Server (Third-Party Integration) + +**Scope:** Expose the bash agent via an HTTP REST API and an MCP server so external tools (Claude Code, OpenCode, custom integrations) can interact with it programmatically. + +**Deliverables:** + +| Component | Description | +|---|---| +| **REST API server** | Built-in HTTP server (cpp-httplib, already a dependency) exposing OpenAI-compatible `/v1/chat/completions` endpoint. Runs on configurable port (default 8200). Supports streaming via SSE. Accepts tool calls in request body. | +| `/v1/chat/completions` | Main endpoint: accepts messages array, routes through BashAgent, returns structured response with tool call results. Streaming mode sends token-by-token SSE events. | +| `/v1/tools` | List all registered bash agent tools with their schemas (name, description, parameters). Read-only. | +| `/v1/tools/{name}` | Execute a specific tool directly (bypass LLM). Useful for external orchestrators that want to call `bash_execute` or `file_read` without going through the agent loop. | +| `/health` | Health check endpoint returning agent status, model info, and tool count. | +| `/sessions` | CRUD for sessions: `GET /sessions` (list), `POST /sessions` (create), `GET /sessions/{id}` (load), `DELETE /sessions/{id}` (remove). | +| `--serve` CLI flag | Start the API server: `gaia-bash --serve [--port 8200]`. Runs instead of the interactive TUI. | +| **MCP server (stdio)** | Expose the bash agent as an MCP tool server over stdio transport. External agents (e.g. Claude Code, OpenCode) can connect via `{"command": "gaia-bash", "args": ["--mcp"]}` in their MCP config. | +| MCP `tools/list` | Returns all bash agent tools as MCP tool definitions (name, description, inputSchema). | +| MCP `tools/call` | Executes a tool call, routes through BashAgent for LLM-backed tools or directly for utility tools. Returns structured result. 
| +| MCP `prompts/list` | Exposes built-in prompt templates (e.g. "review-script", "generate-bats-test", "explain-command") as MCP prompts. | +| `--mcp` CLI flag | Start as an MCP stdio server: `gaia-bash --mcp`. Reads JSON-RPC from stdin, writes to stdout. Compatible with Claude Code MCP config. | +| Integration tests | Test API endpoints with curl/httplib. Test MCP protocol with a mock client. | + +**Architecture:** + +``` +External tool (Claude Code, OpenCode, curl) + │ + ├── HTTP REST API (:8200) + │ └── /v1/chat/completions + │ └── /v1/tools/{name} + │ └── /health + │ + └── MCP stdio transport + └── tools/list, tools/call + └── prompts/list + │ + ▼ + BashAgent : Agent + (same agent instance, same tools) +``` + +**Usage examples:** + +```bash +# Start as API server +gaia-bash --serve --port 8200 + +# Query via curl +curl http://localhost:8200/v1/chat/completions \ + -H "Content-Type: application/json" \ + -d '{"messages":[{"role":"user","content":"write a backup script"}]}' + +# Use as MCP server in Claude Code (~/.claude/settings.json) +{ + "mcpServers": { + "gaia-bash": { + "command": "gaia-bash", + "args": ["--mcp"] + } + } +} + +# Direct tool call via API +curl http://localhost:8200/v1/tools/bash_execute \ + -H "Content-Type: application/json" \ + -d '{"command":"ls -la","timeoutMs":5000}' +``` + +**Estimated effort:** 3–4 weeks + +#### M2-P6: Eval Integration + +**Scope:** Extend the GAIA evaluation framework to validate the bash agent against bash-specific scenarios. + +**Deliverables:** + +| Component | Description | +|---|---| +| Bash eval scenarios | 20+ test scenarios covering: script writing, script review, file operations, ShellCheck compliance, BATS test generation, error debugging, POSIX portability. | +| Eval adapter | Connect `gaia eval agent` to the bash agent's REST API (`--serve` mode). The eval runner sends scenarios as chat messages and judges responses. | +| Ground truth | Expected outputs / acceptance criteria for each scenario. 
Leverages existing `gaia eval` judge LLM (Claude) for quality assessment. | +| Benchmark suite | Measure: tool call accuracy, script correctness (run generated scripts, check exit code), ShellCheck pass rate on generated code, POSIX compliance rate. | +| CI integration | Run bash eval scenarios in CI on each PR touching `cpp/agents/bash/`. Report scorecard in PR comment. | + +**Estimated effort:** 2–3 weeks + +--- + +## Risk Register + +| Risk | Likelihood | Impact | Affects | Mitigation | +|---|---|---|---|---| +| Qwen3-Coder-Next GGUF not available for Lemonade/AMD NPU | Medium | High | M2 | Qwen 2.5 Coder 32B as fallback — already GGUF-available, well-tested | +| Text-based tool calling unreliable with smaller models | Medium | Medium | M1-P1, M1-P4 | JSON schema validation (M1-P1), optional native function calling (M1-P4), model minimum enforcement (3B+ active params) | +| FTXUI doesn't handle all terminal emulators well | Low | Medium | M1-P3 | `--print` / `--no-tui` mode as universal fallback; FTXUI has broad terminal support; `CleanConsole` always available | +| M1 scope creep delays M2 | Medium | Medium | Both | M1-P4 (native function calling) is explicitly optional — M2 can start after M1-P1 through M1-P3. Validate M1 deliverables by refactoring `wifi_agent` before starting M2. | +| Cross-platform bash execution (Windows) | Medium | Medium | M2-P1 | WSL detection + Git Bash fallback; document minimum requirements | +| ShellCheck / BATS not installed on target system | Low | Low | M2-P2 | Graceful degradation — tools report "not found, install with..." 
and suggest package manager commands | + +--- + +## Competitive Positioning + +| Feature | gaia-bash | Claude Code | OpenCode | Aider | +|---|---|---|---|---| +| Runtime | Native C++ | Node.js/TS | Go | Python | +| Startup | ~50ms | ~2s | ~500ms | ~3s | +| LLM | Local (AMD-optimized) | Cloud (Anthropic) | Multi-provider | Multi-provider | +| Privacy | 100% local | Cloud | Configurable | Configurable | +| Bash specialization | Domain expert | General purpose | General purpose | General purpose | +| ShellCheck integration | Built-in | Via hooks | No | No | +| BATS test generation | Built-in | Manual | No | No | +| TUI framework | FTXUI (reactive C++) | Ink (React/JS) | Bubble Tea (Go) | None | +| AMD NPU acceleration | Via Lemonade | No | No | No | +| Offline capable | Yes | No | With local models | With local models | +| Single binary | Yes | Needs Node.js | Yes | Needs Python | + +--- + +## Open Questions + +1. **Should `gaia-bash` be a standalone binary or a `gaia bash` subcommand?** Standalone (`gaia-bash`) is simpler to distribute and doesn't require Python. Subcommand (`gaia bash`) is more discoverable. Current plan (M2-P4) does both — Python bridge subcommand delegates to the native binary if found on PATH. + +2. **Model auto-download.** Should `gaia-bash` auto-download Qwen3-Coder-Next on first run (like `gaia init`), or require explicit setup? Auto-download is better UX but needs the Lemonade model download API from C++. Deferred to M2-P4. + +3. **MCP tool passthrough.** Should `gaia-bash` support connecting to MCP servers for extensibility? This would allow users to add custom tools (e.g. a Docker MCP server, a Kubernetes MCP server) without recompiling. Low effort since `MCPClient` already exists in `gaia_core` — question is whether to enable it in M2-P1 or defer. + +4. **M1 validation strategy.** The plan proposes refactoring `wifi_agent` to use M1 primitives as a validation gate before starting M2. 
Is that sufficient, or should we build a minimal "demo agent" (no domain tools, just REPL + TUI + file I/O) as the M1 acceptance test? + +5. **Lemonade Server function calling support.** Native function calling (M1-P4) depends on Lemonade Server accepting the OpenAI `"tools"` parameter. Current Lemonade docs don't confirm this explicitly. Needs verification before M1-P4 work begins — if unsupported, M1-P4 becomes a Lemonade Server feature request rather than a `gaia_core` task. + +--- + +## References + +- [Best LLMs for Coding 2026 — WhatLLM](https://whatllm.org/best-llm-for-coding) +- [Qwen3-Coder-Next Complete Guide — DEV Community](https://dev.to/sienna/qwen3-coder-next-the-complete-2026-guide-to-running-powerful-ai-coding-agents-locally-1k95) +- [Qwen3-Coder-Next — HuggingFace](https://huggingface.co/Qwen/Qwen3-Coder-Next) +- [Qwen3-Coder-Next Real-World Tests — XDA](https://www.xda-developers.com/tested-qwen3-coder-next-four-local-ai-coding-models-gap-embarassing/) +- [Qwen3-Coder-Next Ultra-Sparse MoE — VentureBeat](https://venturebeat.com/technology/qwen3-coder-next-offers-vibe-coders-a-powerful-open-source-ultra-sparse-model-with-10x-higher-throughput-for-repo-tasks) +- [FTXUI — GitHub](https://github.com/ArthurSonzogni/FTXUI) +- [MarkdownFTXUI — GitHub](https://github.com/zvasilev/MarkdownFTXUI) +- [llama.cpp Function Calling — GitHub](https://github.com/ggml-org/llama.cpp/blob/master/docs/function-calling.md) +- [OpenCode TUI — opencode.ai](https://opencode.ai/docs/tui/) +- [Claude Code Overview — Anthropic](https://code.claude.com/docs/en/overview) +- [Local Autonomous Agent Stack — SitePoint](https://www.sitepoint.com/the-complete-stack-for-local-autonomous-agents--from-ggml-to-orchestration/) +- [Best Local LLMs for Coding — Overchat](https://overchat.ai/ai-hub/best-local-llm-for-coding) +- [Best Local LLMs 2026 — ToolHalla](https://toolhalla.ai/blog/best-local-llms-for-coding-2026) diff --git a/src/gaia/llm/lemonade_client.py 
b/src/gaia/llm/lemonade_client.py index 40aa394cc..fa803fc94 100644 --- a/src/gaia/llm/lemonade_client.py +++ b/src/gaia/llm/lemonade_client.py @@ -278,6 +278,13 @@ class LemonadeStatus: min_ctx_size=32768, description="Autonomous coding assistant", ), + "bash": AgentProfile( + name="bash", + display_name="Bash Agent", + models=["gemma-4-e4b"], + min_ctx_size=32768, + description="Native C++ bash scripting agent (gaia-bash binary)", + ), "talk": AgentProfile( name="talk", display_name="Talk Agent",