From 03efa808b73fe7351e104b9a5b80e1345c47f0b8 Mon Sep 17 00:00:00 2001 From: Ovtcharov Date: Thu, 28 May 2026 20:33:00 -0700 Subject: [PATCH 1/4] ci: add Python version matrix, macOS smoke lane, and path-filter fixes setup.py claims python_requires>=3.10 but CI only tested 3.12. Unit tests now run against 3.10/3.11/3.12 via a strategy matrix. A macOS smoke job (continue-on-error) catches platform-specific failures early. test_mcp.yml gains cli.py in its path filter so MCP subcommand changes trigger the suite, and test_lemonade_server.yml gets a path filter scoped to src/gaia/llm/ instead of firing on every PR. Closes #881 --- .github/workflows/test_lemonade_server.yml | 16 +++++++++ .github/workflows/test_mcp.yml | 2 ++ .github/workflows/test_unit.yml | 41 ++++++++++++++++++++-- 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_lemonade_server.yml b/.github/workflows/test_lemonade_server.yml index b02b981a6..4bab9b8ce 100644 --- a/.github/workflows/test_lemonade_server.yml +++ b/.github/workflows/test_lemonade_server.yml @@ -7,9 +7,25 @@ on: workflow_call: push: branches: [ main ] + paths: + - 'src/gaia/llm/**' + - 'src/gaia/installer/**' + - 'setup.py' + - '.github/workflows/test_lemonade_server.yml' + - '.github/actions/install-lemonade/**' + - '.github/actions/setup-venv/**' + - 'installer/**' pull_request: branches: [ main ] types: [opened, synchronize, reopened, ready_for_review] + paths: + - 'src/gaia/llm/**' + - 'src/gaia/installer/**' + - 'setup.py' + - '.github/workflows/test_lemonade_server.yml' + - '.github/actions/install-lemonade/**' + - '.github/actions/setup-venv/**' + - 'installer/**' merge_group: workflow_dispatch: inputs: diff --git a/.github/workflows/test_mcp.yml b/.github/workflows/test_mcp.yml index a330e6018..986b7ddea 100644 --- a/.github/workflows/test_mcp.yml +++ b/.github/workflows/test_mcp.yml @@ -13,6 +13,7 @@ on: branches: [ main ] paths: - 'src/gaia/mcp/**' + - 'src/gaia/cli.py' - 'tests/mcp/**' - 'setup.py' - '.github/workflows/test_mcp.yml' @@ -21,6 +22,7 @@ on: types: [opened, synchronize, reopened, ready_for_review] paths: - 'src/gaia/mcp/**' + - 'src/gaia/cli.py' - 'tests/mcp/**' - 'setup.py' - '.github/workflows/test_mcp.yml' diff --git a/.github/workflows/test_unit.yml b/.github/workflows/test_unit.yml index dead53e1d..e9741f398 100644 --- a/.github/workflows/test_unit.yml +++ b/.github/workflows/test_unit.yml @@ -39,17 +39,21 @@ permissions: jobs: unit-tests: - name: Run Unit Tests + name: Unit Tests (py${{ matrix.python-version }}) runs-on: ubuntu-latest timeout-minutes: 30 if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci') + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v6 - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.12' + python-version: ${{ matrix.python-version }} - name: Install uv run: curl -LsSf https://astral.sh/uv/install.sh | sh @@ -139,3 +143,36 @@ jobs: echo "Integration Tests:" echo " - DatabaseMixin + Agent: Full agent lifecycle with database" echo " - DatabaseAgent: Auto-registered database tools" + + # Experimental macOS smoke test — validates core SDK imports and pure-Python + # unit tests on Darwin. continue-on-error until the full suite is macOS-clean. + unit-tests-macos: + name: Unit Tests (macOS smoke) + runs-on: macos-latest + if: github.event_name != 'pull_request' || github.event.pull_request.draft == false || contains(github.event.pull_request.labels.*.name, 'ready_for_ci') + continue-on-error: true + steps: + - uses: actions/checkout@v6 + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + - name: Install uv + run: curl -LsSf https://astral.sh/uv/install.sh | sh + + - name: Install dependencies + run: | + uv pip install --system pytest pytest-asyncio pytest-mock pytest-timeout \ + pyfakefs keyring httpx respx + uv pip install --system -e ".[api]" + + - name: Run unit tests (macOS smoke) + env: + GAIA_MEMORY_DISABLED: "1" + run: | + echo "=== macOS Smoke Test ===" + echo "Running unit tests on macOS to catch platform-specific issues" + echo "" + python -m pytest tests/unit/ -x --timeout=60 -v --tb=short From c38c40f8fa3228418e34f5bc5e00936d33afabf4 Mon Sep 17 00:00:00 2001 From: Ovtcharov Date: Thu, 28 May 2026 23:41:48 -0700 Subject: [PATCH 2/4] ci: add packaging step to macOS job, use consistent pytest style Add packaging validation step to the macOS smoke job matching the Linux job. Switch from `python -m pytest` to bare `pytest` for consistency with the rest of the workflow. --- .github/workflows/test_unit.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_unit.yml b/.github/workflows/test_unit.yml index e9741f398..c7326b8ff 100644 --- a/.github/workflows/test_unit.yml +++ b/.github/workflows/test_unit.yml @@ -168,6 +168,14 @@ jobs: pyfakefs keyring httpx respx uv pip install --system -e ".[api]" + - name: Validate packaging integrity + run: | + echo "=== Validating Packaging Integrity ===" + echo "Checking setup.py packages, __init__.py files, and entry points" + echo "" + pytest tests/unit/test_packaging.py -v --tb=short + echo "✅ Packaging integrity checks passed" + - name: Run unit tests (macOS smoke) env: GAIA_MEMORY_DISABLED: "1" @@ -175,4 +183,4 @@ jobs: echo "=== macOS Smoke Test ===" echo "Running unit tests on macOS to catch platform-specific issues" echo "" - python -m pytest tests/unit/ -x --timeout=60 -v --tb=short + pytest tests/unit/ -x --timeout=60 -v --tb=short From 1d16df20306f11636215b1fb1d121abb243d9813 Mon Sep 17 00:00:00 2001 From: Ovtcharov Date: Fri, 29 May 2026 09:34:42 -0700 Subject: [PATCH 3/4] =?UTF-8?q?ci:=20address=20review=20feedback=20?= =?UTF-8?q?=E2=80=94=20Lemonade=20test=20paths=20and=20pytest-timeout?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lemonade path filters missed tests/test_lemonade*.py, so edits to those files wouldn't trigger the smoke test. Linux matrix was also missing pytest-timeout, creating a divergence with the macOS job. --- .github/workflows/test_lemonade_server.yml | 2 ++ .github/workflows/test_unit.yml | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_lemonade_server.yml b/.github/workflows/test_lemonade_server.yml index 4bab9b8ce..a3ed71404 100644 --- a/.github/workflows/test_lemonade_server.yml +++ b/.github/workflows/test_lemonade_server.yml @@ -15,6 +15,7 @@ on: - '.github/actions/install-lemonade/**' - '.github/actions/setup-venv/**' - 'installer/**' + - 'tests/test_lemonade*.py' pull_request: branches: [ main ] types: [opened, synchronize, reopened, ready_for_review] @@ -26,6 +27,7 @@ on: - '.github/actions/install-lemonade/**' - '.github/actions/setup-venv/**' - 'installer/**' + - 'tests/test_lemonade*.py' merge_group: workflow_dispatch: inputs: diff --git a/.github/workflows/test_unit.yml b/.github/workflows/test_unit.yml index c7326b8ff..25d34921b 100644 --- a/.github/workflows/test_unit.yml +++ b/.github/workflows/test_unit.yml @@ -67,8 +67,8 @@ jobs: # keyring + httpx + respx are required by tests/unit/connections/ # (issue #915). The in-memory keyring backend in tests/conftest.py # avoids the SecretService daemon prerequisite on Linux runners. - uv pip install --system pytest pytest-cov pytest-asyncio pytest-mock pyfakefs \ - keyring httpx respx + uv pip install --system pytest pytest-cov pytest-asyncio pytest-mock pytest-timeout \ + pyfakefs keyring httpx respx uv pip install --system -e ".[api]" - name: Validate packaging integrity From 07fb9a568ee156800d21385905d7bf796af3b2cf Mon Sep 17 00:00:00 2001 From: Ovtcharov Date: Mon, 1 Jun 2026 09:30:24 -0700 Subject: [PATCH 4/4] fix(tests): correct corrupt download test to match implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _is_corrupt_download_error deliberately excludes 'llama-server failed to start' — it fires for too many non-corruption failures. The test was asserting True but the implementation correctly returns False. --- tests/unit/test_llamacpp_backend.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_llamacpp_backend.py b/tests/unit/test_llamacpp_backend.py index 1ab58db2e..ff82a36a3 100644 --- a/tests/unit/test_llamacpp_backend.py +++ b/tests/unit/test_llamacpp_backend.py @@ -587,13 +587,15 @@ def test_no_model_loaded_by_message(self): class TestLlamaServerCorruptDownload: """Verify _is_corrupt_download_error catches llama-server startup failures.""" - def test_llama_server_failed_to_start(self): - """'llama-server failed to start' indicates corrupt model files.""" + def test_llama_server_failed_to_start_is_not_corruption(self): + """'llama-server failed to start' is NOT a corruption signal — Lemonade + emits it for many non-corruption failures (resource limits, ctx_size, + port conflicts), so it must not trigger the delete+redownload path.""" client = LemonadeClient(host="localhost", port=13305) error = Exception( "model_load_error: llama-server failed to start after loading model" ) - assert client._is_corrupt_download_error(error) is True + assert client._is_corrupt_download_error(error) is False def test_incomplete_download(self): client = LemonadeClient(host="localhost", port=13305)