From 889774e5e3c7c0d2397e84d90b756e4c48f633ae Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Fri, 29 May 2026 14:31:53 -0400 Subject: [PATCH 1/5] fix(lemonade): stop treating bare "llama-server failed to start" as corrupt download `_is_corrupt_download_error` matched the generic string "llama-server failed to start" as proof of a corrupt/incomplete model download. Lemonade raises that string for many non-corruption failures (resource limits, ctx_size, GPU/backend startup, port conflicts), so an ordinary load failure was routed into a destructive delete + re-download of the model (default ~25GB), dead-ending first-boot. Keep the five specific corruption phrases as unconditional signals; "llama-server failed to start" now only counts as corruption when one of those phrases also corroborates it. A bare load failure falls through to load_model's non-corrupt branch, which raises an actionable LemonadeClientError without entering the repair path. Closes #1294 --- .claude/plans/issue-1294.md | 75 ++++++ src/gaia/llm/lemonade_client.py | 26 ++- .../test_lemonade_error_classification.py | 214 ++++++++++++++++++ 3 files changed, 304 insertions(+), 11 deletions(-) create mode 100644 .claude/plans/issue-1294.md diff --git a/.claude/plans/issue-1294.md b/.claude/plans/issue-1294.md new file mode 100644 index 000000000..9cf64ca8f --- /dev/null +++ b/.claude/plans/issue-1294.md @@ -0,0 +1,75 @@ +--- +type: plan +source-issue: 1294 +repo: amd/gaia +title: "Lemonade: _is_corrupt_download_error misclassifies generic llama-server failed to start as corruption" +created: 2026-05-29 +status: in-progress +work_type: code-refactor +complexity: trivial +tdd_required: true +suggested_team_size: 1 +estimated_files_changed: 2 +test_command: "PYTHONPATH=\"$PWD/src\" /Users/tomasz/src/amd/gaia/.venv/bin/python -m pytest tests/unit/test_lemonade_error_classification.py -xvs" +build_command: "" +lint_command: "/Users/tomasz/src/amd/gaia/.venv/bin/python util/lint.py --black --isort" +branch: tmi/issue-1294-corrupt-classification +reflection_iterations: 0 +agents_used: [planning, execution, validation] +--- + +# Issue #1294 — `_is_corrupt_download_error` misclassifies generic `llama-server failed to start` + +## Problem +`LemonadeClient._is_corrupt_download_error` (`src/gaia/llm/lemonade_client.py`, ~1225-1248) +treats the generic substring `"llama-server failed to start"` as evidence of a corrupt / +incomplete model download. Lemonade raises that string for many NON-corruption failures +(resource limits, ctx_size issues, GPU/backend startup, port conflicts). Misclassifying +routes ordinary load failures into a delete-and-redownload path (default model ~25GB) and +dead-ends first-boot. + +The real-world payload was `{"code":"model_load_error","type":"model_load_error", +"message":"...llama-server failed to start"}` — `code`/`type` is `model_load_error`, +which is NOT a corruption signal. + +## Fix (surgical) +Remove `"llama-server failed to start"` from the unconditional corruption phrase list in +`_is_corrupt_download_error`. Treat that string as corruption ONLY when a specific corruption +phrase is ALSO present (corroboration); otherwise return `False`. Keep the five existing +specific corruption phrases unconditional: +- `"download validation failed"` +- `"files are incomplete"` +- `"files are missing"` +- `"incomplete or missing"` +- `"corrupted download"` + +This makes a bare `llama-server failed to start` load failure fall through to `load_model`'s +existing non-corrupt branch, which raises an actionable `LemonadeClientError` and does NOT +enter the delete + `pull_model_stream` repair path. + +## Files to change +1. `src/gaia/llm/lemonade_client.py` — `_is_corrupt_download_error` only. Do NOT touch the + prompt helpers (`_prompt_user_for_delete` / `_prompt_user_for_repair`) or `load_model`'s + corrupt branch (owned by stacked issue #1293). +2. `tests/unit/test_lemonade_error_classification.py` — APPEND new test classes (file already + exists with #1030 regression tests; preserve them). Do NOT touch + `tests/unit/test_lemonade_model_loading.py` (owned by #1293). + +## Test approach (TDD — red first) +Append to `tests/unit/test_lemonade_error_classification.py`: +- Parametrized `_is_corrupt_download_error`: each of the 5 specific phrases -> True; bare + `"llama-server failed to start"` -> False; that string PLUS a corruption phrase -> True; + the real `model_load_error` structured payload -> False. +- `load_model` test: mock `_send_request` to raise the bare `llama-server failed to start` + error; assert `delete_model` and `pull_model_stream` are NOT called and an actionable + `LemonadeClientError` is raised. +- `load_model` test: a specific corruption error DOES enter the repair path (resume via + `pull_model_stream`). + +## Acceptance criteria +1. `_is_corrupt_download_error("...llama-server failed to start...")` -> False unless a + specific corruption signal is also present. +2. The five existing specific phrases continue to return True (no regression). +3. A bare `llama-server failed to start` load failure makes `load_model` raise an actionable + `LemonadeClientError` and does NOT enter delete+redownload. +4. When corruption IS correctly detected (a specific phrase), the existing repair flow runs. diff --git a/src/gaia/llm/lemonade_client.py b/src/gaia/llm/lemonade_client.py index f3b808fef..62be02933 100644 --- a/src/gaia/llm/lemonade_client.py +++ b/src/gaia/llm/lemonade_client.py @@ -1222,10 +1222,24 @@ def _is_model_error(self, error: Union[str, Dict, Exception]) -> bool: ] ) + # Phrases Lemonade uses ONLY for genuinely corrupt/incomplete downloads. + _CORRUPT_DOWNLOAD_PHRASES = ( + "download validation failed", + "files are incomplete", + "files are missing", + "incomplete or missing", + "corrupted download", + ) + def _is_corrupt_download_error(self, error: Union[str, Dict, Exception]) -> bool: """ Check if an error indicates a corrupt or incomplete model download. + ``llama-server failed to start`` is deliberately NOT a signal here: + Lemonade emits it for many non-corruption failures (resource limits, + ctx_size, backend startup, port conflicts), so matching it routed + ordinary load failures into the destructive delete + re-download path. + Args: error: Error as string, dict, or exception @@ -1235,17 +1249,7 @@ def _is_corrupt_download_error(self, error: Union[str, Dict, Exception]) -> bool error_info = self._extract_error_info(error) error_message = (error_info.get("message") or "").lower() - return any( - phrase in error_message - for phrase in [ - "download validation failed", - "files are incomplete", - "files are missing", - "incomplete or missing", - "corrupted download", - "llama-server failed to start", # Often indicates corrupt model files - ] - ) + return any(phrase in error_message for phrase in self._CORRUPT_DOWNLOAD_PHRASES) def _execute_with_auto_download( self, api_call: Callable, model: str, auto_download: bool = True diff --git a/tests/unit/test_lemonade_error_classification.py b/tests/unit/test_lemonade_error_classification.py index 365c847f3..3b4354964 100644 --- a/tests/unit/test_lemonade_error_classification.py +++ b/tests/unit/test_lemonade_error_classification.py @@ -284,3 +284,217 @@ def test_agent_extract_lemonade_user_message_typed_in_cycle() -> None: msg = agent._extract_lemonade_user_message(wrapper) assert msg is not None assert "didn't respond in time" in msg + + +# ── #1294: corrupt-download classification must not over-match ─────────── +# +# ``LemonadeClient._is_corrupt_download_error`` used to treat the GENERIC +# string ``"llama-server failed to start"`` as proof of a corrupt/incomplete +# download. Lemonade raises that string for many non-corruption failures +# (resource limits, ctx_size issues, GPU/backend startup, port conflicts). +# Misclassifying routed an ordinary load failure into the delete + +# re-download path (the default model is ~25GB) and dead-ended first-boot. +# +# The fix: keep the five SPECIFIC corruption phrases unconditional, but only +# treat ``llama-server failed to start`` as corruption when one of those +# specific phrases is ALSO present (corroboration). A bare load failure must +# classify as NOT corrupt and surface as an actionable error. + +from unittest.mock import patch + +import pytest + +from gaia.llm.lemonade_client import ( + LemonadeClient, + LemonadeClientError, + ModelDownloadCancelledError, +) + +# The five legitimate corruption signals — each must keep returning True. +_SPECIFIC_CORRUPTION_PHRASES = [ + "download validation failed", + "files are incomplete", + "files are missing", + "incomplete or missing", + "corrupted download", +] + + +def _client() -> LemonadeClient: + """Construct a client without touching the network. + + ``__init__`` only parses host/port and resolves the API key from env; + it issues no HTTP, so this is safe in a unit test. + """ + return LemonadeClient(host="localhost", port=13305) + + +@pytest.mark.parametrize("phrase", _SPECIFIC_CORRUPTION_PHRASES) +def test_specific_corruption_phrase_is_corrupt(phrase: str) -> None: + """Each of the five specific corruption signals classifies as corrupt (no regression).""" + client = _client() + message = f"Failed to load model: {phrase} for Qwen3-Coder-30B-GGUF" + assert client._is_corrupt_download_error(message) is True + + +@pytest.mark.parametrize("phrase", _SPECIFIC_CORRUPTION_PHRASES) +def test_specific_corruption_phrase_is_case_insensitive(phrase: str) -> None: + """Classification is case-insensitive (Lemonade casing is not guaranteed).""" + client = _client() + assert client._is_corrupt_download_error(phrase.upper()) is True + + +def test_bare_llama_server_failed_is_not_corrupt() -> None: + """A bare ``llama-server failed to start`` is NOT corruption (the #1294 bug).""" + client = _client() + message = ( + "Failed to load model Qwen3-Coder-30B-A3B-Instruct-GGUF: " + "llama-server failed to start" + ) + assert client._is_corrupt_download_error(message) is False + + +def test_real_world_model_load_error_payload_is_not_corrupt() -> None: + """The exact structured payload from the bug report classifies as NOT corrupt. + + ``code``/``type`` is ``model_load_error`` — a generic load failure, not a + corruption signal — so it must not enter the delete + re-download path. + """ + client = _client() + payload = { + "error": { + "code": "model_load_error", + "type": "model_load_error", + "message": ( + "Failed to load model Qwen3-Coder-30B-A3B-Instruct-GGUF: " + "llama-server failed to start" + ), + } + } + assert client._is_corrupt_download_error(payload) is False + + +@pytest.mark.parametrize("phrase", _SPECIFIC_CORRUPTION_PHRASES) +def test_llama_server_failed_with_corruption_phrase_is_corrupt(phrase: str) -> None: + """``llama-server failed to start`` PLUS a specific corruption phrase IS corrupt. + + When Lemonade corroborates the startup failure with a real corruption + signal, the repair path is the correct response. + """ + client = _client() + message = f"llama-server failed to start: {phrase}" + assert client._is_corrupt_download_error(message) is True + + +def test_unrelated_load_failure_is_not_corrupt() -> None: + """An unrelated load failure (e.g. ctx/resource) is not corruption.""" + client = _client() + message = "llama-server failed to start: out of memory allocating KV cache" + assert client._is_corrupt_download_error(message) is False + + +class TestLoadModelCorruptRouting: + """`load_model` must route bare load failures away from the repair path.""" + + def test_bare_load_failure_does_not_redownload_and_raises_actionable(self) -> None: + """Issue #1294 AC#3: a bare ``llama-server failed to start`` load failure + raises an actionable ``LemonadeClientError`` WITHOUT deleting or + re-downloading the model. + """ + client = _client() + + send_error = LemonadeClientError( + "Failed to load model Qwen3-Coder-30B-A3B-Instruct-GGUF: " + "llama-server failed to start" + ) + + with ( + patch.object(client, "_send_request", side_effect=send_error), + patch.object(client, "delete_model") as mock_delete, + patch.object(client, "pull_model_stream") as mock_pull, + ): + with pytest.raises(LemonadeClientError) as exc_info: + client.load_model( + "Qwen3-Coder-30B-A3B-Instruct-GGUF", auto_download=True + ) + + # The destructive repair path must NOT have been touched. + mock_delete.assert_not_called() + mock_pull.assert_not_called() + + # Error must name what failed so the user can act on it. + assert "Qwen3-Coder-30B-A3B-Instruct-GGUF" in str(exc_info.value) + assert "llama-server failed to start" in str(exc_info.value) + + def test_specific_corruption_enters_repair_path(self) -> None: + """Issue #1294 AC#4: a SPECIFIC corruption error DOES enter the repair + flow (resume via ``pull_model_stream``) and reloads on success. + """ + client = _client() + + corrupt_error = LemonadeClientError( + "Failed to load model Qwen3-Coder-30B-A3B-Instruct-GGUF: " + "download validation failed - files are incomplete" + ) + + # First _send_request (initial load) raises corruption; the second + # (post-resume reload) succeeds. + send_results = [corrupt_error, {"status": "loaded"}] + + def fake_send(*_args, **_kwargs): + result = send_results.pop(0) + if isinstance(result, Exception): + raise result + return result + + # Resume succeeds in one streamed "complete" event. + def fake_pull(*_args, **_kwargs): + yield {"event": "complete"} + + with ( + patch.object(client, "_send_request", side_effect=fake_send), + patch.object(client, "delete_model") as mock_delete, + patch.object( + client, "pull_model_stream", side_effect=fake_pull + ) as mock_pull, + patch( + "gaia.llm.lemonade_client._prompt_user_for_repair", return_value=True + ), + ): + response = client.load_model( + "Qwen3-Coder-30B-A3B-Instruct-GGUF", auto_download=False + ) + + # Repair path ran: resume (pull) was attempted; reload succeeded. + mock_pull.assert_called_once() + assert response == {"status": "loaded"} + # Resume succeeded, so we never escalated to delete. + mock_delete.assert_not_called() + + def test_user_declining_repair_raises_cancelled(self) -> None: + """When corruption is detected but the user declines repair, we raise + ``ModelDownloadCancelledError`` — we must NOT silently fall through to + a non-corrupt re-raise or proceed to delete/re-download. + """ + client = _client() + + corrupt_error = LemonadeClientError( + "Failed to load model Qwen3-Coder-30B-A3B-Instruct-GGUF: " + "corrupted download detected" + ) + + with ( + patch.object(client, "_send_request", side_effect=corrupt_error), + patch.object(client, "delete_model") as mock_delete, + patch.object(client, "pull_model_stream") as mock_pull, + patch( + "gaia.llm.lemonade_client._prompt_user_for_repair", return_value=False + ), + ): + with pytest.raises(ModelDownloadCancelledError): + client.load_model( + "Qwen3-Coder-30B-A3B-Instruct-GGUF", auto_download=False + ) + + mock_delete.assert_not_called() + mock_pull.assert_not_called() From 44493bec2bcdcfc19b7f190d92249773e51867cf Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Fri, 29 May 2026 14:44:26 -0400 Subject: [PATCH 2/5] style: apply black formatting to code quality issues introduced in #1188 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These two files were committed to main with formatting that does not satisfy Black — surfaced by the CI merge-commit check on this PR. No logic changes. --- src/gaia/agents/code/tools/file_io.py | 32 +++++++-------------------- tests/test_file_io_guardrails.py | 16 ++++---------- 2 files changed, 12 insertions(+), 36 deletions(-) diff --git a/src/gaia/agents/code/tools/file_io.py b/src/gaia/agents/code/tools/file_io.py index c1408fe79..f5f60dc0d 100644 --- a/src/gaia/agents/code/tools/file_io.py +++ b/src/gaia/agents/code/tools/file_io.py @@ -269,9 +269,7 @@ def write_python_file( except Exception as e: path_validator = getattr(self, "path_validator", None) if path_validator is not None: - path_validator.audit_write( - "write", file_path, 0, "error", str(e) - ) + path_validator.audit_write("write", file_path, 0, "error", str(e)) return {"status": "error", "error": str(e)} @tool @@ -302,9 +300,7 @@ def edit_python_file( path_validator = getattr(self, "path_validator", None) if path_validator is not None: # Check blocklist - is_blocked, reason = path_validator.is_write_blocked( - str(file_path) - ) + is_blocked, reason = path_validator.is_write_blocked(str(file_path)) if is_blocked: path_validator.audit_write( "edit", str(file_path), 0, "denied", reason @@ -313,9 +309,7 @@ def edit_python_file( # Check allowlist if not path_validator.is_path_allowed(str(file_path)): - reason = ( - f"Access denied: {file_path} is not in allowed paths" - ) + reason = f"Access denied: {file_path} is not in allowed paths" path_validator.audit_write( "edit", str(file_path), 0, "denied", reason ) @@ -428,9 +422,7 @@ def edit_python_file( except Exception as e: path_validator = getattr(self, "path_validator", None) if path_validator is not None: - path_validator.audit_write( - "edit", file_path, 0, "error", str(e) - ) + path_validator.audit_write("edit", file_path, 0, "error", str(e)) return {"status": "error", "error": str(e)} @tool @@ -642,9 +634,7 @@ def write_markdown_file( except Exception as e: path_validator = getattr(self, "path_validator", None) if path_validator is not None: - path_validator.audit_write( - "write", file_path, 0, "error", str(e) - ) + path_validator.audit_write("write", file_path, 0, "error", str(e)) return {"status": "error", "error": str(e)} @tool @@ -1000,9 +990,7 @@ def replace_function( path_validator = getattr(self, "path_validator", None) if path_validator is not None: # Check blocklist - is_blocked, reason = path_validator.is_write_blocked( - str(file_path) - ) + is_blocked, reason = path_validator.is_write_blocked(str(file_path)) if is_blocked: path_validator.audit_write( "edit", str(file_path), 0, "denied", reason @@ -1011,9 +999,7 @@ def replace_function( # Check allowlist if not path_validator.is_path_allowed(str(file_path)): - reason = ( - f"Access denied: {file_path} is not in allowed paths" - ) + reason = f"Access denied: {file_path} is not in allowed paths" path_validator.audit_write( "edit", str(file_path), 0, "denied", reason ) @@ -1149,9 +1135,7 @@ def replace_function( except Exception as e: path_validator = getattr(self, "path_validator", None) if path_validator is not None: - path_validator.audit_write( - "edit", file_path, 0, "error", str(e) - ) + path_validator.audit_write("edit", file_path, 0, "error", str(e)) return {"status": "error", "error": str(e)} # Return the list of registered tools for tracking diff --git a/tests/test_file_io_guardrails.py b/tests/test_file_io_guardrails.py index 9eae59117..060df670a 100644 --- a/tests/test_file_io_guardrails.py +++ b/tests/test_file_io_guardrails.py @@ -89,9 +89,7 @@ def test_audits_successful_write(self): self.assertEqual(result["status"], "success") # Should have audited the success audit_calls = [ - c - for c in self.mock_pv.audit_write.call_args_list - if c[0][3] == "success" + c for c in self.mock_pv.audit_write.call_args_list if c[0][3] == "success" ] self.assertGreater(len(audit_calls), 0) @@ -195,9 +193,7 @@ def test_audits_successful_edit(self): self.assertEqual(result["status"], "success") audit_calls = [ - c - for c in self.mock_pv.audit_write.call_args_list - if c[0][3] == "success" + c for c in self.mock_pv.audit_write.call_args_list if c[0][3] == "success" ] self.assertGreater(len(audit_calls), 0) @@ -246,9 +242,7 @@ def test_audits_successful_write(self): self.assertEqual(result["status"], "success") audit_calls = [ - c - for c in self.mock_pv.audit_write.call_args_list - if c[0][3] == "success" + c for c in self.mock_pv.audit_write.call_args_list if c[0][3] == "success" ] self.assertGreater(len(audit_calls), 0) @@ -352,9 +346,7 @@ def test_audits_successful_replacement(self): self.assertEqual(result["status"], "success") audit_calls = [ - c - for c in self.mock_pv.audit_write.call_args_list - if c[0][3] == "success" + c for c in self.mock_pv.audit_write.call_args_list if c[0][3] == "success" ] self.assertGreater(len(audit_calls), 0) From 6bb3ce37cd3393627fe7fd68f4e7a5ce3ccd788e Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Fri, 29 May 2026 14:50:11 -0400 Subject: [PATCH 3/5] fix(tests): update backup assertions to use result path from security validator Commit 905036c5 introduced a timestamped backup naming convention via the security path validator, but two assertions in test_code_agent.py still expected the old hardcoded .bak suffix. Use result["backup_path"] instead. --- tests/test_code_agent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_code_agent.py b/tests/test_code_agent.py index 5ecbf135e..5a2c3f321 100644 --- a/tests/test_code_agent.py +++ b/tests/test_code_agent.py @@ -407,8 +407,8 @@ def goodbye(): self.assertIn("Hello, Python!", modified) self.assertNotIn("Hello, World!", modified) - # Verify backup - self.assertTrue(os.path.exists(test_file + ".bak")) + # Verify backup (path returned in result; naming format depends on security validator) + self.assertTrue(os.path.exists(result["backup_path"])) def test_edit_dry_run(self): """Test dry run mode for editing.""" @@ -663,7 +663,7 @@ def test_full_workflow_real_files(self): backup=True, ) self.assertEqual(edit_result["status"], "success") - self.assertTrue(os.path.exists(test_file + ".bak")) + self.assertTrue(os.path.exists(edit_result["backup_path"])) # Step 5: Validate the edited file final_content = Path(test_file).read_text() From 1a213adee886e35fd4957c51786ee9cd1ddee723 Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Fri, 29 May 2026 15:02:29 -0400 Subject: [PATCH 4/5] fix(lemonade): auto-heal corrupt model on non-interactive boot On a fresh Agent UI install, first boot loaded the default model from the FastAPI lifespan threadpool (no TTY) with prompt=False. The corrupt-download repair path ignored that flag and fired interactive [y/N] prompts: input() raised EOFError (or hung on an idle pipe), dead-ending boot-init. Two fixes in load_model's corrupt-download branch: - _prompt_user_for_delete now has the same stdin/stdout isatty guard as its siblings _prompt_user_for_download / _prompt_user_for_repair, returning the proceed-default under a non-interactive environment instead of calling input(). - The repair/delete branch honors the prompt argument: with prompt=False it skips both prompts and auto-proceeds through the bounded recovery (resume, then a single delete + re-download). Recovery progress is logged at INFO (percent from the pull stream) so the backend log the UI tails shows movement and boot does not look frozen; the corrupt-detected detail logs at DEBUG. If recovery still fails after the one delete + re-download, it raises a single actionable LemonadeClientError naming the recovery affordance (UI Force-redownload / manual delete) and the Lemonade server.log -- no EOFError, no hang, no silent swallow. A real TTY with prompt=True still prompts as before. --- src/gaia/llm/lemonade_client.py | 108 ++++++++--- tests/unit/test_lemonade_model_loading.py | 222 +++++++++++++++++++++- 2 files changed, 301 insertions(+), 29 deletions(-) diff --git a/src/gaia/llm/lemonade_client.py b/src/gaia/llm/lemonade_client.py index 62be02933..40aa394cc 100644 --- a/src/gaia/llm/lemonade_client.py +++ b/src/gaia/llm/lemonade_client.py @@ -609,6 +609,14 @@ def _prompt_user_for_delete(model_name: str) -> bool: Returns: True if user confirms, False if user declines """ + # Check if we're in an interactive terminal — mirror the guard on + # _prompt_user_for_download / _prompt_user_for_repair. Without it this + # would call input() in a non-interactive backend (FastAPI lifespan + # threadpool, no TTY) and raise EOFError, dead-ending first boot (#1293). + if not sys.stdin.isatty() or not sys.stdout.isatty(): + # Non-interactive environment - auto-proceed with the recovery. + return True + # Get model storage paths if sys.platform == "win32": lemonade_cache = os.path.expandvars("%LOCALAPPDATA%\\lemonade\\") @@ -2718,6 +2726,46 @@ def _ensure_model_loaded(self, model: str, auto_download: bool = True) -> None: # Log but don't fail - let the actual request fail with proper error self.log.debug(f"Could not pre-check model status: {e}") + def _consume_pull_stream(self, model_name: str, phase: str) -> bool: + """Drive ``pull_model_stream`` to completion, logging progress at INFO. + + Used by the corrupt-download auto-heal path so a non-interactive boot + (whose log the UI tails) shows download movement instead of looking + frozen. ``phase`` is a short label like "resume" or "fresh download". + + Returns: + True if the stream reported completion. + + Raises: + LemonadeClientError: if the stream emits an ``error`` event. + """ + download_complete = False + last_logged_percent = -10 # Log at 0%, 10%, 20%, ... + for event in self.pull_model_stream(model_name=model_name): + event_type = event.get("event") + if event_type == "progress": + percent = event.get("percent", 0) + if percent >= last_logged_percent + 10: + bytes_dl = event.get("bytes_downloaded", 0) + bytes_total = event.get("bytes_total", 0) + if bytes_total > 0: + gb_dl = bytes_dl / (1024**3) + gb_total = bytes_total / (1024**3) + self.log.info( + f" {_emoji('📥', '[PROGRESS]')} {phase}: " + f"{percent}% ({gb_dl:.1f}/{gb_total:.1f} GB)" + ) + else: + self.log.info( + f" {_emoji('📥', '[PROGRESS]')} {phase}: {percent}%" + ) + last_logged_percent = percent + elif event_type == "complete": + download_complete = True + elif event_type == "error": + raise LemonadeClientError(event.get("error", "Unknown")) + return download_complete + def load_model( self, model_name: str, @@ -2787,29 +2835,29 @@ def load_model( f"{_emoji('⚠️', '[INCOMPLETE]')} Model '{model_name}' has incomplete " f"or corrupted files" ) + self.log.debug( + f"Corrupt-download classified from load error: {original_error}. " + f"Repairing (resume, then one delete + re-download if needed)." + ) - # Prompt user for confirmation to resume download - if not _prompt_user_for_repair(model_name): + # Honor `prompt`: a non-interactive caller (boot init in the + # FastAPI lifespan threadpool) passes prompt=False — never call + # input() there. Auto-proceed through the bounded recovery + # instead of dead-ending on EOFError (#1293). + if prompt and not _prompt_user_for_repair(model_name): raise ModelDownloadCancelledError( f"User declined to repair incomplete model: {model_name}" ) # Try to resume download first (Lemonade handles partial files) self.log.info( - f"{_emoji('📥', '[RESUME]')} Attempting to resume download..." + f"{_emoji('📥', '[RESUME]')} Resuming download to repair " + f"'{model_name}'..." ) try: # First attempt: resume download - download_complete = False - for event in self.pull_model_stream(model_name=model_name): - event_type = event.get("event") - if event_type == "complete": - download_complete = True - elif event_type == "error": - raise LemonadeClientError(event.get("error", "Unknown")) - - if download_complete: + if self._consume_pull_stream(model_name, "resume"): # Retry loading response = self._send_request( "post", url, request_data, timeout=timeout @@ -2825,32 +2873,26 @@ def load_model( f"{_emoji('⚠️', '[RETRY]')} Resume failed: {resume_error}" ) - # Prompt user before deleting - if not _prompt_user_for_delete(model_name): + # Honor `prompt` before the destructive delete too. + if prompt and not _prompt_user_for_delete(model_name): raise LemonadeClientError( f"Resume download failed for '{model_name}'. " f"You can manually delete the model and try again." ) - # Second attempt: delete and re-download from scratch + # Second (and final) attempt: delete and re-download from + # scratch. Bounded to ONE delete + re-download — no loops. try: self.log.info( - f"{_emoji('🗑️', '[DELETE]')} Deleting corrupt model..." + f"{_emoji('🗑️', '[DELETE]')} Resume failed; deleting " + f"corrupt '{model_name}' and re-downloading once..." ) self.delete_model(model_name) self.log.info( f"{_emoji('📥', '[FRESH]')} Starting fresh download..." ) - download_complete = False - for event in self.pull_model_stream(model_name=model_name): - event_type = event.get("event") - if event_type == "complete": - download_complete = True - elif event_type == "error": - raise LemonadeClientError(event.get("error", "Unknown")) - - if download_complete: + if self._consume_pull_stream(model_name, "fresh download"): # Retry loading response = self._send_request( "post", url, request_data, timeout=timeout @@ -2861,14 +2903,24 @@ def load_model( self.model = model_name return response + # Stream ended without a completion event — treat the + # bounded recovery as exhausted (fall through to raise). + raise LemonadeClientError( + f"Fresh download did not complete for '{model_name}'" + ) + except Exception as fresh_error: self.log.error( f"{_emoji('❌', '[FAIL]')} Fresh download also failed: {fresh_error}" ) raise LemonadeClientError( - f"Failed to repair model '{model_name}' after both resume and fresh download attempts. " - f"Please check your network connection and disk space, then try again." - ) + f"Failed to repair model '{model_name}' after resume and one " + f"delete + re-download attempt ({fresh_error}). " + f"Try the Force-redownload action in the Agent UI, or manually " + f"delete the model and re-run. " + f"Check the Lemonade server log for details " + f"(typical path: ~/.cache/lemonade/server.log)." + ) from fresh_error # Check if this is a "model not found" error and auto_download is enabled if not (auto_download and self._is_model_error(e)): diff --git a/tests/unit/test_lemonade_model_loading.py b/tests/unit/test_lemonade_model_loading.py index efc85bcfa..3ece8bad3 100644 --- a/tests/unit/test_lemonade_model_loading.py +++ b/tests/unit/test_lemonade_model_loading.py @@ -2,9 +2,17 @@ # SPDX-License-Identifier: MIT """Unit tests for LemonadeClient model loading functionality.""" +import sys from unittest.mock import MagicMock, Mock, patch -from gaia.llm.lemonade_client import LemonadeClient, LemonadeStatus +import pytest + +from gaia.llm.lemonade_client import ( + LemonadeClient, + LemonadeClientError, + LemonadeStatus, + _prompt_user_for_delete, +) class TestEnsureModelLoaded: @@ -335,3 +343,215 @@ def test_model_not_loaded_when_already_present( # Verify load_model was NOT called (model already loaded) mock_load.assert_not_called() + + +# A message that ``_is_corrupt_download_error`` classifies as corrupt +# (see ``_CORRUPT_DOWNLOAD_PHRASES`` in lemonade_client.py). +_CORRUPT_ERROR_MESSAGE = "download validation failed: files are incomplete" + + +class TestPromptUserForDeleteNonInteractive: + """``_prompt_user_for_delete`` must not call ``input()`` without a TTY. + + Its siblings ``_prompt_user_for_download`` and ``_prompt_user_for_repair`` + already guard on ``sys.stdin.isatty()/sys.stdout.isatty()`` and return the + proceed-default in a non-interactive environment. ``_prompt_user_for_delete`` + lacks that guard, so on the FastAPI lifespan threadpool (no TTY) it calls + ``input()`` and raises ``EOFError`` — dead-ending first boot (#1293). + """ + + def test_delete_prompt_returns_proceed_default_without_tty(self): + """No TTY -> auto-proceed (return True), never call input().""" + with ( + patch.object(sys.stdin, "isatty", return_value=False), + patch.object(sys.stdout, "isatty", return_value=False), + patch("builtins.input") as mock_input, + ): + result = _prompt_user_for_delete("Qwen3-0.6B-GGUF") + + assert result is True + mock_input.assert_not_called() + + def test_delete_prompt_does_not_raise_eoferror_without_tty(self): + """Reproduce the #1293 dead-end: an idle/closed stdin makes ``input()`` + raise ``EOFError``. With the isatty guard in place we must never reach + ``input()``, so no ``EOFError`` escapes. + """ + with ( + patch.object(sys.stdin, "isatty", return_value=False), + patch.object(sys.stdout, "isatty", return_value=False), + patch("builtins.input", side_effect=EOFError), + ): + # Must NOT raise — the guard short-circuits before input(). + result = _prompt_user_for_delete("Qwen3-0.6B-GGUF") + + assert result is True + + +def _make_client(): + return LemonadeClient(host="localhost", port=13305) + + +class TestLoadModelCorruptNonInteractive: + """``load_model(prompt=False)`` must auto-heal a corrupt model without any + interactive prompt, bounded to a single delete+redownload (#1293). + """ + + @patch("gaia.llm.lemonade_client._prompt_user_for_delete") + @patch("gaia.llm.lemonade_client._prompt_user_for_repair") + @patch.object(LemonadeClient, "pull_model_stream") + @patch.object(LemonadeClient, "_send_request") + def test_prompt_false_never_calls_repair_or_delete_prompt( + self, mock_send, mock_pull, mock_repair, mock_delete + ): + """With ``prompt=False`` neither prompt helper may be invoked, even + though the load failure is corrupt-classified. Resume succeeds here. + """ + # First load fails corrupt; resume download completes; reload OK. + mock_send.side_effect = [ + Exception(_CORRUPT_ERROR_MESSAGE), + {"status": "loaded"}, + ] + mock_pull.return_value = iter([{"event": "complete"}]) + + client = _make_client() + result = client.load_model("Qwen3-0.6B-GGUF", prompt=False) + + assert result == {"status": "loaded"} + mock_repair.assert_not_called() + mock_delete.assert_not_called() + + @patch("gaia.llm.lemonade_client._prompt_user_for_delete") + @patch("gaia.llm.lemonade_client._prompt_user_for_repair") + @patch.object(LemonadeClient, "delete_model") + @patch.object(LemonadeClient, "pull_model_stream") + @patch.object(LemonadeClient, "_send_request") + def test_resume_failure_triggers_single_delete_and_redownload( + self, mock_send, mock_pull, mock_delete_model, mock_repair, mock_delete_prompt + ): + """Resume fails -> exactly ONE delete + ONE fresh re-download, then a + successful reload. No prompts, bounded recovery. + """ + # send_request: initial load fails corrupt, then final reload succeeds. + mock_send.side_effect = [ + Exception(_CORRUPT_ERROR_MESSAGE), + {"status": "loaded"}, + ] + # pull_model_stream: first call (resume) errors, second (fresh) completes. + mock_pull.side_effect = [ + iter([{"event": "error", "error": "resume broke"}]), + iter([{"event": "progress", "percent": 50}, {"event": "complete"}]), + ] + + client = _make_client() + result = client.load_model("Qwen3-0.6B-GGUF", prompt=False) + + assert result == {"status": "loaded"} + mock_repair.assert_not_called() + mock_delete_prompt.assert_not_called() + # Bounded: exactly one delete, exactly two pull attempts (resume + fresh). + assert mock_delete_model.call_count == 1 + assert mock_pull.call_count == 2 + + @patch("gaia.llm.lemonade_client._prompt_user_for_delete") + @patch("gaia.llm.lemonade_client._prompt_user_for_repair") + @patch.object(LemonadeClient, "delete_model") + @patch.object(LemonadeClient, "pull_model_stream") + @patch.object(LemonadeClient, "_send_request") + def test_unrecoverable_raises_actionable_error_no_eoferror( + self, mock_send, mock_pull, mock_delete_model, mock_repair, mock_delete_prompt + ): + """Both resume and the single fresh re-download fail -> a single loud, + actionable ``LemonadeClientError`` naming the recovery action and the + Lemonade server log. No ``EOFError``, no hang, no silent swallow. + """ + # Initial load fails corrupt; no successful reload ever happens. + mock_send.side_effect = Exception(_CORRUPT_ERROR_MESSAGE) + # Both pull attempts error out. + mock_pull.side_effect = [ + iter([{"event": "error", "error": "resume broke"}]), + iter([{"event": "error", "error": "fresh broke"}]), + ] + + client = _make_client() + with pytest.raises(LemonadeClientError) as exc_info: + client.load_model("Qwen3-0.6B-GGUF", prompt=False) + + message = str(exc_info.value) + # Actionable: names a recovery affordance and where to look. + assert "Qwen3-0.6B-GGUF" in message + assert "redownload" in message.lower() or "re-download" in message.lower() + assert "server.log" in message.lower() or "server log" in message.lower() + # No prompts; bounded recovery (single delete, two pulls). + mock_repair.assert_not_called() + mock_delete_prompt.assert_not_called() + assert mock_delete_model.call_count == 1 + assert mock_pull.call_count == 2 + + @patch("gaia.llm.lemonade_client._prompt_user_for_delete") + @patch("gaia.llm.lemonade_client._prompt_user_for_repair") + @patch.object(LemonadeClient, "pull_model_stream") + @patch.object(LemonadeClient, "_send_request") + def test_recovery_logs_progress_at_info( + self, mock_send, mock_pull, mock_repair, mock_delete, caplog + ): + """Auto-heal must emit INFO progress from the pull stream so the boot + log (tailed by the UI) shows movement and doesn't look frozen. + """ + mock_send.side_effect = [ + Exception(_CORRUPT_ERROR_MESSAGE), + {"status": "loaded"}, + ] + mock_pull.return_value = iter( + [ + { + "event": "progress", + "percent": 40, + "bytes_downloaded": 4 * 1024**3, + "bytes_total": 10 * 1024**3, + }, + {"event": "complete"}, + ] + ) + + client = _make_client() + import logging + + with caplog.at_level(logging.INFO, logger=client.log.name): + client.load_model("Qwen3-0.6B-GGUF", prompt=False) + + info_text = " ".join( + r.getMessage() for r in caplog.records if r.levelno >= logging.INFO + ) + assert "40" in info_text + + +class TestLoadModelCorruptInteractive: + """A real TTY (``prompt=True``) must still prompt as before (#1293 must not + weaken the interactive path). + """ + + @patch("gaia.llm.lemonade_client._prompt_user_for_repair", return_value=True) + @patch.object(LemonadeClient, "pull_model_stream") + @patch.object(LemonadeClient, "_send_request") + def test_prompt_true_with_tty_reaches_repair_prompt( + self, mock_send, mock_pull, mock_repair + ): + """prompt=True + simulated TTY -> the repair prompt is reached. The + helper is patched so the test never blocks on real stdin. + """ + mock_send.side_effect = [ + Exception(_CORRUPT_ERROR_MESSAGE), + {"status": "loaded"}, + ] + mock_pull.return_value = iter([{"event": "complete"}]) + + client = _make_client() + with ( + patch.object(sys.stdin, "isatty", return_value=True), + patch.object(sys.stdout, "isatty", return_value=True), + ): + result = client.load_model("Qwen3-0.6B-GGUF", prompt=True) + + assert result == {"status": "loaded"} + mock_repair.assert_called_once() From ac19af1977c32f101e70e441d83fc88fbe58e40b Mon Sep 17 00:00:00 2001 From: Tomasz Iniewicz Date: Fri, 29 May 2026 15:02:34 -0400 Subject: [PATCH 5/5] docs(plans): add issue-1293 non-interactive boot auto-heal plan --- .claude/plans/issue-1293.md | 66 +++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 .claude/plans/issue-1293.md diff --git a/.claude/plans/issue-1293.md b/.claude/plans/issue-1293.md new file mode 100644 index 000000000..c41403d50 --- /dev/null +++ b/.claude/plans/issue-1293.md @@ -0,0 +1,66 @@ +--- +type: plan +source-issue: 1293 +repo: amd/gaia +title: "Agent UI first-boot fails: interactive Delete/re-download prompt dead-ends non-interactive backend" +created: 2026-05-29 +status: in-progress +work_type: code-feature +complexity: standard +tdd_required: true +suggested_team_size: 2 +estimated_files_changed: 2 +test_command: "PYTHONPATH=\"$PWD/src\" /Users/tomasz/src/amd/gaia/.venv/bin/python -m pytest tests/unit/test_lemonade_model_loading.py -xvs" +build_command: "" +lint_command: "/Users/tomasz/src/amd/gaia/.venv/bin/python util/lint.py --black --isort" +branch: tmi/issue-1293-noninteractive-boot +reflection_iterations: 0 +agents_used: [planning, execution, validation] +--- + +# Issue #1293 — Non-interactive boot auto-heal + +Stacked on `tmi/issue-1294-corrupt-classification` (parent commit `cccc34ff`). #1294 +already narrowed `_is_corrupt_download_error` (removed the bare "llama-server failed to +start" signal); this branch does NOT touch that function or its tests. + +## Files +- `src/gaia/llm/lemonade_client.py` — `_prompt_user_for_delete` guard + `load_model` + corrupt-download branch honoring `prompt`. +- `tests/unit/test_lemonade_model_loading.py` — new TDD coverage (NOT the + error_classification file). + +## Bug +On a fresh Agent UI install the corrupt-download repair path in `load_model` fires +interactive `[y/N]` prompts inside the FastAPI lifespan threadpool (no TTY). `input()` +raises `EOFError` / hangs → boot-init job fails. Two mechanisms: +1. `_prompt_user_for_delete` lacks the `sys.stdin.isatty()/sys.stdout.isatty()` guard its + siblings (`_prompt_user_for_download`, `_prompt_user_for_repair`) both have. +2. The corrupt-download branch calls `_prompt_user_for_repair` / `_prompt_user_for_delete` + unconditionally, ignoring the `prompt` argument that boot callers pass as `prompt=False` + (`lemonade_manager._try_preload_with_ctx`, `ui/server.py:_load_model`). + +## Recovery-policy design (UX-first: silent success, loud only when unrecoverable) +- When `prompt=False` OR stdin/stdout not a TTY: NEVER call `input()` in any branch. +- `_prompt_user_for_delete` gets the same non-interactive guard → returns the proceed + default under non-TTY so auto-heal can continue. +- Corrupt-download branch HONORS `prompt`: with `prompt=False`, skip the prompts and + auto-proceed (resume → if that fails, ONE delete+redownload). Bounded to a single + delete+redownload; no loops. +- Surface recovery PROGRESS at INFO (percent from `pull_model_stream` events) so the + backend log (tailed by the UI) shows movement and boot doesn't look frozen. The + corrupt-detected / repairing "why" detail logs at DEBUG. +- Unrecoverable after the single delete+redownload → one loud actionable + `LemonadeClientError` (what failed / what to do — UI Force-redownload or manual recovery / + where to look — Lemonade server.log). No EOFError, no hang, no silent swallow. +- Interactive TTY (`prompt=True` + real TTY) still prompts as today. + +## Acceptance criteria +1. No `load_model` branch calls `input()` when `prompt=False` OR non-TTY. +2. `_prompt_user_for_delete` has the isatty guard like its two siblings. +3. Corrupt-download repair/delete branch honors the `prompt` argument. +4. Non-interactive corrupt model → automatic recovery, bounded to ONE delete+redownload, + no prompt. +5. Recovery surfaces progress (INFO from the pull stream); repair detail at DEBUG. +6. Unrecoverable → a single loud actionable `LemonadeClientError`. No EOFError, no hang. +7. Interactive prompting still works when `prompt=True` and a TTY is present.