diff --git a/MANIFEST.in b/MANIFEST.in
index acc5aaea8..68dbf359e 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -18,6 +18,10 @@ include README.md
 # util/verify_wheel_dist.py asserts the wheel actually contains these files.
 recursive-include src/gaia/apps/webui/dist *.html *.js *.css *.svg *.png *.jpg *.jpeg *.webp *.ico *.webmanifest *.json *.woff *.woff2 *.ttf *.txt
 
+# Voice-test harness served by the audio router at /voice/test. Standalone
+# HTML page (no bundling, no React); ships as a single file.
+recursive-include src/gaia/ui/static *.html
+
 # Backstop deny-list — these patterns must never reach a published wheel even
 # if a developer accidentally checks them in or `npm run build` emits them.
 prune src/gaia/apps/webui/dist/node_modules
diff --git a/setup.py b/setup.py
index f6f5ed50a..46857cff2 100644
--- a/setup.py
+++ b/setup.py
@@ -103,6 +103,11 @@
             "dist/*.txt",
             "dist/assets/*",
         ],
+        # Voice-test harness for the audio router (/voice/test). Standalone
+        # HTML page with the WAV converter + mic recorder, no JS bundle needed.
+        "gaia.ui": [
+            "static/*.html",
+        ],
     },
     install_requires=[
         "openai",
diff --git a/src/gaia/audio/lemonade_audio.py b/src/gaia/audio/lemonade_audio.py
new file mode 100644
index 000000000..f8a85b296
--- /dev/null
+++ b/src/gaia/audio/lemonade_audio.py
@@ -0,0 +1,273 @@
+# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+"""
+Lemonade-routed audio: speech-to-text and text-to-speech via the OpenAI-
+compatible /v1/audio endpoints exposed by Lemonade Server.
+
+Why this module exists alongside whisper_asr.py and kokoro_tts.py:
+  Those modules import the `whisper` and `kokoro` Python packages and run the
+  models in-process. That means each GAIA process loads its own copy of those
+  models — wasteful when Lemonade is already running for the LLM. Lemonade
+  exposes Whisper and Kokoro as REST endpoints (port 13305 by default), so a
+  single Lemonade instance can serve LLM + STT + TTS to multiple clients.
+
+  This module is the thin HTTP client that GAIA's Agent UI (and downstream
+  consumers like Beacon) use to talk to those endpoints. The original
+  whisper_asr / kokoro_tts modules remain for use cases that need in-process
+  execution (e.g., the `gaia talk` standalone CLI without a running server).
+
+Endpoints:
+  POST /v1/audio/transcriptions   multipart: file=<wav>, model=Whisper-Small
+                                  → {"text": "..."}
+  POST /v1/audio/speech           JSON: {model:"kokoro-v1", input, voice,
+                                          response_format, speed}
+                                  → raw audio bytes (mp3/wav/opus/pcm)
+  WS   /realtime                  streaming STT (OpenAI realtime-compatible);
+                                  not yet wrapped here.
+
+Models auto-download on first request (~30s for Whisper-Small).
+"""
+
+from __future__ import annotations
+import os
+from pathlib import Path
+
+import httpx
+
+
+# Server origin, read once at import; functions below append /v1/audio/* or /api/v1/health.
+LEMONADE_URL = os.getenv("LEMONADE_BASE_URL", "http://localhost:13305")
+DEFAULT_STT_MODEL = "Whisper-Small"   # English; lighter+faster than -Large
+DEFAULT_TTS_MODEL = "kokoro-v1"        # only TTS model Lemonade exposes today
+DEFAULT_TTS_VOICE = "shimmer"          # default `voice` for the synthesize helpers
+
+
+class LemonadeAudioError(RuntimeError):
+    """Raised by this module when a Lemonade request cannot be completed.
+
+    Covers an unreachable server, a non-200 status, and an unusable response
+    body. GAIA's no-silent-fallback policy applies: callers must handle it
+    explicitly (retry, surface to user, fall back to text input). This module
+    never falls back to whisper_asr / kokoro_tts on its own — they load models
+    in-process, and using them here would mask real Lemonade misconfiguration.
+    """
+
+
+# ────────────────────────── Speech-to-text ──────────────────────────
+def transcribe(
+    audio_path: str | Path,
+    *,
+    model: str = DEFAULT_STT_MODEL,
+    language: str | None = "en",
+    base_url: str = LEMONADE_URL,
+    timeout: float = 60.0,
+) -> str:
+    """POST a WAV file to Lemonade /v1/audio/transcriptions.
+
+    Args:
+        audio_path: path to a 16kHz mono WAV file (push-to-talk recordings).
+        model: ``Whisper-Tiny`` | ``Whisper-Base`` | ``Whisper-Small`` |
+               ``Whisper-Large`` (or any other Whisper variant Lemonade serves).
+        language: ISO 639-1 code; defaults to ``"en"``. Pass ``None`` to
+                  auto-detect.
+        base_url: Lemonade server URL.
+        timeout: HTTP timeout in seconds.
+
+    Returns:
+        The transcribed text (``""`` when Lemonade reports no speech).
+
+    Raises:
+        FileNotFoundError: if ``audio_path`` does not exist.
+        LemonadeAudioError: server unreachable, non-200 status, or malformed
+                            response.
+    """
+    audio_path = Path(audio_path)
+    if not audio_path.exists():
+        raise FileNotFoundError(audio_path)
+
+    files = {"file": (audio_path.name, audio_path.read_bytes(), "audio/wav")}
+    data: dict[str, str] = {"model": model}
+    if language is not None:
+        data["language"] = language
+
+    url = f"{base_url}/v1/audio/transcriptions"
+    try:
+        r = httpx.post(url, files=files, data=data, timeout=timeout)
+    except httpx.RequestError as e:
+        raise LemonadeAudioError(
+            f"Lemonade STT unreachable at {base_url} — start the server with "
+            f"`lemonade-server serve`, or set LEMONADE_BASE_URL. Original: {e}"
+        ) from e
+
+    if r.status_code != 200:
+        raise LemonadeAudioError(
+            f"Lemonade STT returned {r.status_code}: {r.text[:200]}. "
+            f"Common causes: model '{model}' not yet downloaded "
+            f"(first request triggers a ~30s auto-download), or audio not WAV/16kHz mono."
+        )
+
+    try:
+        body = r.json()
+    except ValueError as e:  # 200 status but the body is not decodable JSON
+        raise LemonadeAudioError(
+            f"Lemonade STT returned non-JSON: {r.text[:200]}"
+        ) from e
+    if not isinstance(body, dict) or "text" not in body:
+        raise LemonadeAudioError(f"Unexpected STT response shape: {body!r}")
+    # Lemonade returns {"text": null} when no speech was detected; normalize to "".
+    return body["text"] or ""
+
+
+def transcribe_bytes(
+    audio_bytes: bytes,
+    filename: str = "audio.wav",
+    *,
+    model: str = DEFAULT_STT_MODEL,
+    language: str | None = "en",
+    base_url: str = LEMONADE_URL,
+    timeout: float = 60.0,
+) -> str:
+    """Like :func:`transcribe` but takes raw WAV bytes (no temp file needed).
+
+    For FastAPI handlers holding an :class:`UploadFile` in memory. Raises
+    :class:`LemonadeAudioError` if unreachable, non-200, or the body is malformed.
+    """
+    files = {"file": (filename, audio_bytes, "audio/wav")}
+    data: dict[str, str] = {"model": model}
+    if language is not None:
+        data["language"] = language
+
+    url = f"{base_url}/v1/audio/transcriptions"
+    try:
+        r = httpx.post(url, files=files, data=data, timeout=timeout)
+    except httpx.RequestError as e:
+        raise LemonadeAudioError(
+            f"Lemonade STT unreachable at {base_url} — start the server with "
+            f"`lemonade-server serve`, or set LEMONADE_BASE_URL. Original: {e}"
+        ) from e
+
+    if r.status_code != 200:
+        raise LemonadeAudioError(
+            f"Lemonade STT returned {r.status_code}: {r.text[:200]}"
+        )
+    try:
+        body = r.json()
+    except ValueError as e:  # 200 status but the body is not decodable JSON
+        raise LemonadeAudioError(
+            f"Lemonade STT returned non-JSON: {r.text[:200]}"
+        ) from e
+    if not isinstance(body, dict) or "text" not in body:
+        raise LemonadeAudioError(f"Unexpected STT response shape: {body!r}")
+    # Lemonade returns {"text": null} when no speech was detected; normalize to "".
+    return body["text"] or ""
+
+
+# ────────────────────────── Text-to-speech ──────────────────────────
+def synthesize(
+    text: str,
+    out_path: str | Path,
+    *,
+    voice: str = DEFAULT_TTS_VOICE,
+    model: str = DEFAULT_TTS_MODEL,
+    response_format: str = "mp3",
+    speed: float = 1.0,
+    base_url: str = LEMONADE_URL,
+    timeout: float = 60.0,
+) -> str:
+    """POST text to Lemonade /v1/audio/speech and write the audio bytes.
+
+    Args:
+        text: text to synthesize. Keep ≤ ~500 chars for low-latency replies.
+        out_path: file path to write the audio bytes to.
+        voice: OpenAI voices (``"alloy"``, ``"shimmer"``, ``"ash"``, …) or
+               Kokoro voices (``"af_sky"``, ``"am_echo"``, …).
+        model: must be ``"kokoro-v1"`` as of Lemonade v9.4.
+        response_format: ``"mp3"`` | ``"wav"`` | ``"opus"`` | ``"pcm"``.
+        speed: 0.25–4.0 (default 1.0).
+        base_url: Lemonade server URL.
+        timeout: HTTP timeout in seconds.
+
+    Returns:
+        Absolute path string of the written file.
+    """
+    out_path = Path(out_path)
+    audio_bytes = synthesize_bytes(
+        text,
+        voice=voice,
+        model=model,
+        response_format=response_format,
+        speed=speed,
+        base_url=base_url,
+        timeout=timeout,
+    )
+    out_path.write_bytes(audio_bytes)
+    return str(out_path.resolve())  # absolute, as the docstring promises
+
+
+def synthesize_bytes(
+    text: str,
+    *,
+    voice: str = DEFAULT_TTS_VOICE,
+    model: str = DEFAULT_TTS_MODEL,
+    response_format: str = "mp3",
+    speed: float = 1.0,
+    base_url: str = LEMONADE_URL,
+    timeout: float = 60.0,
+) -> bytes:
+    """Ask Lemonade to synthesize ``text`` and hand back the raw audio bytes.
+
+    In-memory counterpart of :func:`synthesize`: the request is a JSON POST
+    to ``/v1/audio/speech`` and nothing is written to disk, which suits
+    FastAPI handlers that return the audio straight to the client.
+
+    Raises:
+        LemonadeAudioError: the request failed or the status was not 200.
+    """
+    endpoint = f"{base_url}/v1/audio/speech"
+    request_json = dict(
+        model=model,
+        input=text,
+        voice=voice,
+        response_format=response_format,
+        speed=speed,
+    )
+    try:
+        r = httpx.post(endpoint, json=request_json, timeout=timeout)
+    except httpx.RequestError as e:
+        raise LemonadeAudioError(
+            f"Lemonade TTS unreachable at {base_url} — start the server with "
+            f"`lemonade-server serve`, or set LEMONADE_BASE_URL. Original: {e}"
+        ) from e
+    if r.status_code == 200:
+        return r.content
+    raise LemonadeAudioError(
+        f"Lemonade TTS returned {r.status_code}: {r.text[:200]}"
+    )
+
+
+# ────────────────────────── Health probe ──────────────────────────
+def lemonade_health(base_url: str = LEMONADE_URL, timeout: float = 5.0) -> dict:
+    """GET Lemonade's ``/api/v1/health`` and return the decoded JSON body.
+
+    That path is Lemonade's native namespace and the one the rest of GAIA
+    probes (``gaia.llm.lemonade_client.LemonadeClient.get_health``); some
+    installations also answer on ``/v1/health`` and ``/health``.
+    """
+    health_url = f"{base_url}/api/v1/health"
+    try:
+        r = httpx.get(health_url, timeout=timeout)
+    except httpx.RequestError as e:
+        raise LemonadeAudioError(
+            f"Lemonade unreachable at {base_url}. Start it with "
+            f"`lemonade-server serve` or set LEMONADE_BASE_URL. Original: {e}"
+        ) from e
+    if r.status_code == 200:
+        try:
+            return r.json()
+        except ValueError as e:
+            raise LemonadeAudioError(
+                f"Lemonade /api/v1/health returned non-JSON: {r.text[:200]}"
+            ) from e
+    raise LemonadeAudioError(
+        f"Lemonade /api/v1/health returned {r.status_code}: {r.text[:200]}"
+    )
diff --git a/src/gaia/ui/routers/audio.py b/src/gaia/ui/routers/audio.py
new file mode 100644
index 000000000..53fb0f035
--- /dev/null
+++ b/src/gaia/ui/routers/audio.py
@@ -0,0 +1,300 @@
+# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
+# SPDX-License-Identifier: MIT
+"""
+Audio router — exposes STT (/voice/transcribe), TTS (/voice/speech), a
+health probe (/voice/health), and a browser test page (/voice/test) so
+contributors can verify the audio path end-to-end without writing code.
+
+Two backends are supported:
+
+  - ``lemonade`` (default) — routes to :mod:`gaia.audio.lemonade_audio`,
+    which POSTs to Lemonade Server's OpenAI-compatible /v1/audio/* endpoints.
+    Single inference server for LLM + STT + TTS. Required for the
+    ruggedized-Ryzen-AI fielding story. **Currently does not work on macOS**
+    (Lemonade's ``whispercpp`` recipe is Linux/Windows-only as of v10.2).
+
+  - ``in-process`` — falls through to the legacy
+    :class:`gaia.audio.whisper_asr.WhisperAsr` and
+    :class:`gaia.audio.kokoro_tts.KokoroTTS` classes which load the
+    ``openai-whisper`` and ``kokoro`` Python packages locally. Heavier
+    install footprint (torch, CUDA wheels, spaCy) but works on macOS where
+    Lemonade audio doesn't.
+
+Backend is selected at request time via the ``GAIA_VOICE_BACKEND`` env var
+(``lemonade`` or ``in-process``; default ``lemonade``). Both backends are
+shipped together until Lemonade adds macOS support for whispercpp / Kokoro;
+no silent fallback between them — if the selected backend is unreachable
+or its deps are missing, the route returns a clear error.
+"""
+
+from __future__ import annotations
+
+import io
+import logging
+import os
+import tempfile
+from pathlib import Path
+
+from fastapi import APIRouter, File, Form, HTTPException, UploadFile
+from fastapi.responses import HTMLResponse, Response
+from pydantic import BaseModel, Field
+
+from gaia.audio.lemonade_audio import (
+    DEFAULT_STT_MODEL,
+    DEFAULT_TTS_MODEL,
+    DEFAULT_TTS_VOICE,
+    LemonadeAudioError,
+    lemonade_health,
+    synthesize_bytes,
+    transcribe_bytes,
+)
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/voice", tags=["audio"])
+
+
+class SpeechRequest(BaseModel):
+    """Body schema for POST /voice/speech (mirrors Lemonade's contract)."""
+    # Only the lemonade path of voice_speech forwards model/response_format/speed.
+    input: str = Field(..., min_length=1, description="Text to synthesize")
+    voice: str = Field(DEFAULT_TTS_VOICE, description="OpenAI or Kokoro voice name")
+    model: str = Field(DEFAULT_TTS_MODEL, description="TTS model (only kokoro-v1 today)")
+    response_format: str = Field("mp3", description="mp3 | wav | opus | pcm")  # not validated here
+    speed: float = Field(1.0, ge=0.25, le=4.0)  # bounds enforced by request validation
+
+
+# ────────────────────────── Backend selector ──────────────────────────
+_LEMONADE = "lemonade"
+_IN_PROCESS = "in-process"
+
+
+def _backend() -> str:
+    """Resolve the voice backend from ``GAIA_VOICE_BACKEND`` (default ``lemonade``)."""
+    # Tolerate stray whitespace/case, and treat an empty value as unset.
+    val = os.getenv("GAIA_VOICE_BACKEND", "").strip().lower() or _LEMONADE
+    if val in (_LEMONADE, _IN_PROCESS):
+        return val
+    # Unknown values are logged (never silent) and resolve to the default backend.
+    logger.warning("Unknown GAIA_VOICE_BACKEND=%r; falling back to %r", val, _LEMONADE)
+    return _LEMONADE
+
+
+# Lemonade STT model names → openai-whisper package names (in-process backend only).
+_WHISPER_MODEL_TO_PACKAGE = {
+    "Whisper-Tiny":  "tiny",
+    "Whisper-Base":  "base",
+    "Whisper-Small": "small",
+    "Whisper-Large": "large",
+}
+
+
+def _to_whisper_package_name(name: str) -> str:
+    """Lemonade name → whisper-package name; unmapped names are lower-cased minus "whisper-"."""
+    key = name.strip().lower()
+    return _WHISPER_MODEL_TO_PACKAGE.get(name, key.removeprefix("whisper-"))
+
+
+# ────────────────────────── /voice/transcribe (STT) ──────────────────────────
+@router.post("/transcribe")
+async def voice_transcribe(
+    audio: UploadFile = File(..., description="Audio file (WAV preferred, 16kHz mono)"),
+    model: str = Form(DEFAULT_STT_MODEL),
+    language: str | None = Form("en"),
+):
+    """Transcribe an uploaded audio clip via the configured backend.
+
+    Returns ``{"text": "<transcript>", "model": "<used-model>", "backend": "<lemonade|in-process>"}``.
+    Errors: 400 empty upload, 502 Lemonade failure, 503 in-process deps missing.
+    """
+    from fastapi.concurrency import run_in_threadpool  # keeps blocking backends off the loop
+
+    audio_bytes = await audio.read()
+    if not audio_bytes:
+        raise HTTPException(status_code=400, detail="empty audio upload")
+
+    if _backend() == _IN_PROCESS:
+        # In-process openai-whisper: works on macOS; `language` is not forwarded here.
+        try:
+            from gaia.audio.whisper_asr import WhisperAsr
+        except ImportError as e:
+            raise HTTPException(
+                status_code=503,
+                detail=(
+                    "in-process voice backend missing deps — install with "
+                    '`uv pip install -e ".[talk]"`. ' + str(e)
+                ),
+            ) from e
+
+        package_name = _to_whisper_package_name(model)
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp.write(audio_bytes)
+            tmp_path = tmp.name
+        try:
+            asr = await run_in_threadpool(WhisperAsr, model_size=package_name)
+            text = await run_in_threadpool(asr.transcribe_file, tmp_path)
+        except FileNotFoundError as e:
+            raise HTTPException(status_code=400, detail=str(e)) from e
+        except ImportError as e:
+            raise HTTPException(status_code=503, detail=str(e)) from e
+        finally:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+        return {"text": text, "model": model, "backend": _IN_PROCESS}
+
+    # Default: Lemonade-routed path.
+    try:
+        text = await run_in_threadpool(
+            transcribe_bytes,
+            audio_bytes,
+            filename=audio.filename or "audio.wav",
+            model=model,
+            language=language or None,
+        )
+    except LemonadeAudioError as e:
+        # 502 = upstream Lemonade failure (we forwarded faithfully but it errored).
+        raise HTTPException(status_code=502, detail=str(e)) from e
+    return {"text": text, "model": model, "backend": _LEMONADE}
+
+
+# ────────────────────────── /voice/speech (TTS) ──────────────────────────
+@router.post("/speech")
+async def voice_speech(req: SpeechRequest):
+    """Synthesize speech via the configured backend.
+
+    Body validated by :class:`SpeechRequest`. Returns the raw audio bytes
+    with a Content-Type matching ``response_format``.
+
+    In-process backend caveat: Kokoro produces float32 audio at 24 kHz; we
+    encode it server-side as WAV regardless of ``response_format`` (no MP3
+    encoder is bundled with the in-process path). The response sets
+    ``Content-Type: audio/wav`` in that case so the browser plays it correctly.
+    """
+    from fastapi.concurrency import run_in_threadpool  # keeps blocking backends off the loop
+
+    if _backend() == _IN_PROCESS:
+        try:
+            from gaia.audio.kokoro_tts import KokoroTTS
+        except ImportError as e:
+            raise HTTPException(
+                status_code=503,
+                detail=(
+                    "in-process voice backend missing deps — install with "
+                    '`uv pip install -e ".[talk]"`. ' + str(e)
+                ),
+            ) from e
+
+        try:
+            import soundfile as sf
+        except ImportError as e:
+            raise HTTPException(
+                status_code=503,
+                detail="`soundfile` is required to encode in-process TTS output to WAV. " + str(e),
+            ) from e
+
+        def _generate():
+            tts = KokoroTTS()
+            # Voice is passed through as-is; Kokoro expects native names ("af_bella" etc.).
+            tts.set_voice(req.voice)
+            return tts.generate_speech(req.input)
+
+        try:
+            audio_array, _phonemes, meta = await run_in_threadpool(_generate)
+        except ImportError as e:
+            raise HTTPException(status_code=503, detail=str(e)) from e
+        except Exception as e:  # noqa: BLE001 — Kokoro raises various errors
+            raise HTTPException(status_code=502, detail=f"in-process TTS failed: {e}") from e
+
+        sample_rate = meta.get("sample_rate", 24_000)
+        buf = io.BytesIO()
+        sf.write(buf, audio_array, samplerate=sample_rate, format="WAV", subtype="PCM_16")
+        return Response(content=buf.getvalue(), media_type="audio/wav")
+
+    # Default: Lemonade-routed path.
+    try:
+        audio = await run_in_threadpool(
+            synthesize_bytes,
+            req.input,
+            voice=req.voice,
+            model=req.model,
+            response_format=req.response_format,
+            speed=req.speed,
+        )
+    except LemonadeAudioError as e:
+        raise HTTPException(status_code=502, detail=str(e)) from e
+
+    media = {
+        "mp3":  "audio/mpeg",
+        "wav":  "audio/wav",
+        "opus": "audio/opus",
+        # Raw PCM has no self-describing MIME type; clients requesting it know the format.
+        "pcm":  "application/octet-stream",
+    }.get(req.response_format, "application/octet-stream")
+    return Response(content=audio, media_type=media)
+
+
+# ────────────────────────── /voice/health ──────────────────────────
+@router.get("/health")
+def voice_health():
+    """Describe the active voice backend and whether it is usable.
+
+    ``lemonade``: returns Lemonade's /api/v1/health body; a failed probe
+    becomes a 502. ``in-process``: only checks that the ``whisper`` and
+    ``kokoro`` packages import, reporting the outcome in ``ready``/``detail``.
+    """
+    if _backend() != _IN_PROCESS:
+        # Default: lemonade
+        try:
+            lemonade_body = lemonade_health()
+        except LemonadeAudioError as e:
+            raise HTTPException(status_code=502, detail=str(e)) from e
+        return {
+            "backend": _LEMONADE,
+            "lemonade": lemonade_body,
+            "stt_default": DEFAULT_STT_MODEL,
+            "tts_default": DEFAULT_TTS_MODEL,
+        }
+
+    # in-process: a clean import of both packages is the whole readiness check.
+    missing_detail: str | None = None
+    try:
+        import whisper  # noqa: F401  (required by WhisperAsr)
+        import kokoro   # noqa: F401  (required by KokoroTTS)
+    except ImportError as e:
+        missing_detail = (
+            'in-process backend missing deps — install with `uv pip install -e ".[talk]"`. '
+            + str(e)
+        )
+    deps_ok = missing_detail is None
+    return {
+        "backend": _IN_PROCESS,
+        "ready": deps_ok,
+        "detail": missing_detail,
+        "stt_default": "small",
+        "tts_default": "af_bella",
+    }
+
+
+# ────────────────────────── /voice/test (browser harness) ──────────────────────────
+_TEST_HTML_PATH = Path(__file__).parent.parent / "static" / "voice_test.html"
+
+
+@router.get("/test", response_class=HTMLResponse)
+def voice_test_page():
+    """Return the static voice_test.html harness used to smoke-test STT + TTS.
+
+    Browse to ``http://localhost:<ui-port>/voice/test``; the page auto-probes
+    /voice/health on load and displays which backend is active. Answers 500
+    with a repair hint when the HTML file is missing from the install.
+    """
+    try:
+        page_html = _TEST_HTML_PATH.read_text(encoding="utf-8")
+    except FileNotFoundError as e:
+        missing_hint = (
+            f"voice_test.html missing at {_TEST_HTML_PATH}. "
+            "Reinstall gaia or restore src/gaia/ui/static/voice_test.html."
+        )
+        raise HTTPException(status_code=500, detail=missing_hint) from e
+    return HTMLResponse(page_html)
diff --git a/src/gaia/ui/server.py b/src/gaia/ui/server.py
index ead8d38cf..6bfe673c0 100644
--- a/src/gaia/ui/server.py
+++ b/src/gaia/ui/server.py
@@ -49,6 +49,7 @@
 from .database import ChatDatabase
 from .document_monitor import DocumentMonitor
 from .routers import agents as agents_router_mod
+from .routers import audio as audio_router_mod
 from .routers import chat as chat_router_mod
 from .routers import documents as documents_router_mod
 from .routers import files as files_router_mod
@@ -395,6 +396,7 @@ async def _global_exception_handler(request: Request, exc: Exception):
     app.include_router(files_router_mod.router)
     app.include_router(tunnel_router_mod.router)
     app.include_router(mcp_router_mod.router)
+    app.include_router(audio_router_mod.router)
 
     # ── Serve Uploaded Files ─────────────────────────────────────────────
     # Mount the uploads directory so uploaded files can be served by URL.
diff --git a/src/gaia/ui/static/voice_test.html b/src/gaia/ui/static/voice_test.html
new file mode 100644
index 000000000..3d281ac07
--- /dev/null
+++ b/src/gaia/ui/static/voice_test.html
@@ -0,0 +1,308 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+<meta charset="UTF-8">
+<meta name="viewport" content="width=device-width, initial-scale=1.0">
+<title>GAIA — Voice Test (Lemonade STT/TTS)</title>
+<style>
+  :root {
+    --bg: #0d1117; --panel: #161b22; --border: #30363d;
+    --text: #e6edf3; --dim: #8b949e; --accent: #58a6ff; --good: #3fb950; --warn: #f0883e; --bad: #f85149;
+  }
+  * { box-sizing: border-box; }
+  body { margin: 0; padding: 32px 24px; background: var(--bg); color: var(--text);
+         font-family: ui-sans-serif, system-ui, sans-serif; line-height: 1.5;
+         max-width: 800px; margin: 0 auto; }
+  h1 { font-size: 24px; margin: 0 0 8px; }
+  h2 { font-size: 14px; color: var(--dim); margin: 24px 0 8px;
+       letter-spacing: 0.08em; text-transform: uppercase; font-weight: 600; }
+  p { color: var(--dim); font-size: 14px; }
+  .card { background: var(--panel); border: 1px solid var(--border);
+          border-radius: 8px; padding: 20px; margin-bottom: 16px; }
+  button { font: inherit; background: var(--panel); color: var(--text);
+           border: 1px solid var(--border); border-radius: 4px;
+           padding: 8px 16px; cursor: pointer; transition: 0.15s; }
+  button:hover:not(:disabled) { border-color: var(--accent); }
+  button:disabled { opacity: 0.4; cursor: not-allowed; }
+  button.primary { background: var(--accent); color: var(--bg); border-color: var(--accent); font-weight: 600; }
+  button.recording { background: var(--bad); border-color: var(--bad); color: white; animation: pulse 1s infinite; }
+  @keyframes pulse { 50% { opacity: 0.7; } }
+  input, select, textarea {
+    font: inherit; background: var(--bg); color: var(--text);
+    border: 1px solid var(--border); border-radius: 4px; padding: 6px 10px; width: 100%;
+  }
+  textarea { min-height: 80px; resize: vertical; }
+  .row { display: flex; gap: 8px; align-items: center; margin: 8px 0; }
+  .row label { min-width: 140px; font-size: 13px; color: var(--dim); }
+  .row > select, .row > input { flex: 1; }
+  .out { background: #0a0e14; border: 1px solid var(--border); border-radius: 4px;
+         padding: 12px; min-height: 40px; font-family: ui-monospace, monospace;
+         font-size: 13px; white-space: pre-wrap; word-break: break-word; margin-top: 8px; }
+  .status { display: inline-flex; align-items: center; gap: 8px; font-size: 12px; color: var(--dim); }
+  .dot { width: 8px; height: 8px; border-radius: 50%; }
+  .dot.good { background: var(--good); }
+  .dot.warn { background: var(--warn); }
+  .dot.bad  { background: var(--bad); }
+  .small { font-size: 11px; color: var(--dim); }
+  audio { width: 100%; margin-top: 8px; }
+  code { background: #0a0e14; padding: 1px 5px; border-radius: 3px;
+         font-family: ui-monospace, monospace; font-size: 12px; }
+</style>
+</head>
+<body>
+<h1>🍋 GAIA Voice Test — STT/TTS</h1>
+<p>Smoke-test the audio pipeline through GAIA's <code>/voice/*</code> endpoints. Backend is selected via the <code>GAIA_VOICE_BACKEND</code> env var (<code>lemonade</code> default, <code>in-process</code> for macOS / when Lemonade audio recipes are unavailable).</p>
+<p class="small">Active backend (from <code>/voice/health</code>): <strong id="active-backend" style="color:var(--accent);">checking…</strong></p>
+
+<div class="card">
+  <h2>Health</h2>
+  <button onclick="checkHealth()">Probe /voice/health</button>
+  <div id="health" class="status" style="margin-left:12px;"><span class="dot warn"></span>not checked yet</div>
+  <div id="health-out" class="out small" style="display:none;"></div>
+</div>
+
+<div class="card">
+  <h2>1 · Speech-to-Text</h2>
+  <p>Click record, speak for ~5 seconds, click stop. Audio uploads to <code>POST /voice/transcribe</code>, which hands it to the active backend (Lemonade by default).</p>
+  <div class="row">
+    <label for="stt-model">Whisper model</label>
+    <select id="stt-model">
+      <option value="Whisper-Small" selected>Whisper-Small (default)</option>
+      <option value="Whisper-Tiny">Whisper-Tiny (fastest)</option>
+      <option value="Whisper-Base">Whisper-Base</option>
+      <option value="Whisper-Large">Whisper-Large (most accurate)</option>
+    </select>
+  </div>
+  <div class="row">
+    <label for="stt-lang">Language</label>
+    <input id="stt-lang" value="en" placeholder="ISO 639-1 (en, es, …) or empty for auto">
+  </div>
+  <div class="row" style="margin-top:16px;">
+    <button id="record-btn" class="primary" onclick="toggleRecord()">🎤 Record</button>
+    <span id="record-status" class="status"><span class="dot bad"></span>idle</span>
+  </div>
+  <div id="stt-out" class="out">(transcription will appear here)</div>
+  <audio id="stt-playback" controls style="display:none;"></audio>
+</div>
+
+<div class="card">
+  <h2>2 · Text-to-Speech</h2>
+  <p>Synthesize text via <code>POST /voice/speech</code> (Lemonade Kokoro by default; in-process Kokoro when that backend is selected).</p>
+  <div class="row">
+    <label for="tts-text">Text</label>
+  </div>
+  <textarea id="tts-text">The Beacons are lit. Gondor calls for aid.</textarea>
+  <div class="row">
+    <label for="tts-voice">Voice</label>
+    <select id="tts-voice">
+      <option value="shimmer" selected>shimmer (OpenAI default)</option>
+      <option value="alloy">alloy</option>
+      <option value="ash">ash</option>
+      <option value="echo">echo</option>
+      <option value="af_sky">af_sky (Kokoro)</option>
+      <option value="am_echo">am_echo (Kokoro)</option>
+    </select>
+  </div>
+  <div class="row">
+    <label for="tts-format">Format</label>
+    <select id="tts-format">
+      <option value="mp3" selected>mp3</option>
+      <option value="wav">wav</option>
+      <option value="opus">opus</option>
+      <option value="pcm">pcm</option>
+    </select>
+  </div>
+  <div class="row">
+    <label for="tts-speed">Speed</label>
+    <input id="tts-speed" type="number" value="1.0" min="0.25" max="4.0" step="0.05">
+  </div>
+  <div class="row" style="margin-top:16px;">
+    <button class="primary" onclick="synthesize()">▶ Synthesize</button>
+    <span id="tts-status" class="status"><span class="dot bad"></span>not run</span>
+  </div>
+  <audio id="tts-playback" controls style="display:none;"></audio>
+</div>
+
+<p class="small">Endpoints under test: <code>POST /voice/transcribe</code>, <code>POST /voice/speech</code>, <code>GET /voice/health</code>. With the default <code>lemonade</code> backend these proxy to Lemonade (<code>/v1/audio/*</code> and its health route).</p>
+
+<script>
+  const $ = (id) => document.getElementById(id);
+  const setStatus = (id, kind, text) => {  // DOM nodes, not innerHTML: `text` may hold error/server strings
+    $(id).replaceChildren(Object.assign(document.createElement("span"), { className: `dot ${kind}` }), text);
+  };
+
+  async function checkHealth() {
+    // Probe /voice/health, show the raw JSON, and summarize readiness in the status dot.
+    const panel = $("health-out");
+    setStatus("health", "warn", "checking…");
+    panel.style.display = "block";
+    panel.textContent = "";
+    try {
+      const resp = await fetch("/voice/health");
+      const info = await resp.json();
+      panel.textContent = JSON.stringify(info, null, 2);
+      const name = info.backend || "?";
+      $("active-backend").textContent = name;
+      if (!resp.ok) return setStatus("health", "bad", `HTTP ${resp.status}`);
+      // `ready` is only reported by some backends; a missing flag counts as ready.
+      const isReady = info.ready ?? true;
+      if (isReady) setStatus("health", "good", `${name} backend ready`);
+      else setStatus("health", "warn", `${name} backend deps missing`);
+    } catch (err) {
+      setStatus("health", "bad", `error: ${err.message}`);
+      $("active-backend").textContent = "unknown";
+    }
+  }
+
+  // ─── STT ───
+  let mediaRecorder = null;
+  let chunks = [];
+
+  async function toggleRecord() {  // start recording, or stop it if one is already running
+    const btn = $("record-btn");
+    if (mediaRecorder && mediaRecorder.state === "recording") {
+      mediaRecorder.stop();
+      return;
+    }
+    try {
+      const audio = { sampleRate: 16000, channelCount: 1, echoCancellation: true };
+      const stream = await navigator.mediaDevices.getUserMedia({ audio });
+      mediaRecorder = new MediaRecorder(stream);
+      chunks = [];
+      mediaRecorder.ondataavailable = (e) => { if (e.data.size > 0) chunks.push(e.data); };
+      // Stop the tracks once recording ends so the browser releases the microphone.
+      mediaRecorder.onstop = () => { stream.getTracks().forEach((t) => t.stop()); onRecordingStopped(); };
+      mediaRecorder.start();
+      btn.classList.add("recording");
+      btn.textContent = "■ Stop";
+      setStatus("record-status", "warn", "recording…");
+    } catch (e) {
+      setStatus("record-status", "bad", `mic denied: ${e.message}`);
+    }
+  }
+
+  async function onRecordingStopped() {
+    const btn = $("record-btn");
+    btn.classList.remove("recording");
+    btn.textContent = "🎤 Record";
+    const blob = new Blob(chunks, { type: chunks[0]?.type || "audio/webm" });
+
+    // Show the original recording
+    const url = URL.createObjectURL(blob);
+    $("stt-playback").src = url;
+    $("stt-playback").style.display = "block";
+
+    // Convert to WAV in-browser via OfflineAudioContext (Lemonade requires WAV).
+    setStatus("record-status", "warn", "converting to WAV…");
+    let wavBlob;
+    try {
+      wavBlob = await blobToWav(blob, 16000);
+    } catch (e) {
+      setStatus("record-status", "bad", `wav convert failed: ${e.message}`);
+      return;
+    }
+
+    // Upload
+    setStatus("record-status", "warn", "transcribing via Lemonade…");
+    const fd = new FormData();
+    fd.append("audio", wavBlob, "recording.wav");
+    fd.append("model", $("stt-model").value);
+    const lang = $("stt-lang").value.trim();
+    if (lang) fd.append("language", lang);
+
+    try {
+      const t0 = performance.now();
+      const r = await fetch("/voice/transcribe", { method: "POST", body: fd });
+      const dt = ((performance.now() - t0) / 1000).toFixed(1);
+      const body = await r.json();
+      if (r.ok) {
+        $("stt-out").textContent = body.text || "(empty transcription)";
+        setStatus("record-status", "good", `done (${dt}s, ${$("stt-model").value})`);
+      } else {
+        $("stt-out").textContent = JSON.stringify(body, null, 2);
+        setStatus("record-status", "bad", `HTTP ${r.status}`);
+      }
+    } catch (e) {
+      setStatus("record-status", "bad", `error: ${e.message}`);
+    }
+  }
+
+  // Convert any browser Blob to a 16kHz mono WAV using AudioContext.
+  async function blobToWav(blob, sampleRate) {
+    const buf = await blob.arrayBuffer();
+    const ctx = new (window.AudioContext || window.webkitAudioContext)();
+    const decoded = await ctx.decodeAudioData(buf);
+    // Downsample to 16 kHz mono. OfflineAudioContext requires an integer length;
+    // decoded.duration * sampleRate can be fractional (Safari is strict here).
+    const length = Math.max(1, Math.floor(decoded.duration * sampleRate));
+    const offline = new OfflineAudioContext(1, length, sampleRate);
+    const src = offline.createBufferSource();
+    src.buffer = decoded;
+    src.connect(offline.destination);
+    src.start();
+    const rendered = await offline.startRendering();
+    return audioBufferToWav(rendered);
+  }
+
+  function audioBufferToWav(buffer) {
+    const numCh = buffer.numberOfChannels;
+    const sr = buffer.sampleRate;
+    const samples = buffer.getChannelData(0);
+    const len = samples.length;
+    const wav = new ArrayBuffer(44 + len * 2);
+    const v = new DataView(wav);
+    let p = 0;
+    function ws(s) { for (let i = 0; i < s.length; i++) v.setUint8(p++, s.charCodeAt(i)); }
+    function w16(x) { v.setUint16(p, x, true); p += 2; }
+    function w32(x) { v.setUint32(p, x, true); p += 4; }
+    ws("RIFF"); w32(36 + len * 2); ws("WAVE"); ws("fmt "); w32(16);
+    w16(1); w16(numCh); w32(sr); w32(sr * numCh * 2); w16(numCh * 2); w16(16);
+    ws("data"); w32(len * 2);
+    for (let i = 0; i < len; i++) {
+      const s = Math.max(-1, Math.min(1, samples[i]));
+      v.setInt16(p, s < 0 ? s * 0x8000 : s * 0x7fff, true);
+      p += 2;
+    }
+    return new Blob([wav], { type: "audio/wav" });
+  }
+
+  // ─── TTS ───
+  async function synthesize() {
+    const text = $("tts-text").value.trim();
+    if (!text) { setStatus("tts-status", "bad", "empty text"); return; }
+    setStatus("tts-status", "warn", "synthesizing via Lemonade…");
+    const payload = {
+      input: text,
+      voice: $("tts-voice").value,
+      response_format: $("tts-format").value,
+      speed: parseFloat($("tts-speed").value || "1.0"),
+    };
+    try {
+      const t0 = performance.now();
+      const r = await fetch("/voice/speech", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(payload),
+      });
+      const dt = ((performance.now() - t0) / 1000).toFixed(1);
+      if (!r.ok) {
+        const body = await r.json().catch(() => ({}));
+        setStatus("tts-status", "bad", `HTTP ${r.status}: ${body.detail || ""}`);
+        return;
+      }
+      const audioBlob = await r.blob();
+      $("tts-playback").src = URL.createObjectURL(audioBlob);
+      $("tts-playback").style.display = "block";
+      $("tts-playback").play();
+      setStatus("tts-status", "good", `done (${dt}s, ${(audioBlob.size / 1024).toFixed(1)} KB)`);
+    } catch (e) {
+      setStatus("tts-status", "bad", `error: ${e.message}`);
+    }
+  }
+
+  // Probe health on page load
+  checkHealth();
+</script>
+</body>
+</html>