From 8ddfc1b3a7512cbcf205c1f2e102baaeba8fa667 Mon Sep 17 00:00:00 2001
From: sumleo <sumleo@users.noreply.github.com>
Date: Thu, 25 Jun 2026 16:00:52 +0800
Subject: [PATCH] fix(codex): use a stable prompt_cache_key instead of
 per-request uuid4

A fresh uuid4 per request makes OpenAI/Codex prompt-cache prefix routing
miss on every call, so the stable system-instructions + tools[] prefix is
never reused. Derive the key once per provider instance from (account,
model) and send it from both CodexLLM.call() and call_with_tools().
---
 .../engine/providers/codex_llm.py             | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py b/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py
index 802dff26b..6c63b99a6 100644
--- a/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py
+++ b/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py
@@ -16,10 +16,11 @@
 """
 
 import asyncio
+import functools
+import hashlib
 import json
 import logging
 import time
-import uuid
 from pathlib import Path
 from typing import Any
 
@@ -155,6 +156,20 @@ def _auth_file(self) -> Path:
     def _auth_file(self, v: Path) -> None:
         self._auth_manager._auth_file = v
 
+    @functools.cached_property
+    def _prompt_cache_key(self) -> str:
+        """Return the stable ``prompt_cache_key`` for OpenAI/Codex requests.
+
+        The key is the first 32 hex characters of the SHA-256 digest of
+        ``"{account_id}:{model}"``, so it is identical across requests and
+        distinct per account/model pair. A fresh ``uuid4`` per request would
+        change the routing hint on every call and defeat prompt-cache reuse.
+        NOTE(review): ``cached_property`` computes this once per instance; a
+        later change to ``account_id`` or ``model`` is not picked up -- confirm.
+        """
+        seed = f"{self.account_id}:{self.model}"
+        return hashlib.sha256(seed.encode("utf-8")).hexdigest()[:32]
+
     # ------------------------------------------------------------------
     # Forwarding methods (keep surface area for tests / subclasses)
     # ------------------------------------------------------------------
@@ -388,7 +403,7 @@ async def call(
             "store": False,  # Codex uses stateless mode
             "stream": True,  # SSE streaming
             "include": ["reasoning.encrypted_content"],
-            "prompt_cache_key": str(uuid.uuid4()),
+            "prompt_cache_key": self._prompt_cache_key,
         }
 
         headers = {
@@ -711,7 +726,7 @@ async def call_with_tools(
             "store": False,
             "stream": True,
             "include": ["reasoning.encrypted_content"],
-            "prompt_cache_key": str(uuid.uuid4()),
+            "prompt_cache_key": self._prompt_cache_key,
         }
 
         headers = {