From 8ddfc1b3a7512cbcf205c1f2e102baaeba8fa667 Mon Sep 17 00:00:00 2001
From: sumleo
Date: Thu, 25 Jun 2026 16:00:52 +0800
Subject: [PATCH] fix(codex): use a stable prompt_cache_key instead of per-request uuid4

A fresh uuid4 per request makes OpenAI/Codex prompt-cache prefix routing
miss on every call, so the stable system-instructions + tools[] prefix is
never reused. Derive the key once per provider instance from (account,
model) and use it in both CodexLLM.call() and CodexLLM.call_with_tools().
---
NOTE(review): functools.cached_property stores the key on first access and
reuses it for the rest of the instance's life. The _auth_file setter in the
second hunk's context shows auth state can be swapped on a live provider, so
please confirm account_id and model cannot change after the first call;
otherwise the key keeps identifying the old (account, model) pair.

 .../engine/providers/codex_llm.py | 21 ++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py b/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py
index 802dff26b..6c63b99a6 100644
--- a/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py
+++ b/hindsight-api-slim/hindsight_api/engine/providers/codex_llm.py
@@ -16,10 +16,11 @@
 """
 
 import asyncio
+import functools
+import hashlib
 import json
 import logging
 import time
-import uuid
 from pathlib import Path
 from typing import Any
 
@@ -155,6 +156,20 @@ def _auth_file(self) -> Path:
     def _auth_file(self, v: Path) -> None:
         self._auth_manager._auth_file = v
 
+    @functools.cached_property
+    def _prompt_cache_key(self) -> str:
+        """Stable ``prompt_cache_key`` for OpenAI/Codex prompt-cache routing.
+
+        OpenAI uses ``prompt_cache_key`` as an explicit hint for routing
+        requests to a cached-prefix backend, so it must stay constant across
+        calls that share the same system instructions + tools prefix. A fresh
+        ``uuid4`` per request defeats that routing and forces a 100% cache miss.
+        Derive it once per provider instance from (account, model) so it is
+        stable across requests but still distinct per account/model.
+        """
+        seed = f"{self.account_id}:{self.model}"
+        return hashlib.sha256(seed.encode("utf-8")).hexdigest()[:32]
+
     # ------------------------------------------------------------------
     # Forwarding methods (keep surface area for tests / subclasses)
     # ------------------------------------------------------------------
@@ -388,7 +403,7 @@ async def call(
             "store": False,  # Codex uses stateless mode
             "stream": True,  # SSE streaming
             "include": ["reasoning.encrypted_content"],
-            "prompt_cache_key": str(uuid.uuid4()),
+            "prompt_cache_key": self._prompt_cache_key,
         }
 
         headers = {
@@ -711,7 +726,7 @@ async def call_with_tools(
             "store": False,
             "stream": True,
             "include": ["reasoning.encrypted_content"],
-            "prompt_cache_key": str(uuid.uuid4()),
+            "prompt_cache_key": self._prompt_cache_key,
         }
 
         headers = {