vectorize-io · nicoloboschi · Jun 29, 2026 · Jun 27, 2026
diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py
@@ -239,6 +239,9 @@ def _json_schema_type_to_python(field_schema: dict) -> type:
             ],
             response_format=DynamicModel,
             scope="reflect_structured",
+            max_retries=1,
+            initial_backoff=0.25,
+            max_backoff=1.0,
             skip_validation=True,  # We'll handle the dict ourselves
             return_usage=True,
         )

diff --git a/hindsight-api-slim/tests/test_chunking.py b/hindsight-api-slim/tests/test_chunking.py
@@ -455,4 +455,3 @@ def test_merged_json_array_routes_to_conversation_chunking():
         assert isinstance(parsed, list), f"Chunk must be a JSON array: {chunk[:60]}"
         assert all(isinstance(e, dict) for e in parsed), f"Every element must be a dict: {chunk[:60]}"
         assert all("role" in e for e in parsed), f"Every element must have a role key: {chunk[:60]}"
-
diff --git a/hindsight-api-slim/tests/test_reflect_agent.py b/hindsight-api-slim/tests/test_reflect_agent.py
@@ -18,6 +18,7 @@
     _clean_answer_text,
     _clean_done_answer,
     _count_messages_tokens,
+    _generate_structured_output,
     _is_context_overflow_error,
     _is_done_tool,
     _normalize_tool_name,
@@ -234,6 +235,36 @@ def test_empty_list_is_vacuously_usable(self):
         assert _all_mental_models_are_usable_and_fresh({}) is True
 
 
+class TestReflectStructuredOutput:
+    """Tests for the second-pass structured-output extraction."""
+
+    @pytest.mark.asyncio
+    async def test_structured_output_uses_short_retry_budget(self):
+        """A provider-specific structured-output failure must not consume the full reflect timeout."""
+        llm = MagicMock()  # stand-in passed as llm_config; its call() is replaced on the next line so every await raises
+        llm.call = AsyncMock(side_effect=RuntimeError("empty message content: finish_reason=length"))
+
+        result = await _generate_structured_output(
+            answer="Alice prefers concise engineering updates.",
+            response_schema={
+                "type": "object",
+                "properties": {
+                    "summary": {"type": "string"},
+                },
+                "required": ["summary"],
+            },
+            llm_config=llm,
+            reflect_id="test-reflect",
+        )
+
+        assert result.structured_output is None  # the LLM failure must surface as "no structured output", not as an exception
+        call_kwargs = llm.call.await_args.kwargs  # keyword arguments of the most recent awaited llm.call
+        assert call_kwargs["scope"] == "reflect_structured"
+        assert call_kwargs["max_retries"] == 1  # this and the two backoff asserts pin the short retry budget
+        assert call_kwargs["initial_backoff"] == 0.25
+        assert call_kwargs["max_backoff"] == 1.0
+
+
 class TestReflectAgentMocked:
     """Test reflect agent with mocked LLM outputs."""
 
@@ -1022,7 +1053,6 @@ async def test_reflect_completes_with_tiny_context_budget(self, memory, request_
 
             # Patch get_config where memory_engine uses it, injecting a tiny
             # max_context_tokens.  Everything else delegates to the real config.
-            real_config = memory._get_raw_config() if hasattr(memory, "_get_raw_config") else None
             from hindsight_api.config import get_config as _real_get_config
 
             class _TinyContextProxy:

diff --git a/hindsight-api-slim/tests/test_retain_append_mode.py b/hindsight-api-slim/tests/test_retain_append_mode.py
@@ -43,7 +43,7 @@ async def test_append_mode_concatenates_content(memory, request_context):
         assert "Alice works at Google" in v1_text
 
         # Second retain with append — add new content
-        v2_units = await memory.retain_batch_async(
+        await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
                 {
@@ -254,10 +254,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
 
     try:
         # First retain - JSON conversation array
-        turn1 = json.dumps([
-            {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there"},
-        ])
+        turn1 = json.dumps(
+            [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+            ]
+        )
         await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
@@ -271,10 +273,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
         )
 
         # Second retain - append more turns
-        turn2 = json.dumps([
-            {"role": "user", "content": "How are you"},
-            {"role": "assistant", "content": "Doing well"},
-        ])
+        turn2 = json.dumps(
+            [
+                {"role": "user", "content": "How are you"},
+                {"role": "assistant", "content": "Doing well"},
+            ]
+        )
         await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
@@ -300,10 +304,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
         assert len(parsed) == 4, "Should contain all 4 messages from both retains"
 
         # Third retain - append again, verify no degradation
-        turn3 = json.dumps([
-            {"role": "user", "content": "What is new"},
-            {"role": "assistant", "content": "Not much"},
-        ])
+        turn3 = json.dumps(
+            [
+                {"role": "user", "content": "What is new"},
+                {"role": "assistant", "content": "Not much"},
+            ]
+        )
         await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
@@ -329,4 +335,3 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
 
     finally:
         await memory.delete_bank(bank_id, request_context=request_context)
-
Original file line number	Diff line number	Diff line change
Expand Up		@@ -455,4 +455,3 @@ def test_merged_json_array_routes_to_conversation_chunking():
		assert isinstance(parsed, list), f"Chunk must be a JSON array: {chunk[:60]}"
		assert all(isinstance(e, dict) for e in parsed), f"Every element must be a dict: {chunk[:60]}"
		assert all("role" in e for e in parsed), f"Every element must have a role key: {chunk[:60]}"