diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py index 8a0ecda5c..f08e21b96 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py @@ -239,6 +239,9 @@ def _json_schema_type_to_python(field_schema: dict) -> type: ], response_format=DynamicModel, scope="reflect_structured", + max_retries=1, + initial_backoff=0.25, + max_backoff=1.0, skip_validation=True, # We'll handle the dict ourselves return_usage=True, ) diff --git a/hindsight-api-slim/tests/test_chunking.py b/hindsight-api-slim/tests/test_chunking.py index ea69a4656..e6e40f610 100644 --- a/hindsight-api-slim/tests/test_chunking.py +++ b/hindsight-api-slim/tests/test_chunking.py @@ -455,4 +455,3 @@ def test_merged_json_array_routes_to_conversation_chunking(): assert isinstance(parsed, list), f"Chunk must be a JSON array: {chunk[:60]}" assert all(isinstance(e, dict) for e in parsed), f"Every element must be a dict: {chunk[:60]}" assert all("role" in e for e in parsed), f"Every element must have a role key: {chunk[:60]}" - diff --git a/hindsight-api-slim/tests/test_reflect_agent.py b/hindsight-api-slim/tests/test_reflect_agent.py index 5eb86ddb6..4d13b3982 100644 --- a/hindsight-api-slim/tests/test_reflect_agent.py +++ b/hindsight-api-slim/tests/test_reflect_agent.py @@ -18,6 +18,7 @@ _clean_answer_text, _clean_done_answer, _count_messages_tokens, + _generate_structured_output, _is_context_overflow_error, _is_done_tool, _normalize_tool_name, @@ -234,6 +235,36 @@ def test_empty_list_is_vacuously_usable(self): assert _all_mental_models_are_usable_and_fresh({}) is True +class TestReflectStructuredOutput: + """Tests for the second-pass structured-output extraction.""" + + @pytest.mark.asyncio + async def test_structured_output_uses_short_retry_budget(self): + """A provider-specific structured-output failure must not consume the full reflect timeout.""" + llm = MagicMock() + llm.call = AsyncMock(side_effect=RuntimeError("empty message content: finish_reason=length")) + + result = await _generate_structured_output( + answer="Alice prefers concise engineering updates.", + response_schema={ + "type": "object", + "properties": { + "summary": {"type": "string"}, + }, + "required": ["summary"], + }, + llm_config=llm, + reflect_id="test-reflect", + ) + + assert result.structured_output is None + call_kwargs = llm.call.await_args.kwargs + assert call_kwargs["scope"] == "reflect_structured" + assert call_kwargs["max_retries"] == 1 + assert call_kwargs["initial_backoff"] == 0.25 + assert call_kwargs["max_backoff"] == 1.0 + + class TestReflectAgentMocked: """Test reflect agent with mocked LLM outputs.""" @@ -1022,7 +1053,6 @@ async def test_reflect_completes_with_tiny_context_budget(self, memory, request_ # Patch get_config where memory_engine uses it, injecting a tiny # max_context_tokens. Everything else delegates to the real config. - real_config = memory._get_raw_config() if hasattr(memory, "_get_raw_config") else None from hindsight_api.config import get_config as _real_get_config class _TinyContextProxy: diff --git a/hindsight-api-slim/tests/test_retain_append_mode.py b/hindsight-api-slim/tests/test_retain_append_mode.py index d02b65b35..ffbcefe44 100644 --- a/hindsight-api-slim/tests/test_retain_append_mode.py +++ b/hindsight-api-slim/tests/test_retain_append_mode.py @@ -43,7 +43,7 @@ async def test_append_mode_concatenates_content(memory, request_context): assert "Alice works at Google" in v1_text # Second retain with append — add new content - v2_units = await memory.retain_batch_async( + await memory.retain_batch_async( bank_id=bank_id, contents=[ { @@ -254,10 +254,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques try: # First retain - JSON conversation array - turn1 = json.dumps([ - {"role": "user", "content": "Hello"}, - {"role": "assistant", "content": "Hi there"}, - ]) + turn1 = json.dumps( + [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": "Hi there"}, + ] + ) await memory.retain_batch_async( bank_id=bank_id, contents=[ @@ -271,10 +273,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques ) # Second retain - append more turns - turn2 = json.dumps([ - {"role": "user", "content": "How are you"}, - {"role": "assistant", "content": "Doing well"}, - ]) + turn2 = json.dumps( + [ + {"role": "user", "content": "How are you"}, + {"role": "assistant", "content": "Doing well"}, + ] + ) await memory.retain_batch_async( bank_id=bank_id, contents=[ @@ -300,10 +304,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques assert len(parsed) == 4, "Should contain all 4 messages from both retains" # Third retain - append again, verify no degradation - turn3 = json.dumps([ - {"role": "user", "content": "What is new"}, - {"role": "assistant", "content": "Not much"}, - ]) + turn3 = json.dumps( + [ + {"role": "user", "content": "What is new"}, + {"role": "assistant", "content": "Not much"}, + ] + ) await memory.retain_batch_async( bank_id=bank_id, contents=[ @@ -329,4 +335,3 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques finally: await memory.delete_bank(bank_id, request_context=request_context) -