Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions hindsight-api-slim/hindsight_api/engine/reflect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,9 @@ def _json_schema_type_to_python(field_schema: dict) -> type:
],
response_format=DynamicModel,
scope="reflect_structured",
max_retries=1,
initial_backoff=0.25,
max_backoff=1.0,
skip_validation=True, # We'll handle the dict ourselves
return_usage=True,
)
Expand Down
1 change: 0 additions & 1 deletion hindsight-api-slim/tests/test_chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,4 +455,3 @@ def test_merged_json_array_routes_to_conversation_chunking():
assert isinstance(parsed, list), f"Chunk must be a JSON array: {chunk[:60]}"
assert all(isinstance(e, dict) for e in parsed), f"Every element must be a dict: {chunk[:60]}"
assert all("role" in e for e in parsed), f"Every element must have a role key: {chunk[:60]}"

32 changes: 31 additions & 1 deletion hindsight-api-slim/tests/test_reflect_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
_clean_answer_text,
_clean_done_answer,
_count_messages_tokens,
_generate_structured_output,
_is_context_overflow_error,
_is_done_tool,
_normalize_tool_name,
Expand Down Expand Up @@ -234,6 +235,36 @@ def test_empty_list_is_vacuously_usable(self):
assert _all_mental_models_are_usable_and_fresh({}) is True


class TestReflectStructuredOutput:
    """Tests for the second-pass structured-output extraction."""

    @pytest.mark.asyncio
    async def test_structured_output_uses_short_retry_budget(self):
        """A provider-specific structured-output failure must not consume the full reflect timeout.

        The mocked LLM call always raises; the test expects a result whose
        ``structured_output`` is ``None`` and checks the retry/backoff keyword
        arguments that were passed to the LLM call.
        """
        # Minimal object schema with a single required string field.
        schema = {
            "type": "object",
            "properties": {
                "summary": {"type": "string"},
            },
            "required": ["summary"],
        }

        # Every awaited call fails with the simulated provider error.
        failing_call = AsyncMock(side_effect=RuntimeError("empty message content: finish_reason=length"))
        fake_llm = MagicMock()
        fake_llm.call = failing_call

        outcome = await _generate_structured_output(
            answer="Alice prefers concise engineering updates.",
            response_schema=schema,
            llm_config=fake_llm,
            reflect_id="test-reflect",
        )

        assert outcome.structured_output is None

        # Inspect the keyword arguments of the most recent awaited call.
        sent_kwargs = failing_call.await_args.kwargs
        assert sent_kwargs["scope"] == "reflect_structured"
        assert sent_kwargs["max_retries"] == 1
        assert sent_kwargs["initial_backoff"] == 0.25
        assert sent_kwargs["max_backoff"] == 1.0

class TestReflectAgentMocked:
"""Test reflect agent with mocked LLM outputs."""

Expand Down Expand Up @@ -1022,7 +1053,6 @@ async def test_reflect_completes_with_tiny_context_budget(self, memory, request_

# Patch get_config where memory_engine uses it, injecting a tiny
# max_context_tokens. Everything else delegates to the real config.
real_config = memory._get_raw_config() if hasattr(memory, "_get_raw_config") else None
from hindsight_api.config import get_config as _real_get_config

class _TinyContextProxy:
Expand Down
33 changes: 19 additions & 14 deletions hindsight-api-slim/tests/test_retain_append_mode.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ async def test_append_mode_concatenates_content(memory, request_context):
assert "Alice works at Google" in v1_text

# Second retain with append — add new content
v2_units = await memory.retain_batch_async(
await memory.retain_batch_async(
bank_id=bank_id,
contents=[
{
Expand Down Expand Up @@ -254,10 +254,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques

try:
# First retain - JSON conversation array
turn1 = json.dumps([
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there"},
])
turn1 = json.dumps(
[
{"role": "user", "content": "Hello"},
{"role": "assistant", "content": "Hi there"},
]
)
await memory.retain_batch_async(
bank_id=bank_id,
contents=[
Expand All @@ -271,10 +273,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
)

# Second retain - append more turns
turn2 = json.dumps([
{"role": "user", "content": "How are you"},
{"role": "assistant", "content": "Doing well"},
])
turn2 = json.dumps(
[
{"role": "user", "content": "How are you"},
{"role": "assistant", "content": "Doing well"},
]
)
await memory.retain_batch_async(
bank_id=bank_id,
contents=[
Expand All @@ -300,10 +304,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
assert len(parsed) == 4, "Should contain all 4 messages from both retains"

# Third retain - append again, verify no degradation
turn3 = json.dumps([
{"role": "user", "content": "What is new"},
{"role": "assistant", "content": "Not much"},
])
turn3 = json.dumps(
[
{"role": "user", "content": "What is new"},
{"role": "assistant", "content": "Not much"},
]
)
await memory.retain_batch_async(
bank_id=bank_id,
contents=[
Expand All @@ -329,4 +335,3 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques

finally:
await memory.delete_bank(bank_id, request_context=request_context)