Skip to content

Commit 937a53f

Browse files
authored
fix: explicit block-type check in execute() text extraction (#11)
content_models.ThinkingContent has a .text attribute (distinct from message_models.ThinkingBlock which uses .thinking). The hasattr-based filter was letting thinking text leak into final_content, polluting downstream consumers of the response (e.g., parse_json in recipe steps). Replaces hasattr check with explicit block.type check. Same fix pattern as amplifier-module-loop-streaming PR #25 (merge df5c0e1). Adds regression test verifying ThinkingContent blocks are filtered while TextBlock/TextContent pass through.
1 parent 9d2b073 commit 937a53f

2 files changed

Lines changed: 292 additions & 12 deletions

File tree

amplifier_module_loop_basic/__init__.py

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -444,11 +444,19 @@ async def execute_single_tool(
444444
display_name = tool_name
445445
if tool_name == "delegate":
446446
try:
447-
_args = args if isinstance(args, dict) else json.loads(args)
447+
_args = (
448+
args
449+
if isinstance(args, dict)
450+
else json.loads(args)
451+
)
448452
_agent = _args.get("agent", "")
449453
if _agent:
450454
display_name = _agent
451-
except (json.JSONDecodeError, TypeError, AttributeError):
455+
except (
456+
json.JSONDecodeError,
457+
TypeError,
458+
AttributeError,
459+
):
452460
pass
453461
coordinator.cancellation.register_tool_start(
454462
tool_call_id, display_name
@@ -630,10 +638,12 @@ async def execute_single_tool(
630638
# Without this, transcript has tool_results without a closing assistant
631639
# message, triggering FM3 (incomplete_assistant_turn) on resume.
632640
if hasattr(context, "add_message"):
633-
await context.add_message({
634-
"role": "assistant",
635-
"content": "The previous operation was cancelled. Results from completed tools have been preserved.",
636-
})
641+
await context.add_message(
642+
{
643+
"role": "assistant",
644+
"content": "The previous operation was cancelled. Results from completed tools have been preserved.",
645+
}
646+
)
637647
# Re-raise to let the cancellation propagate
638648
raise
639649

@@ -687,10 +697,12 @@ async def execute_single_tool(
687697
# Without this, transcript has tool_results without a closing assistant
688698
# message, triggering FM3 (incomplete_assistant_turn) on resume.
689699
if hasattr(context, "add_message"):
690-
await context.add_message({
691-
"role": "assistant",
692-
"content": "The previous operation was cancelled. Results from completed tools have been preserved.",
693-
})
700+
await context.add_message(
701+
{
702+
"role": "assistant",
703+
"content": "The previous operation was cancelled. Results from completed tools have been preserved.",
704+
}
705+
)
694706
return final_content
695707

696708
# Add all tool results to context in original order (deterministic)
@@ -714,9 +726,19 @@ async def execute_single_tool(
714726
if isinstance(content, list):
715727
text_parts = []
716728
for block in content:
717-
if hasattr(block, "text"):
729+
block_type = getattr(block, "type", None)
730+
type_value = (
731+
getattr(block_type, "value", block_type)
732+
if block_type
733+
else None
734+
)
735+
if type_value == "text" and hasattr(block, "text"):
718736
text_parts.append(block.text)
719-
elif isinstance(block, dict) and "text" in block:
737+
elif (
738+
isinstance(block, dict)
739+
and block.get("type", "text") == "text"
740+
and "text" in block
741+
):
720742
text_parts.append(block["text"])
721743
final_content = "\n\n".join(text_parts) if text_parts else ""
722744
else:

tests/test_thinking_block_leak.py

Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
"""Regression tests for thinking block content leaking into text extraction.
2+
3+
Root cause:
4+
There are TWO content block model systems:
5+
- content_models.ThinkingContent → has .text (was the bug hazard)
6+
- message_models.ThinkingBlock → has .thinking (already safe)
7+
8+
The old execute() inline text extraction used ``hasattr(block, "text")``
9+
which allowed ThinkingContent objects through unchanged, leaking thinking
10+
text into final_content and ultimately into downstream parse_json calls
11+
(causing the "Cannot access 'task_id' on str, not dict" failure mode).
12+
13+
Fix:
14+
Use an explicit ``block.type == "text"`` guard so only text blocks are
15+
included, regardless of which model system the block comes from.
16+
17+
RED / GREEN verification:
18+
Run against the unfixed code to see test_thinking_content_does_not_leak
19+
FAIL (thinking text present in result).
20+
Run after the fix to see all tests PASS.
21+
22+
Cross-ecosystem:
23+
Same fix pattern as amplifier-module-loop-streaming PR #25 (df5c0e1).
24+
"""
25+
26+
import pytest
27+
28+
from amplifier_core.testing import EventRecorder, MockContextManager
29+
30+
from amplifier_module_loop_basic import BasicOrchestrator
31+
32+
33+
# ---------------------------------------------------------------------------
34+
# Helpers
35+
# ---------------------------------------------------------------------------
36+
37+
38+
def _orch():
39+
return BasicOrchestrator({})
40+
41+
42+
def _make_fake_response(blocks):
43+
"""Create a fake provider response with the given content blocks."""
44+
45+
class FakeResponse:
46+
content = blocks
47+
tool_calls = None
48+
usage = None
49+
content_blocks = None
50+
metadata = None
51+
52+
return FakeResponse()
53+
54+
55+
class FakeProvider:
56+
"""Mock provider that returns a pre-configured response."""
57+
58+
name = "mock-thinking"
59+
60+
def __init__(self, response):
61+
self._response = response
62+
63+
async def complete(self, request, **kwargs):
64+
return self._response
65+
66+
67+
# ---------------------------------------------------------------------------
68+
# Primary regression test: ThinkingContent (content_models) must be filtered
69+
# ---------------------------------------------------------------------------
70+
71+
72+
@pytest.mark.asyncio
73+
async def test_thinking_content_does_not_leak_into_final_content():
74+
"""content_models.ThinkingContent has .text — must be filtered by type check.
75+
76+
This is the primary regression test. Before the fix, the
77+
hasattr(block, "text") guard would include ThinkingContent blocks because
78+
they *do* have a .text attribute, just with type="thinking".
79+
80+
RED (before fix): result contains "internal reasoning" — thinking text leaked.
81+
GREEN (after fix): result is ONLY "real response".
82+
"""
83+
from amplifier_core.content_models import TextContent, ThinkingContent
84+
85+
orchestrator = _orch()
86+
context = MockContextManager()
87+
hooks = EventRecorder()
88+
89+
response = _make_fake_response(
90+
[
91+
ThinkingContent(text="internal reasoning"),
92+
TextContent(text="real response"),
93+
]
94+
)
95+
provider = FakeProvider(response)
96+
97+
result = await orchestrator.execute(
98+
prompt="Test",
99+
context=context,
100+
providers={"default": provider},
101+
tools={},
102+
hooks=hooks,
103+
)
104+
105+
# Thinking text must NOT appear in the final content
106+
assert "internal reasoning" not in result, (
107+
f"Thinking text leaked into final_content: {result!r}"
108+
)
109+
# Only the TextContent payload should be present
110+
assert "real response" in result, (
111+
f"Expected 'real response' in result but got: {result!r}"
112+
)
113+
114+
115+
# ---------------------------------------------------------------------------
116+
# Complementary test: ThinkingBlock (message_models) was already safe
117+
# ---------------------------------------------------------------------------
118+
119+
120+
@pytest.mark.asyncio
121+
async def test_thinking_block_does_not_leak_into_final_content():
122+
"""message_models.ThinkingBlock has .thinking (not .text) — already safe.
123+
124+
ThinkingBlock was not affected by the original bug (no .text attribute),
125+
but this test documents that it remains excluded after the type-check
126+
refactor.
127+
"""
128+
from amplifier_core.message_models import TextBlock, ThinkingBlock
129+
130+
orchestrator = _orch()
131+
context = MockContextManager()
132+
hooks = EventRecorder()
133+
134+
response = _make_fake_response(
135+
[
136+
ThinkingBlock(thinking="internal reasoning", signature="sig"),
137+
TextBlock(text="real response"),
138+
]
139+
)
140+
provider = FakeProvider(response)
141+
142+
result = await orchestrator.execute(
143+
prompt="Test",
144+
context=context,
145+
providers={"default": provider},
146+
tools={},
147+
hooks=hooks,
148+
)
149+
150+
assert "internal reasoning" not in result, (
151+
f"ThinkingBlock content leaked into final_content: {result!r}"
152+
)
153+
assert "real response" in result, (
154+
f"Expected 'real response' in result but got: {result!r}"
155+
)
156+
157+
158+
# ---------------------------------------------------------------------------
159+
# Smoke test: normal TextContent and TextBlock pass through correctly
160+
# ---------------------------------------------------------------------------
161+
162+
163+
@pytest.mark.asyncio
164+
async def test_text_content_passes_through():
165+
"""content_models.TextContent is included in final_content as expected."""
166+
from amplifier_core.content_models import TextContent
167+
168+
orchestrator = _orch()
169+
context = MockContextManager()
170+
hooks = EventRecorder()
171+
172+
response = _make_fake_response([TextContent(text="hello from TextContent")])
173+
provider = FakeProvider(response)
174+
175+
result = await orchestrator.execute(
176+
prompt="Test",
177+
context=context,
178+
providers={"default": provider},
179+
tools={},
180+
hooks=hooks,
181+
)
182+
183+
assert result == "hello from TextContent", f"Unexpected result: {result!r}"
184+
185+
186+
@pytest.mark.asyncio
187+
async def test_text_block_passes_through():
188+
"""message_models.TextBlock is included in final_content as expected."""
189+
from amplifier_core.message_models import TextBlock
190+
191+
orchestrator = _orch()
192+
context = MockContextManager()
193+
hooks = EventRecorder()
194+
195+
response = _make_fake_response([TextBlock(text="hello from TextBlock")])
196+
provider = FakeProvider(response)
197+
198+
result = await orchestrator.execute(
199+
prompt="Test",
200+
context=context,
201+
providers={"default": provider},
202+
tools={},
203+
hooks=hooks,
204+
)
205+
206+
assert result == "hello from TextBlock", f"Unexpected result: {result!r}"
207+
208+
209+
@pytest.mark.asyncio
210+
async def test_dict_text_block_passes_through():
211+
"""Dict blocks with type='text' pass through correctly."""
212+
orchestrator = _orch()
213+
context = MockContextManager()
214+
hooks = EventRecorder()
215+
216+
response = _make_fake_response([{"type": "text", "text": "dict text block"}])
217+
provider = FakeProvider(response)
218+
219+
result = await orchestrator.execute(
220+
prompt="Test",
221+
context=context,
222+
providers={"default": provider},
223+
tools={},
224+
hooks=hooks,
225+
)
226+
227+
assert result == "dict text block", f"Unexpected result: {result!r}"
228+
229+
230+
@pytest.mark.asyncio
231+
async def test_dict_thinking_block_is_filtered():
232+
"""Dict blocks with type='thinking' are filtered out (consistency fix)."""
233+
orchestrator = _orch()
234+
context = MockContextManager()
235+
hooks = EventRecorder()
236+
237+
response = _make_fake_response(
238+
[
239+
{"type": "thinking", "text": "dict thinking block"},
240+
{"type": "text", "text": "dict real response"},
241+
]
242+
)
243+
provider = FakeProvider(response)
244+
245+
result = await orchestrator.execute(
246+
prompt="Test",
247+
context=context,
248+
providers={"default": provider},
249+
tools={},
250+
hooks=hooks,
251+
)
252+
253+
assert "dict thinking block" not in result, (
254+
f"Dict thinking block leaked into final_content: {result!r}"
255+
)
256+
assert "dict real response" in result, (
257+
f"Expected 'dict real response' in result but got: {result!r}"
258+
)

0 commit comments

Comments
 (0)