vectorize-io · nicoloboschi · Jun 29, 2026 · Jun 27, 2026 · Jun 27, 2026
diff --git a/hindsight-api-slim/hindsight_api/engine/response_models.py b/hindsight-api-slim/hindsight_api/engine/response_models.py
@@ -274,7 +274,7 @@ def parse_metadata(cls, v: Any) -> dict[str, str] | None:
     )
     scores: RecallScores | None = Field(
         None,
-        description="Recall scores from each pipeline stage (final/reranker/semantic/text). Not returned for source facts.",
+        description="Recall scores from each pipeline stage (final/reranker/semantic/keyword). Not returned for source facts.",
     )
 
 

diff --git a/hindsight-api-slim/tests/test_chunking.py b/hindsight-api-slim/tests/test_chunking.py
@@ -455,4 +455,3 @@ def test_merged_json_array_routes_to_conversation_chunking():
         assert isinstance(parsed, list), f"Chunk must be a JSON array: {chunk[:60]}"
         assert all(isinstance(e, dict) for e in parsed), f"Every element must be a dict: {chunk[:60]}"
         assert all("role" in e for e in parsed), f"Every element must have a role key: {chunk[:60]}"
-
diff --git a/hindsight-api-slim/tests/test_recall_min_score.py b/hindsight-api-slim/tests/test_recall_min_score.py
@@ -2,9 +2,9 @@
 
 Inserts memory_units with known content + real embeddings directly via SQL, then
 verifies that recall_async:
-  - returns a `scores` object (final/reranker/semantic/text) on every result,
+  - returns a `scores` object (final/reranker/semantic/keyword) on every result,
   - applies the post-query floors (`reranker`, `final`) to the scored results,
-  - applies the retrieval-level floors (`semantic`, `text`) inside the SQL arms,
+  - applies the retrieval-level floors (`semantic`, `keyword`) inside the SQL arms,
   - is unchanged by the default (`min_scores=None`).
 
 Filtering is deterministic post/pre-processing, so these assertions are direct —

diff --git a/hindsight-api-slim/tests/test_retain_append_mode.py b/hindsight-api-slim/tests/test_retain_append_mode.py
@@ -254,10 +254,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
 
     try:
         # First retain - JSON conversation array
-        turn1 = json.dumps([
-            {"role": "user", "content": "Hello"},
-            {"role": "assistant", "content": "Hi there"},
-        ])
+        turn1 = json.dumps(
+            [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi there"},
+            ]
+        )
         await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
@@ -271,10 +273,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
         )
 
         # Second retain - append more turns
-        turn2 = json.dumps([
-            {"role": "user", "content": "How are you"},
-            {"role": "assistant", "content": "Doing well"},
-        ])
+        turn2 = json.dumps(
+            [
+                {"role": "user", "content": "How are you"},
+                {"role": "assistant", "content": "Doing well"},
+            ]
+        )
         await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
@@ -300,10 +304,12 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
         assert len(parsed) == 4, "Should contain all 4 messages from both retains"
 
         # Third retain - append again, verify no degradation
-        turn3 = json.dumps([
-            {"role": "user", "content": "What is new"},
-            {"role": "assistant", "content": "Not much"},
-        ])
+        turn3 = json.dumps(
+            [
+                {"role": "user", "content": "What is new"},
+                {"role": "assistant", "content": "Not much"},
+            ]
+        )
         await memory.retain_batch_async(
             bank_id=bank_id,
             contents=[
@@ -329,4 +335,3 @@ async def test_append_mode_conversation_arrays_produce_valid_json(memory, reques
 
     finally:
         await memory.delete_bank(bank_id, request_context=request_context)
-
diff --git a/hindsight-docs/docs/developer/api/recall.mdx b/hindsight-docs/docs/developer/api/recall.mdx
@@ -377,7 +377,7 @@ The retrieval-level floors (`semantic`/`keyword`) change *which candidates are c
 
 **Use floors with care.** The reranker's scores are reliable for *ordering* but not as *absolute* values — a clearly-relevant memory can score `~0.001` on one query and `~1.0` on another, so a fixed cutoff risks silently dropping good results. Calibrate any threshold against the scores you actually observe (recall with no `min_scores` first and inspect the [`scores`](#scores) object).
 
-The threshold is compared against the same `score` value the response reports. See the note under [`score`](#score) on why the scale is relative, not absolute, before relying on a fixed threshold.
+Each threshold is compared against the matching field in the response [`scores`](#scores) object. See the note under [`scores`](#scores) on why the scale is relative, not absolute, before relying on a fixed threshold.
 
 ---
 

diff --git a/skills/hindsight-docs/references/developer/api/recall.md b/skills/hindsight-docs/references/developer/api/recall.md
@@ -622,7 +622,7 @@ The retrieval-level floors (`semantic`/`keyword`) change *which candidates are c
 
 **Use floors with care.** The reranker's scores are reliable for *ordering* but not as *absolute* values — a clearly-relevant memory can score `~0.001` on one query and `~1.0` on another, so a fixed cutoff risks silently dropping good results. Calibrate any threshold against the scores you actually observe (recall with no `min_scores` first and inspect the [`scores`](#scores) object).
 
-The threshold is compared against the same `score` value the response reports. See the note under [`score`](#score) on why the scale is relative, not absolute, before relying on a fixed threshold.
+Each threshold is compared against the matching field in the response [`scores`](#scores) object. See the note under [`scores`](#scores) on why the scale is relative, not absolute, before relying on a fixed threshold.
 
 ---
diff --git a/hindsight-api-slim/hindsight_api/engine/response_models.py b/hindsight-api-slim/hindsight_api/engine/response_models.py
@@ -274,7 +274,7 @@ def parse_metadata(cls, v: Any) -> dict[str, str] | None:
         )
         scores: RecallScores | None = Field(
             None,
-            description="Recall scores from each pipeline stage (final/reranker/semantic/text). Not returned for source facts.",
+            description="Recall scores from each pipeline stage (final/reranker/semantic/keyword). Not returned for source facts.",
         )
diff --git a/hindsight-api-slim/tests/test_chunking.py b/hindsight-api-slim/tests/test_chunking.py
@@ -455,4 +455,3 @@ def test_merged_json_array_routes_to_conversation_chunking():
		assert isinstance(parsed, list), f"Chunk must be a JSON array: {chunk[:60]}"
		assert all(isinstance(e, dict) for e in parsed), f"Every element must be a dict: {chunk[:60]}"
		assert all("role" in e for e in parsed), f"Every element must have a role key: {chunk[:60]}"