fix: address CodeRabbit review findings

William-Hill · William-Hill · commit 88f5bbaa60cb · 2026-04-02T23:34:18.000-04:00
- check_ship_criteria() now iterates required criteria first; a required
  metric that is missing from the results is a blocking failure (reported
  with actual=0.0) instead of silently passing
- load_seed_queries() missing-file fallback includes narrator key
- Add .superpowers/ to .gitignore (runtime state files)
- Update test expectation for narrator in seed query fallback
diff --git a/.gitignore b/.gitignore
@@ -189,3 +189,6 @@ data/test_uploads/
 
 # Training pipeline artifacts
 training_data/
+
+# Superpowers runtime state
+.superpowers/
diff --git a/tests/training/test_seed.py b/tests/training/test_seed.py
@@ -35,7 +35,7 @@ def test_loads_valid_yaml(self, tmp_path):
     def test_returns_empty_on_missing_file(self, tmp_path):
         with patch("training.seed.get_school_dir", return_value=tmp_path):
             result = load_seed_queries("test-school")
-        assert result == {"explainer": [], "summarizer": []}
+        assert result == {"narrator": [], "explainer": [], "summarizer": []}
 
 
 class TestGenerateSyntheticCoursePairings:
diff --git a/training/eval.py b/training/eval.py
@@ -241,19 +241,24 @@ def check_ship_criteria(metrics: dict[str, float], task: str) -> ShipDecision:
     blocking_failures: list[CriterionFailure] = []
     warnings: list[str] = []
 
+    # Check all required criteria — missing metrics are blocking failures
+    for metric, threshold in criteria.items():
+        value = metrics.get(metric)
+        if value is None:
+            blocking_failures.append(
+                CriterionFailure(metric=metric, threshold=threshold, actual=0.0)
+            )
+        elif value < threshold:
+            blocking_failures.append(
+                CriterionFailure(metric=metric, threshold=threshold, actual=value)
+            )
+
+    # Check informational metrics (present in metrics but not in criteria)
     for metric, value in metrics.items():
-        threshold = criteria.get(metric)
-        if threshold is not None:
-            if value < threshold:
-                blocking_failures.append(
-                    CriterionFailure(metric=metric, threshold=threshold, actual=value)
-                )
-        else:
-            # Informational metric — warn if very low
-            if value < 0.5:
-                warnings.append(
-                    f"{metric} is low ({value:.3f}) — consider improving before deploying"
-                )
+        if metric not in criteria and value < 0.5:
+            warnings.append(
+                f"{metric} is low ({value:.3f}) — consider improving before deploying"
+            )
 
     if blocking_failures:
         decision = "no_ship"
diff --git a/training/seed.py b/training/seed.py
@@ -164,7 +164,7 @@ def load_seed_queries(school: str) -> dict[str, list[dict]]:
     """Load seed queries from a school's seed_queries.yaml."""
     seed_path = get_school_dir(school) / "seed_queries.yaml"
     if not seed_path.exists():
-        return {"explainer": [], "summarizer": []}
+        return {"narrator": [], "explainer": [], "summarizer": []}
     with seed_path.open("r", encoding="utf-8") as fh:
         data = yaml.safe_load(fh) or {}
     return {