Skip to content

Commit b8216ed

Browse files
committed
feat: merge readiness PDP alignment, methodology page, and LiteLLM enrichment (#58)
2 parents a7342b5 + 0c74343 commit b8216ed

8 files changed

Lines changed: 645 additions & 10 deletions

File tree

ML_MODELS_GUIDE.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ This guide explains our machine learning models that predict student success out
3030
| **6. GPA Prediction** | What GPA will student achieve? | R²=0.25 | Identify over/underperformers |
3131
| **7. Time to Credential** | How many years until graduation? | R²=0.35 | Graduation timeline planning |
3232
| **8. Credential Type** | What degree will student earn? | Limited | Limited by data availability |
33+
| **9. Readiness Score** | How prepared is this student for success? | Rule-based | Advisor prioritization & intervention planning |
3334

3435

3536
---
@@ -345,6 +346,28 @@ Monitor: Students As Expected
345346

346347
---
347348

349+
## 📐 Model 9: Student Readiness Score (Rule-Based)
350+
351+
**Type:** Weighted rule engine (not ML)
352+
**Output:** `readiness_score` (0.0–1.0), `readiness_level` (high/medium/low)
353+
**Table:** `llm_recommendations`
354+
**Script:** `ai_model/generate_readiness_scores.py`
355+
356+
Unlike the 8 ML models above, the readiness score is a **deterministic rule-based system** aligned with Postsecondary Data Partnership (PDP) momentum metrics. It combines:
357+
358+
- **Academic sub-score (40%):** GPA, course completion rate, passing rate, gateway course completion, and Year 1 credit momentum (≥12 credits)
359+
- **Engagement sub-score (30%):** Enrollment intensity, total courses enrolled, math placement level
360+
- **ML risk sub-score (30%):** Retention probability and at-risk alert from Models 1 & 2 (inverted — higher retention probability = higher readiness)
361+
362+
See [`docs/READINESS_METHODOLOGY.md`](docs/READINESS_METHODOLOGY.md) for full formula, research citations, and upgrade path.
363+
364+
To regenerate scores:
365+
```bash
366+
venv/bin/python ai_model/generate_readiness_scores.py
367+
```
368+
369+
---
370+
348371
## 🎯 Which Students Should I Focus On?
349372

350373
### Priority 1: URGENT Students (206 students)

ai_model/generate_readiness_scores.py

Lines changed: 145 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
1010
Upgrade path: Option A (Ollama LLM) can write to the same table using
1111
source='ollama' and model_version='llama3.2:3b'. No schema or frontend changes
12-
needed.
12+
needed. The UPSERT uses ON CONFLICT ("Student_GUID") — each run overwrites the
13+
previous score for that student (latest always wins per student).
1314
1415
Usage:
1516
venv/bin/python ai_model/generate_readiness_scores.py
@@ -20,6 +21,7 @@
2021
import os
2122
import time
2223
import uuid
24+
import argparse
2325
from datetime import datetime, timezone
2426

2527
import pandas as pd
@@ -81,6 +83,22 @@ def create_run_record(conn) -> str:
8183
"""Insert a new run record and return its UUID."""
8284
run_id = str(uuid.uuid4())
8385
with conn.cursor() as cur:
86+
cur.execute(
87+
"""
88+
CREATE TABLE IF NOT EXISTS readiness_generation_runs (
89+
run_id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
90+
started_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
91+
completed_at TIMESTAMPTZ,
92+
source TEXT NOT NULL,
93+
model_version TEXT NOT NULL,
94+
students_input INTEGER,
95+
students_scored INTEGER,
96+
errors INTEGER DEFAULT 0,
97+
error_sample JSONB,
98+
triggered_by TEXT DEFAULT 'manual'
99+
)
100+
"""
101+
)
84102
cur.execute(
85103
"""
86104
INSERT INTO readiness_generation_runs
@@ -236,7 +254,18 @@ def compute_readiness(row) -> tuple:
236254
english_done = str(row.get("CompletedGatewayEnglishYear1", "")).strip().upper() in ("1", "Y", "YES", "TRUE", "C")
237255
gateway_component = 0.5 + (0.25 if math_done else 0.0) + (0.25 if english_done else 0.0)
238256

239-
academic_score = np.mean([gpa_component, completion_component, passing_component, gateway_component])
257+
credits_y1 = _safe_float(row.get("Number_of_Credits_Earned_Year_1"))
258+
if credits_y1 is None:
259+
credit_momentum_component = 0.5
260+
elif credits_y1 >= 12:
261+
credit_momentum_component = 1.0 # PDP 12-credit momentum milestone
262+
elif credits_y1 >= 6:
263+
credit_momentum_component = 0.6
264+
else:
265+
credit_momentum_component = 0.3
266+
267+
academic_score = np.mean([gpa_component, completion_component, passing_component,
268+
gateway_component, credit_momentum_component])
240269

241270
# --- Engagement sub-score ---
242271
intensity = str(row.get("Enrollment_Intensity_First_Term", "")).strip().upper()
@@ -250,7 +279,10 @@ def compute_readiness(row) -> tuple:
250279
total_courses = _safe_float(row.get("total_courses_enrolled"))
251280
courses_score = min(total_courses / 10.0, 1.0) if total_courses is not None else 0.5
252281

253-
engagement_score = np.mean([intensity_score, courses_score])
282+
math_placement = str(row.get("Math_Placement", "")).strip().upper()
283+
math_placement_score = {"C": 1.0, "R": 0.2, "N": 0.5}.get(math_placement, 0.5)
284+
285+
engagement_score = np.mean([intensity_score, courses_score, math_placement_score])
254286

255287
# --- ML sub-score (inverted risk = readiness) ---
256288
retention_prob = _safe_float(row.get("retention_probability"))
@@ -305,6 +337,10 @@ def build_risk_factors(row) -> list:
305337
if not english_done:
306338
factors.append("Gateway English not completed in Year 1")
307339

340+
credits_y1 = _safe_float(row.get("Number_of_Credits_Earned_Year_1"))
341+
if credits_y1 is not None and credits_y1 < 12:
342+
factors.append(f"Below 12-credit Year 1 milestone ({int(credits_y1)} credits earned)")
343+
308344
alert = str(row.get("at_risk_alert", "")).strip().upper()
309345
if alert in ("URGENT", "HIGH"):
310346
display_alert = alert.capitalize()
@@ -343,6 +379,9 @@ def build_suggested_actions(risk_factors: list) -> list:
343379
if "below average course completion" in factor_text:
344380
actions.append("Review course withdrawal patterns with advisor")
345381

382+
if "12-credit year 1 milestone" in factor_text:
383+
actions.append("Increase credit load to reach 12-credit first-year milestone")
384+
346385
return actions
347386

348387

@@ -404,11 +443,112 @@ def score_student(row) -> dict:
404443
}
405444

406445

446+
# ============================================================================
447+
# LLM Enrichment (optional)
448+
# ============================================================================
449+
450+
def enrich_with_llm(record: dict, model: str) -> dict:
    """
    Replace rationale and suggested_actions with LLM-generated content.
    Only called for medium/low readiness students.
    Input is the FERPA-safe profile — no PII sent to any external service.
    Returns the record with enriched text fields (score unchanged).

    Provider is determined by the model string:
      "gpt-4o-mini"               -> OpenAI (requires OPENAI_API_KEY)
      "ollama/llama3.2:3b"        -> local Ollama (no key needed)
      "claude-haiku-4-5-20251001" -> Anthropic (requires ANTHROPIC_API_KEY)

    litellm is imported lazily so the default (no-flag) run has no extra
    dependencies and installs faster in minimal environments.
    """
    import litellm as _litellm  # lazy import — only needed with --enrich-with-llm
    from litellm import completion as llm_completion
    _litellm.telemetry = False  # opt out of LiteLLM usage telemetry

    profile = json.loads(record["input_features"]) if isinstance(record["input_features"], str) else record["input_features"]
    # BUG FIX: the previous non-string branch fell back to [] and silently
    # dropped an already-deserialized risk-factor list from the prompt.
    # Mirror the input_features handling above: parse strings, pass lists through.
    raw_factors = record.get("risk_factors", [])
    risk_factors = json.loads(raw_factors) if isinstance(raw_factors, str) else raw_factors

    prompt = f"""You are an academic advisor assistant at Bishop State Community College.
A student has a readiness score of {record['readiness_score']:.2f} ({record['readiness_level']} readiness).

Student profile (no PII):
- Enrollment: {profile.get('enrollment_type')} / {profile.get('enrollment_intensity')}
- First-year GPA: {profile.get('gpa_year1')}
- Course completion rate: {profile.get('course_completion_rate')}
- Gateway math completed: {profile.get('gateway_math_completed')}
- Gateway English completed: {profile.get('gateway_english_completed')}
- Credits earned Year 1: {profile.get('credits_earned_y1')}
- Math placement: {profile.get('math_placement')}
- At-risk alert: {profile.get('at_risk_alert')}
- Retention probability: {profile.get('retention_probability')}

Identified risk factors:
{chr(10).join(f'- {f}' for f in risk_factors)}

Write two things:
1. RATIONALE: A 2-sentence explanation of this student's readiness score for an advisor.
2. ACTIONS: A JSON array of 3-5 specific, actionable intervention recommendations (strings only).

Format your response exactly as:
RATIONALE: <text>
ACTIONS: <json array>"""

    try:
        response = llm_completion(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=400,
            temperature=0.3,
        )
        text = response.choices[0].message.content.strip()

        # Parse the two expected tagged lines; tolerate missing/extra lines
        # by leaving the corresponding rule-generated field untouched.
        rationale_line = next((line for line in text.split("\n") if line.startswith("RATIONALE:")), None)
        actions_line = next((line for line in text.split("\n") if line.startswith("ACTIONS:")), None)

        if rationale_line:
            record["rationale"] = rationale_line.replace("RATIONALE:", "").strip()
        if actions_line:
            raw_actions = actions_line.replace("ACTIONS:", "").strip()
            try:
                json.loads(raw_actions)  # validate parseable JSON before storing
                record["suggested_actions"] = raw_actions
            except json.JSONDecodeError:
                pass  # keep rule-generated suggested_actions on malformed LLM output

    except Exception as e:
        # Enrichment is best-effort: any provider/network failure leaves the
        # rule-generated rationale/actions in place and the score untouched.
        print(f" ⚠ LLM enrichment failed for {record['Student_GUID']}: {e}")

    return record
524+
525+
407526
# ============================================================================
408527
# Main
409528
# ============================================================================
410529

411530
def main():
531+
parser = argparse.ArgumentParser(description="Generate student readiness scores")
532+
parser.add_argument(
533+
"--enrich-with-llm",
534+
action="store_true",
535+
help="Enrich rationale and suggested_actions for medium/low students via LiteLLM",
536+
)
537+
parser.add_argument(
538+
"--llm-model",
539+
default="gpt-4o-mini",
540+
help=(
541+
"LiteLLM model string (default: gpt-4o-mini). Examples: "
542+
"'ollama/llama3.2:3b', 'claude-haiku-4-5-20251001'. "
543+
"Credentials resolved automatically from environment variables."
544+
),
545+
)
546+
args = parser.parse_args()
547+
548+
if args.enrich_with_llm:
549+
print(f"✓ LLM enrichment enabled — model: {args.llm_model}")
550+
print(" (medium/low readiness students only; score is never changed)")
551+
412552
print("=" * 70)
413553
print("READINESS SCORE RULE ENGINE")
414554
print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
@@ -447,6 +587,8 @@ def main():
447587
elapsed_ms = int((time.monotonic() - t0) * 1000)
448588
record["generation_ms"] = elapsed_ms
449589
record["run_id"] = run_id
590+
if args.enrich_with_llm and record["readiness_level"] in ("medium", "low"):
591+
record = enrich_with_llm(record, args.llm_model)
450592
records.append(record)
451593
except Exception as e:
452594
errors += 1

0 commit comments

Comments
 (0)