Skip to content

Commit ea4ffd3

Browse files
authored
feat: update methodology page — remove impl refs, add worked examples (#60)
* fix: quote Retention column in KPIs query and coerce pg string counts to numbers for Recharts
* fix: use exact case-sensitive column names in LLM schema prompt to prevent Postgres quoting errors
* feat: update methodology page — remove impl refs, add worked examples
  - Remove Script and Table header badges (internal details)
  - Remove 'Data Source' section (table names, script paths not user-facing)
  - Replace raw db field names in sub-score tables with plain English labels
  - Remove input_features column reference from Transparency card
  - Add 'Worked Examples' section with Maria T. (High, 0.699) and Jordan M. (Low, 0.386)
  - Each example: inputs table, step-by-step sub-score math, color-coded final score

Closes #59
1 parent 77663a9 commit ea4ffd3

5 files changed

Lines changed: 215 additions & 95 deletions

File tree

codebenders-dashboard/app/api/analyze/route.ts

Lines changed: 62 additions & 53 deletions
Original file line number | Diff line number | Diff line change
@@ -18,50 +18,55 @@ const queryPlanSchema = z.object({
1818
})
1919

2020
// Database schema configuration
21+
// IMPORTANT: Column names listed here are the EXACT case-sensitive names in PostgreSQL.
22+
// Mixed-case columns (e.g. "Cohort", "Retention") must be double-quoted in generated SQL.
23+
// All-lowercase columns (e.g. retention_probability) do not require quoting.
2124
const SCHEMA_INFO = {
2225
bscc: {
2326
database: "postgres",
2427
mainTable: "student_level_with_predictions",
2528
description: "Bishop State Community College student cohort data with retention, persistence, and completion metrics",
2629
columns: {
27-
// Key dimensions
28-
cohort: "Cohort year (numeric: 2019, 2020, etc.)",
29-
cohort_term: "Term of cohort entry (Fall, Spring, Summer)",
30-
student_guid: "Unique student identifier",
31-
institution_id: "Institution identifier (102030 for Bishop State)",
32-
33-
// Demographics
34-
gender: "Student gender",
35-
race: "Student race/ethnicity",
36-
student_age: "Age of student",
37-
first_gen: "First generation status",
38-
39-
// Academic info
40-
enrollment_type: "Type of enrollment",
41-
enrollment_intensity_first_term: "Enrollment intensity in first term (Full-Time, Part-Time)",
42-
program_of_study_year_1: "Program of study in year 1 (CIP code)",
43-
credential_type_sought_year_1: "Credential type being pursued",
44-
45-
// Performance metrics
46-
retention: "Retention indicator (0 or 1)",
47-
persistence: "Persistence indicator (0 or 1)",
48-
gpa_group_year_1: "GPA in year 1",
49-
gpa_group_term_1: "GPA in term 1",
50-
51-
// Credits
52-
number_of_credits_attempted_year_1: "Credits attempted in year 1",
53-
number_of_credits_earned_year_1: "Credits earned in year 1",
54-
number_of_credits_attempted_year_2: "Credits attempted in year 2",
55-
number_of_credits_earned_year_2: "Credits earned in year 2",
56-
57-
// Completion metrics
58-
time_to_credential: "Time to any credential",
59-
60-
// ML predictions
30+
// Key dimensions — MIXED CASE: must be double-quoted in SQL
31+
Cohort: "Cohort year (numeric: 2019, 2020, etc.) — write as \"Cohort\"",
32+
Cohort_Term: "Term of cohort entry (Fall, Spring, Summer) — write as \"Cohort_Term\"",
33+
Student_GUID: "Unique student identifier — write as \"Student_GUID\"",
34+
Institution_ID: "Institution identifier (102030 for Bishop State) — write as \"Institution_ID\"",
35+
36+
// Demographics — MIXED CASE: must be double-quoted in SQL
37+
Gender: "Student gender — write as \"Gender\"",
38+
Race: "Student race/ethnicity — write as \"Race\"",
39+
Student_Age: "Age of student (integer) — write as \"Student_Age\"",
40+
First_Gen: "First generation status — write as \"First_Gen\"",
41+
42+
// Academic info — MIXED CASE: must be double-quoted in SQL
43+
Enrollment_Type: "Type of enrollment — write as \"Enrollment_Type\"",
44+
Enrollment_Intensity_First_Term: "Enrollment intensity in first term (Full-Time, Part-Time) — write as \"Enrollment_Intensity_First_Term\"",
45+
Program_of_Study_Year_1: "Program of study in year 1 (CIP code) — write as \"Program_of_Study_Year_1\"",
46+
Credential_Type_Sought_Year_1: "Credential type being pursued — write as \"Credential_Type_Sought_Year_1\"",
47+
Math_Placement: "Math placement level (C=college-level, R=remedial, N=none) — write as \"Math_Placement\"",
48+
49+
// Performance metrics — MIXED CASE: must be double-quoted in SQL
50+
Retention: "Retention indicator (0 or 1) — write as \"Retention\"",
51+
Persistence: "Persistence indicator (0 or 1) — write as \"Persistence\"",
52+
GPA_Group_Year_1: "GPA in year 1 — write as \"GPA_Group_Year_1\"",
53+
GPA_Group_Term_1: "GPA in term 1 — write as \"GPA_Group_Term_1\"",
54+
55+
// Credits — MIXED CASE: must be double-quoted in SQL
56+
Number_of_Credits_Attempted_Year_1: "Credits attempted in year 1 — write as \"Number_of_Credits_Attempted_Year_1\"",
57+
Number_of_Credits_Earned_Year_1: "Credits earned in year 1 — write as \"Number_of_Credits_Earned_Year_1\"",
58+
Number_of_Credits_Attempted_Year_2: "Credits attempted in year 2 — write as \"Number_of_Credits_Attempted_Year_2\"",
59+
Number_of_Credits_Earned_Year_2: "Credits earned in year 2 — write as \"Number_of_Credits_Earned_Year_2\"",
60+
61+
// Completion metrics — MIXED CASE: must be double-quoted in SQL
62+
Time_to_Credential: "Time to any credential — write as \"Time_to_Credential\"",
63+
64+
// ML predictions — all lowercase: no quoting needed
6165
retention_probability: "Predicted probability of retention (0-1)",
6266
retention_risk_category: "Risk category (Low Risk, Moderate Risk, High Risk, Critical Risk)",
6367
at_risk_alert: "Early warning alert level (LOW, MODERATE, HIGH, URGENT)",
64-
predicted_gpa: "ML-predicted GPA",
68+
course_completion_rate: "Course completion rate (0-1)",
69+
passing_rate: "Course passing rate (0-1)",
6570
},
6671
},
6772
akron: {
@@ -107,39 +112,43 @@ KEY COLUMNS:
107112
${Object.entries(schemaInfo.columns).map(([col, desc]) => `- ${col}: ${desc}`).join("\n")}
108113
109114
CRITICAL SCHEMA NOTES:
110-
- cohort: NUMERIC year only (e.g., 2019, 2020) — NOT a string like "2024-Fall"
111-
- cohort_term: Term name (e.g., "Fall", "Spring", "Summer")
112-
- To filter by "Fall 2023", use: WHERE cohort = 2023 AND cohort_term = 'Fall'
113-
- student_age: INTEGER field — use direct numeric comparisons (e.g., student_age >= 25)
115+
- Column names with uppercase letters MUST be double-quoted in PostgreSQL SQL or the query will fail.
116+
CORRECT: WHERE "Cohort" = 2023 AND "Cohort_Term" = 'Fall'
117+
INCORRECT: WHERE cohort = 2023 AND cohort_term = 'Fall'
118+
- "Cohort": NUMERIC year only (e.g., 2019, 2020) — NOT a string like "2024-Fall"
119+
- "Cohort_Term": Term name (e.g., "Fall", "Spring", "Summer")
120+
- To filter by "Fall 2023", use: WHERE "Cohort" = 2023 AND "Cohort_Term" = 'Fall'
121+
- "Student_Age": INTEGER field — use direct numeric comparisons (e.g., "Student_Age" >= 25)
122+
- Lowercase ML columns (retention_probability, at_risk_alert, etc.) do NOT need quoting.
114123
- Use standard PostgreSQL syntax — no backtick quoting, no cross-database references
115124
116125
IMPORTANT QUERY INTERPRETATION RULES:
117126
118127
1. METRIC SELECTION:
119128
- ONLY include a metric if the user explicitly asks for retention, persistence, GPA, credits, etc.
120129
- If user asks to "segment", "compare", "show", "count", or "list" students → use COUNT(*) and NO specific metric
121-
- "retention" → AVG(retention) as retention_rate
122-
- "persistence" or "completion" → AVG(persistence) as completion_rate
123-
- "GPA" → AVG(gpa_group_year_1) as gpa
124-
- "credits" → AVG(number_of_credits_earned_year_1) as credits_earned
130+
- "retention" → AVG("Retention") as retention_rate
131+
- "persistence" or "completion" → AVG("Persistence") as completion_rate
132+
- "GPA" → AVG("GPA_Group_Year_1") as gpa
133+
- "credits" → AVG("Number_of_Credits_Earned_Year_1") as credits_earned
125134
- Otherwise → COUNT(*) as count
126135
127136
2. GROUPING & SEGMENTATION:
128137
- "segment by X" or "compare X" → GROUP BY X column
129138
- "by age", "age groups", "segment by age" → Use CASE statement to create age groups:
130139
CASE
131-
WHEN student_age < 25 THEN 'Under 25'
132-
WHEN student_age >= 25 THEN '25 and Over'
140+
WHEN "Student_Age" < 25 THEN 'Under 25'
141+
WHEN "Student_Age" >= 25 THEN '25 and Over'
133142
END AS age_group
134-
- "by gender" → GROUP BY gender
135-
- "by race" → GROUP BY race
136-
- "by cohort" → GROUP BY cohort
137-
- "by term" → GROUP BY cohort_term
143+
- "by gender" → GROUP BY "Gender"
144+
- "by race" → GROUP BY "Race"
145+
- "by cohort" → GROUP BY "Cohort"
146+
- "by term" → GROUP BY "Cohort_Term"
138147
139148
3. FILTERS:
140-
- "2023 cohort" → WHERE cohort = 2023
141-
- "Fall 2023" or "2023 Fall" → WHERE cohort = 2023 AND cohort_term = 'Fall'
142-
- Age filters: use numeric comparisons directly (e.g., student_age >= 25)
149+
- "2023 cohort" → WHERE "Cohort" = 2023
150+
- "Fall 2023" or "2023 Fall" → WHERE "Cohort" = 2023 AND "Cohort_Term" = 'Fall'
151+
- Age filters: use numeric comparisons directly (e.g., "Student_Age" >= 25)
143152
144153
4. VISUALIZATION:
145154
- Comparing groups (age, gender, race) → "bar"
@@ -163,7 +172,7 @@ Generate a query plan with:
163172
EXAMPLE for "segment students over 25 and under 25 in 2023 cohort":
164173
{
165174
"vizType": "bar",
166-
"sql": "SELECT CASE WHEN student_age < 25 THEN 'Under 25' ELSE '25 and Over' END AS age_group, COUNT(*) as count FROM student_level_with_predictions WHERE cohort = 2023 GROUP BY age_group ORDER BY age_group",
175+
"sql": "SELECT CASE WHEN \"Student_Age\" < 25 THEN 'Under 25' ELSE '25 and Over' END AS age_group, COUNT(*) as count FROM student_level_with_predictions WHERE \"Cohort\" = 2023 GROUP BY age_group ORDER BY age_group",
167176
"queryString": ""
168177
}
169178

codebenders-dashboard/app/api/dashboard/kpis/route.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ export async function GET(request: NextRequest) {
77

88
const sql = `
99
SELECT
10-
AVG(retention) * 100 as overall_retention_rate,
10+
AVG("Retention") * 100 as overall_retention_rate,
1111
AVG(retention_probability) * 100 as avg_predicted_retention,
1212
SUM(CASE WHEN at_risk_alert IN ('HIGH', 'URGENT') THEN 1 ELSE 0 END) as high_critical_risk_count,
1313
AVG(course_completion_rate) * 100 as avg_course_completion_rate,

0 commit comments

Comments
 (0)