Merge pull request #47 from devcolor/rebranding/task-5-ml-pipeline

William-Hill · web-flow · commit 044b987f839d · 2026-02-17T21:43:39.000-06:00
Task 5: Update ML pipeline for Bishop State
diff --git a/ai_model/__init__.py b/ai_model/__init__.py
@@ -1,14 +1,14 @@
 """
-KCTCS Student Success Prediction - AI Model Package
-====================================================
+Bishop State Student Success Prediction - AI Model Package
+==========================================================
 
 This package contains machine learning models for predicting student success
-at Kentucky Community and Technical College System (KCTCS).
+at Bishop State Community College (BSCC).
 
 Modules:
 --------
 - complete_ml_pipeline.py: Complete ML pipeline with 5 prediction models
-- merge_kctcs_data.py: Data merging and preprocessing script
+- merge_bishop_state_data.py: Data merging and preprocessing script
 
 Models:
 -------
diff --git a/ai_model/complete_ml_pipeline.py b/ai_model/complete_ml_pipeline.py
@@ -1,14 +1,14 @@
 """
-Complete ML Pipeline for KCTCS Student Success Prediction
-==========================================================
+Complete ML Pipeline for Bishop State Student Success Prediction
+================================================================
 Models:
 1. Retention Prediction (Binary Classification)
 2. Early Warning System (Binary Classification)
 3. Time-to-Credential Prediction (Regression)
 4. Credential Type Prediction (Multi-class Classification)
 5. Course Success Prediction (Regression)
 
-Output: Predictions saved to MariaDB database tables
+Output: Predictions saved to Supabase Postgres tables
 """
 
 import pandas as pd
@@ -71,7 +71,7 @@
 print("=" * 80)
 
 print("\nLoading student-level dataset...")
-student_file = os.path.join(DATA_DIR, 'kctcs_student_level_with_zip.csv')
+student_file = os.path.join(DATA_DIR, 'bishop_state_student_level_with_zip.csv')
 print(f"Reading from: {student_file}")
 df = pd.read_csv(student_file)
 print(f"Loaded {len(df):,} students with {len(df.columns)} features")
@@ -1075,7 +1075,7 @@ def assign_alert_level(risk_score):
         USE_DATABASE = False
 
 # Always save CSV files for backup and local analysis
-output_file = os.path.join(DATA_DIR, 'kctcs_student_level_with_predictions.csv')
+output_file = os.path.join(DATA_DIR, 'bishop_state_student_level_with_predictions.csv')
 df.to_csv(output_file, index=False)
 print(f"\n✓ Saved student-level predictions to CSV:")
 print(f"  File: {output_file}")
@@ -1090,7 +1090,7 @@ def assign_alert_level(risk_score):
 print("=" * 80)
 
 print("\nLoading course-level merged file...")
-merged_file = os.path.join(DATA_DIR, 'kctcs_merged_with_zip.csv')
+merged_file = os.path.join(DATA_DIR, 'bishop_state_student_level_with_zip.csv')
 print(f"Reading from: {merged_file}")
 merged_df = pd.read_csv(merged_file)
 print(f"Loaded {len(merged_df):,} course records")
@@ -1123,7 +1123,7 @@ def assign_alert_level(risk_score):
         print(f"  Columns: {len(merged_with_predictions.columns)}")
 
 # Always save CSV files for backup and local analysis
-output_file = os.path.join(DATA_DIR, 'kctcs_merged_with_predictions.csv')
+output_file = os.path.join(DATA_DIR, 'bishop_state_merged_with_predictions.csv')
 merged_with_predictions.to_csv(output_file, index=False)
 print(f"\n✓ Saved course-level predictions to CSV:")
 print(f"  File: {output_file}")
@@ -1138,7 +1138,7 @@ def assign_alert_level(risk_score):
 print("=" * 80)
 
 summary_report = f"""
-KCTCS ML PIPELINE - SUMMARY REPORT
+BISHOP STATE ML PIPELINE - SUMMARY REPORT
 {'=' * 80}
 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
@@ -1324,7 +1324,7 @@ def assign_alert_level(risk_score):
 db_connected = 1 if USE_DATABASE else 0
 print(f"\nDatabase Connection Status: {db_connected}")
 if db_connected == 1:
-    print("  ✓ Successfully connected to MariaDB")
+    print("  ✓ Successfully connected to Supabase Postgres")
     print(f"  ✓ Database: {DB_CONFIG['database']}")
     print(f"  ✓ Host: {DB_CONFIG['host']}")
 else:
diff --git a/ai_model/complete_ml_pipeline_csv_only.py b/ai_model/complete_ml_pipeline_csv_only.py
@@ -1,5 +1,5 @@
 """
-Complete ML Pipeline for KCTCS Student Success Prediction (CSV Output Only)
+Complete ML Pipeline for Bishop State Student Success Prediction (CSV Output Only)
-==========================================================
+==================================================================================
 Models:
 1. Retention Prediction (Binary Classification)
@@ -50,7 +50,7 @@
 print("=" * 80)
 
 print("\nLoading student-level dataset...")
-student_file = os.path.join(DATA_DIR, 'kctcs_student_level_with_zip.csv')
+student_file = os.path.join(DATA_DIR, 'bishop_state_student_level_with_zip.csv')
 print(f"Reading from: {student_file}")
 df = pd.read_csv(student_file)
 print(f"Loaded {len(df):,} students with {len(df.columns)} features")
@@ -1040,7 +1040,7 @@ def assign_alert_level(risk_score):
 print("=" * 80)
 
 # Save student-level predictions with all columns
-output_file = os.path.join(DATA_DIR, 'kctcs_student_level_with_predictions.csv')
+output_file = os.path.join(DATA_DIR, 'bishop_state_student_level_with_predictions.csv')
 df.to_csv(output_file, index=False)
 print(f"\n✓ Saved student-level predictions to CSV:")
 print(f"  File: {output_file}")
@@ -1071,7 +1071,7 @@ def assign_alert_level(risk_score):
 predictions_df = df[prediction_columns].copy()
 
 print("\nLoading course-level merged file...")
-merged_file = os.path.join(DATA_DIR, 'kctcs_merged_with_zip.csv')
+merged_file = os.path.join(DATA_DIR, 'bishop_state_student_level_with_zip.csv')
 print(f"Reading from: {merged_file}")
 merged_df = pd.read_csv(merged_file)
 print(f"Loaded {len(merged_df):,} course records")
@@ -1091,7 +1091,7 @@ def assign_alert_level(risk_score):
 )
 
 # Save course-level predictions
-output_file = os.path.join(DATA_DIR, 'kctcs_merged_with_predictions.csv')
+output_file = os.path.join(DATA_DIR, 'bishop_state_merged_with_predictions.csv')
 merged_with_predictions.to_csv(output_file, index=False)
 print(f"\n✓ Saved course-level predictions to CSV:")
 print(f"  File: {output_file}")
@@ -1106,7 +1106,7 @@ def assign_alert_level(risk_score):
 print("=" * 80)
 
 summary_report = f"""
-KCTCS ML PIPELINE - SUMMARY REPORT (CSV OUTPUT ONLY)
+BISHOP STATE ML PIPELINE - SUMMARY REPORT (CSV OUTPUT ONLY)
 {'=' * 80}
 Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
 
@@ -1221,12 +1221,12 @@ def assign_alert_level(risk_score):
 summary_report += f"""
 OUTPUT: CSV FILES
 {'-' * 80}
-1. kctcs_student_level_with_predictions.csv
+1. bishop_state_student_level_with_predictions.csv
    - Student-level data with all predictions
    - {len(df):,} students
    - {len(df.columns)} columns
 
-2. kctcs_merged_with_predictions.csv
+2. bishop_state_merged_with_predictions.csv
    - Course-level data with predictions
    - {len(merged_with_predictions):,} records
    - {len(merged_with_predictions.columns)} columns
@@ -1294,8 +1294,8 @@ def assign_alert_level(risk_score):
 print("=" * 80)
 print(f"\nCompleted: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
 print("\nOutput files:")
-print(f"  1. {os.path.join(DATA_DIR, 'kctcs_student_level_with_predictions.csv')}")
-print(f"  2. {os.path.join(DATA_DIR, 'kctcs_merged_with_predictions.csv')}")
+print(f"  1. {os.path.join(DATA_DIR, 'bishop_state_student_level_with_predictions.csv')}")
+print(f"  2. {os.path.join(DATA_DIR, 'bishop_state_merged_with_predictions.csv')}")
 print(f"  3. {os.path.join(DATA_DIR, 'model_comparison_results.csv')}")
 print(f"  4. {report_file}")
 print("=" * 80)