Add functionality to check for students with FAIL status in predeliberation file and log results

This commit is contained in:
bdaneels 2025-07-29 16:18:52 +02:00
parent c5d356b366
commit 8236038f11
4 changed files with 247 additions and 6 deletions

6
.gitignore vendored
View File

@ -8,4 +8,8 @@ __pycache__/
# Ignore Excel files
*.xlsx
sisa_crawl/
# Ignore log files
*.log
sisa_crawl/

View File

@ -107,6 +107,7 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
predelib_sp = predelib_matches['Totaal aantal SP'].iloc[0]
dashboard_sp = dashboard_matches['Ingeschr. SP (intern)'].iloc[0]
name_student = predelib_matches['Voornaam'].iloc[0] + ' ' + predelib_matches['Achternaam'].iloc[0]
# Handle potential NaN values
if pd.isna(predelib_sp) or pd.isna(dashboard_sp):
@ -126,8 +127,10 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
if predelib_sp_num != dashboard_sp_num:
mismatch = {
'ID': id_val,
'Name': name_student,
'Predelib_SP': predelib_sp,
'Dashboard_SP': dashboard_sp
'Dashboard_SP': dashboard_sp,
}
mismatches.append(mismatch)
logger.debug(f"Mismatch found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")
@ -145,7 +148,7 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
else:
logger.warning(f"Found {len(mismatches)} mismatches")
for mismatch in mismatches:
logger.info(f"Mismatch - ID {mismatch['ID']}: Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
logger.info(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
return mismatches

View File

@ -0,0 +1,223 @@
import pandas as pd
import logging
from typing import List, Dict, Any, Optional
# Configure logging
# Runs at import time. Requests INFO level on the root logger with a shared
# "timestamp - logger name - level - message" format, and attaches two handlers:
# a file handler writing to 'predelib_processing.log' (a relative path, so it is
# resolved against the current working directory) and a stream handler.
# With the level at INFO, the logger.debug(...) calls in this module are not
# emitted unless the level is lowered elsewhere.
# NOTE(review): logging.basicConfig() is documented to do nothing when the root
# logger already has handlers, so if the importing application configured
# logging first this call may have no effect - confirm which setup should win.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler('predelib_processing.log'),
logging.StreamHandler()
]
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def check_students_with_fail_adviesrapport(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Check for students with 'FAIL' in 'Adviesrapport code' column and extract their details.

    The code comparison ignores case and surrounding whitespace, so values such
    as 'fail' or ' FAIL ' are matched as well. Rows that are exact duplicates
    (same value in every column) are reported once. Missing (NaN) values are
    returned as None and logged as warnings.

    Args:
        predelib_df (pandas.DataFrame): Processed predeliberation dataframe

    Returns:
        list: One dictionary per failed student with the keys 'ID',
            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
            'Aantal_SP_vereist', 'Waarschuwing' and 'Adviesrapport_code'.
            Empty when no student has FAIL status.

    Raises:
        ValueError: If the input dataframe is None or empty, or if filtering
            on the 'Adviesrapport code' column fails
        KeyError: If required columns are missing
    """
    # Source column name -> key used in the returned dictionaries.
    # The insertion order doubles as the list of required columns.
    field_map = {
        'ID': 'ID',
        'Achternaam': 'Achternaam',
        'Voornaam': 'Voornaam',
        'E-mail': 'E-mail',
        'Totaal aantal SP': 'Totaal_aantal_SP',
        'Aantal SP vereist': 'Aantal_SP_vereist',
        'Waarschuwing': 'Waarschuwing',
        'Adviesrapport code': 'Adviesrapport_code',
    }

    logger.info("Starting failed students check")
    try:
        # Validate input dataframe
        if predelib_df is None or predelib_df.empty:
            error_msg = "Predelib dataframe is None or empty"
            logger.error(error_msg)
            raise ValueError(error_msg)
        logger.info(f"Predelib dataframe shape: {predelib_df.shape}")

        # Check for required columns
        required_columns = list(field_map)
        missing_columns = [col for col in required_columns if col not in predelib_df.columns]
        if missing_columns:
            error_msg = f"Missing required columns in predelib dataframe: {missing_columns}"
            logger.error(error_msg)
            logger.info(f"Available columns: {list(predelib_df.columns)}")
            raise KeyError(error_msg)
        logger.info("All required columns found in dataframe")

        # Debug Adviesrapport code column
        code_column = predelib_df['Adviesrapport code']
        logger.debug(f"Adviesrapport code column type: {code_column.dtype}")
        logger.debug(f"Unique Adviesrapport codes: {code_column.unique()}")

        # Filter for FAIL cases
        try:
            # Normalise to stripped upper-case text so ' fail ' still counts as FAIL
            fail_mask = code_column.astype(str).str.strip().str.upper() == 'FAIL'
            students_with_fail_ar_df = predelib_df[fail_mask].copy()
            initial_count = len(students_with_fail_ar_df)
            logger.info(f"Found {initial_count} students with FAIL status")

            # Remove duplicate rows (exact same values in all columns)
            students_with_fail_ar_df = students_with_fail_ar_df.drop_duplicates()
            final_count = len(students_with_fail_ar_df)
            duplicates_removed = initial_count - final_count
            if duplicates_removed > 0:
                logger.info(f"Removed {duplicates_removed} duplicate rows")
            else:
                logger.info("No duplicate rows found")
            logger.info(f"Final count after duplicate removal: {final_count} students with FAIL status")
        except Exception as e:
            error_msg = f"Error filtering for FAIL status: {e}"
            logger.error(error_msg)
            # Keep the original exception attached as the cause
            raise ValueError(error_msg) from e

        if students_with_fail_ar_df.empty:
            logger.info("No students with FAIL status found")
            return []

        # Extract details for failed students
        students_with_fail_ar = []
        for index, row in students_with_fail_ar_df.iterrows():
            # Best effort per row: a malformed row is logged and skipped so the
            # remaining students are still reported.
            try:
                student_details = {key: row[column] for column, key in field_map.items()}
                # Handle potential NaN values
                for key, value in student_details.items():
                    if pd.isna(value):
                        student_details[key] = None
                        logger.warning(f"NaN value found for {key} in student ID: {row['ID']}")
                students_with_fail_ar.append(student_details)
                logger.debug(f"Processed failed student: ID={row['ID']}, "
                             f"Name={row['Achternaam']}, {row['Voornaam']}")
            except Exception as e:
                logger.error(f"Error processing student at index {index}: {e}")
                continue
        logger.info(f"Successfully processed {len(students_with_fail_ar)} failed students")

        # Log summary
        if students_with_fail_ar:
            logger.warning(f"Found {len(students_with_fail_ar)} students with FAIL status")
            for student in students_with_fail_ar:
                logger.info(f"Failed student - ID: {student['ID']}, "
                            f"Name: {student['Achternaam']}, {student['Voornaam']}, "
                            f"SP: {student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}")
        else:
            logger.info("No failed students found")
        return students_with_fail_ar
    except (ValueError, KeyError):
        # Raised deliberately above and already logged there; do not log the
        # same failure a second time as "unexpected".
        raise
    except Exception as e:
        logger.error(f"Unexpected error in check_students_with_fail_adviesrapport: {e}")
        raise
def print_students_with_fail_ar_summary(students_with_fail_ar: List[Dict[str, Any]], predelib_df: pd.DataFrame) -> None:
    """Print a formatted summary of students with FAIL status.

    Args:
        students_with_fail_ar: Student dictionaries with the keys 'ID',
            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
            'Aantal_SP_vereist' and 'Waarschuwing'.
        predelib_df: The processed predeliberation dataframe; its length is
            printed as the total number of students processed. Any other sized
            collection of records, or a plain integer count, is accepted too.
            For values that carry no record count (for example a file path or
            None) the "Total students processed" line is omitted instead of
            printing a misleading number.

    Returns:
        None. The report is written to standard output.
    """
    separator = '=' * 80

    # Derive the number of processed records. Strings and bytes have a length,
    # but it is a character count, not a record count, so they are excluded.
    total_processed = None
    if isinstance(predelib_df, int) and not isinstance(predelib_df, bool):
        total_processed = predelib_df
    elif not isinstance(predelib_df, (str, bytes)) and hasattr(predelib_df, '__len__'):
        total_processed = len(predelib_df)

    print(f"\n{separator}")
    print("Students with FAIL AR status report")
    print(separator)
    if total_processed is not None:
        print(f"Total students processed: {total_processed}")
    print(f"Students with FAIL status: {len(students_with_fail_ar)}")

    if students_with_fail_ar:
        print("\nDetailed failed students list:")
        print(f"{'ID':<10} {'Name':<25} {'Email':<30} {'SP':<15} {'Warning':<20}")
        print(f"{'-'*10} {'-'*25} {'-'*30} {'-'*15} {'-'*20}")
        for student in students_with_fail_ar:
            name = f"{student['Achternaam']}, {student['Voornaam']}"
            sp_info = f"{student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}"
            # An empty or missing warning is shown as the text "None"
            warning = str(student['Waarschuwing']) if student['Waarschuwing'] else "None"
            # Slice before padding so long values cannot break the column layout
            print(f"{str(student['ID']):<10} {name[:25]:<25} {str(student['E-mail'])[:30]:<30} "
                  f"{sp_info:<15} {warning[:20]:<20}")
    else:
        print("\n✅ No students with FAIL status found!")
    print(separator)
if __name__ == "__main__":
    # Example usage - can be used for testing.
    # Reads 'db.xlsx' from the current working directory, normalises its headers
    # with checkheaders.check_headers_predelibfile, runs the FAIL check and
    # prints the summary. Exits with status 1 when any step fails.
    logger.info("Starting failed students check script")
    try:
        # Guard only the project import here: pandas can raise ImportError too
        # (e.g. a missing optional Excel engine), and that must not be reported
        # as a missing checkheaders.py.
        try:
            from checkheaders import check_headers_predelibfile
        except ImportError as e:
            logger.error(f"Import error: {e}")
            print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
            raise SystemExit(1) from e

        # Describes the step in progress, so a failure is logged exactly once
        # with the right context.
        failure_context = "Error reading db.xlsx"
        try:
            # Read the Excel file
            logger.info("Reading predelib Excel file")
            df_predelib = pd.read_excel('db.xlsx')
            logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")

            # Process the dataframe
            failure_context = "Error processing predelib file"
            logger.info("Processing predelib dataframe")
            processed_predelib_df = check_headers_predelibfile(df_predelib)
            logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")

            # Check for failed students
            failure_context = "Error during failed students check"
            logger.info("Checking for failed students")
            students_with_fail_ar = check_students_with_fail_adviesrapport(processed_predelib_df)
            logger.info(f"Failed students check completed. Found {len(students_with_fail_ar)} failed students.")

            # Print summary for console output
            print_students_with_fail_ar_summary(students_with_fail_ar, processed_predelib_df)
        except Exception as e:
            # logger.exception records the traceback, which is what the console
            # message below points the user to.
            logger.exception(f"{failure_context}: {e}")
            print(f"An error occurred: {e}")
            print("Check the log file 'predelib_processing.log' for detailed error information.")
            raise SystemExit(1) from e
    finally:
        logger.info("Failed students check script completed")

View File

@ -6,6 +6,7 @@ import os
from pathlib import Path
from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from process_predelib_file import check_students_with_fail_adviesrapport, print_students_with_fail_ar_summary
from compare_sp import compare_sp_values
# Configure logging
@ -99,7 +100,11 @@ def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False
logger.info("Processing dashboard file headers")
processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
# Check the predeliberation file for students with a fail in 'Adviesrapport code'
logger.info("Checking for students with FAIL status in predeliberation file")
students_with_fail = check_students_with_fail_adviesrapport(processed_predelib_df)
# Compare SP values
logger.info("Comparing SP values between files")
mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
@ -110,6 +115,8 @@ def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False
'dashboard_file': dashboard_path,
'predelib_records': len(processed_predelib_df),
'dashboard_records': len(processed_dashboard_df),
'students_with_fail_count': len(students_with_fail),
'students_with_fail': students_with_fail,
'mismatches_count': len(mismatches),
'mismatches': mismatches,
'status': 'completed'
@ -144,12 +151,16 @@ def print_summary(results: dict):
print(f"Dashboard file: {results['dashboard_file']}")
print(f"Predelib records processed: {results['predelib_records']}")
print(f"Dashboard records processed: {results['dashboard_records']}")
print(f"Students with FAIL adviesrapport found: {results['students_with_fail_count']}")
print(f"Mismatches found: {results['mismatches_count']}")
if results['students_with_fail_count'] > 0:
print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_file'])
if results['mismatches']:
print(f"\nDetailed mismatches:")
print(f"\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
for mismatch in results['mismatches']:
print(f" ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
print(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
else:
print("\n✅ All SP values match perfectly!")