223 lines
9.0 KiB
Python
223 lines
9.0 KiB
Python
import pandas as pd
|
|
import logging
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
# Configure logging
# Root logging is set up once at import time: every record at INFO level or
# above goes both to a log file in the working directory and to the console.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        # Explicit UTF-8 so that student names containing characters outside
        # the platform's default encoding can still be written to the file.
        logging.FileHandler('predelib_processing.log', encoding='utf-8'),
        logging.StreamHandler(),
    ],
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def check_students_with_fail_adviesrapport(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Return the details of every student whose 'Adviesrapport code' is FAIL.

    The code is compared after converting it to a string, trimming
    surrounding whitespace and upper-casing it, so 'fail' and ' FAIL ' both
    match. Rows that are exact duplicates across all columns are reported
    only once.

    Args:
        predelib_df (pandas.DataFrame): Processed predeliberation dataframe.
            Must contain the columns 'ID', 'Achternaam', 'Voornaam',
            'E-mail', 'Totaal aantal SP', 'Aantal SP vereist',
            'Waarschuwing' and 'Adviesrapport code'.

    Returns:
        list: One dictionary per failed student with the keys 'ID',
            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
            'Aantal_SP_vereist', 'Waarschuwing' and 'Adviesrapport_code'.
            Missing (NaN) values are returned as None. The list is empty
            when no student has a FAIL status.

    Raises:
        ValueError: If the dataframe is None or empty, or if filtering for
            the FAIL status fails.
        KeyError: If required columns are missing.
    """
    logger.info("Starting failed students check")

    try:
        # Validate input dataframe
        if predelib_df is None or predelib_df.empty:
            error_msg = "Predelib dataframe is None or empty"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.info(f"Predelib dataframe shape: {predelib_df.shape}")

        # Define required columns
        required_columns = [
            'ID', 'Achternaam', 'Voornaam', 'E-mail',
            'Totaal aantal SP', 'Aantal SP vereist', 'Waarschuwing', 'Adviesrapport code'
        ]

        # Check for required columns
        missing_columns = [col for col in required_columns if col not in predelib_df.columns]
        if missing_columns:
            error_msg = f"Missing required columns in predelib dataframe: {missing_columns}"
            logger.error(error_msg)
            logger.info(f"Available columns: {list(predelib_df.columns)}")
            raise KeyError(error_msg)

        logger.info("All required columns found in dataframe")

        # Debug Adviesrapport code column
        code_column = predelib_df['Adviesrapport code']
        logger.debug(f"Adviesrapport code column type: {code_column.dtype}")
        logger.debug(f"Unique Adviesrapport codes: {code_column.unique()}")

        # Filter for FAIL cases
        try:
            # Normalize before comparing: spreadsheet cells may carry stray
            # whitespace or mixed case, and such rows must not be missed.
            normalized_codes = code_column.astype(str).str.strip().str.upper()
            students_with_fail_ar_df = predelib_df[normalized_codes == 'FAIL']

            initial_count = len(students_with_fail_ar_df)
            logger.info(f"Found {initial_count} students with FAIL status")

            # Remove duplicate rows (exact same values in all columns)
            students_with_fail_ar_df = students_with_fail_ar_df.drop_duplicates()
            final_count = len(students_with_fail_ar_df)

            duplicates_removed = initial_count - final_count
            if duplicates_removed > 0:
                logger.info(f"Removed {duplicates_removed} duplicate rows")
            else:
                logger.info("No duplicate rows found")

            logger.info(f"Final count after duplicate removal: {final_count} students with FAIL status")

        except Exception as e:
            error_msg = f"Error filtering for FAIL status: {e}"
            logger.error(error_msg)
            # Chain the cause so the original traceback is not lost.
            raise ValueError(error_msg) from e

        if students_with_fail_ar_df.empty:
            logger.info("No students with FAIL status found")
            return []

        # Output keys are the column names with spaces replaced by
        # underscores (e.g. 'Totaal aantal SP' -> 'Totaal_aantal_SP').
        output_keys = {column: column.replace(' ', '_') for column in required_columns}

        # Extract details for failed students
        students_with_fail_ar = []
        for index, row in students_with_fail_ar_df.iterrows():
            try:
                student_details = {}
                for column, key in output_keys.items():
                    value = row[column]
                    # Handle potential NaN values
                    if pd.isna(value):
                        logger.warning(f"NaN value found for {key} in student ID: {row['ID']}")
                        value = None
                    student_details[key] = value

                students_with_fail_ar.append(student_details)
                logger.debug(f"Processed failed student: ID={row['ID']}, "
                             f"Name={row['Achternaam']}, {row['Voornaam']}")

            except Exception as e:
                # Best effort: one malformed row must not abort the report.
                logger.error(f"Error processing student at index {index}: {e}")
                continue

        logger.info(f"Successfully processed {len(students_with_fail_ar)} failed students")

        # Log summary
        if students_with_fail_ar:
            logger.warning(f"Found {len(students_with_fail_ar)} students with FAIL status")
            for student in students_with_fail_ar:
                logger.info(f"Failed student - ID: {student['ID']}, "
                            f"Name: {student['Achternaam']}, {student['Voornaam']}, "
                            f"SP: {student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}")
        else:
            logger.info("No failed students found")

        return students_with_fail_ar

    except (ValueError, KeyError):
        # Validation and filtering errors were already logged where raised.
        raise
    except Exception as e:
        logger.exception(f"Unexpected error in check_students_with_fail_adviesrapport: {e}")
        raise
|
|
|
|
|
|
def print_students_with_fail_ar_summary(students_with_fail_ar: List[Dict[str, Any]], predelib_df: pd.DataFrame):
    """Print a console report of the students that have a FAIL status.

    Args:
        students_with_fail_ar: Student dictionaries providing the keys 'ID',
            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
            'Aantal_SP_vereist' and 'Waarschuwing'.
        predelib_df: The processed dataframe; only its length is used, to
            report the total number of students processed.
    """
    banner = '=' * 80

    print(f"\n{banner}")
    print("Students with FAIL AR status report")
    print(banner)
    print(f"Total students processed: {len(predelib_df)}")
    print(f"Students with FAIL status: {len(students_with_fail_ar)}")

    # Nothing to list: print the all-clear message and close the report.
    if not students_with_fail_ar:
        print("\n✅ No students with FAIL status found!")
        print(banner)
        return

    # Column widths shared by the heading, the separator and every row.
    column_widths = (10, 25, 30, 15, 20)
    headings = ('ID', 'Name', 'Email', 'SP', 'Warning')

    print("\nDetailed failed students list:")
    print(' '.join(title.ljust(width) for title, width in zip(headings, column_widths)))
    print(' '.join('-' * width for width in column_widths))

    for entry in students_with_fail_ar:
        full_name = "{}, {}".format(entry['Achternaam'], entry['Voornaam'])
        credits = "{}/{}".format(entry['Totaal_aantal_SP'], entry['Aantal_SP_vereist'])
        warning_value = entry['Waarschuwing']
        warning_text = str(warning_value) if warning_value else "None"

        # Name, e-mail and warning are truncated to their column width;
        # the ID and the credit ratio are only padded.
        cells = (
            str(entry['ID']),
            full_name[:25],
            str(entry['E-mail'])[:30],
            credits,
            warning_text[:20],
        )
        print(' '.join(cell.ljust(width) for cell, width in zip(cells, column_widths)))

    print(banner)
|
|
|
|
|
|
if __name__ == "__main__":
    # Example usage - can be used for testing.
    # Pipeline: load db.xlsx, normalise it with check_headers_predelibfile,
    # collect the students with a FAIL status and print the console report.
    # Any failure is logged and the script exits with a non-zero status.
    logger.info("Starting failed students check script")

    try:
        # The project-local import has its own handler so that an ImportError
        # raised by a later stage (for example an optional Excel engine
        # missing when pandas reads the file) is not misreported as a
        # missing checkheaders.py.
        try:
            from checkheaders import check_headers_predelibfile
        except ImportError as e:
            logger.error(f"Import error: {e}")
            print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
            raise SystemExit(1) from e

        # Read the Excel file
        logger.info("Reading predelib Excel file")
        try:
            df_predelib = pd.read_excel('db.xlsx')
        except FileNotFoundError:
            logger.error("db.xlsx file not found")
            raise
        except Exception as e:
            logger.error(f"Error reading db.xlsx: {e}")
            raise
        logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")

        # Process the dataframe
        logger.info("Processing predelib dataframe")
        try:
            processed_predelib_df = check_headers_predelibfile(df_predelib)
            logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")
        except Exception as e:
            logger.error(f"Error processing predelib file: {e}")
            raise

        # Check for failed students
        logger.info("Checking for failed students")
        try:
            students_with_fail_ar = check_students_with_fail_adviesrapport(processed_predelib_df)
            logger.info(f"Failed students check completed. Found {len(students_with_fail_ar)} failed students.")

            # Print summary for console output
            print_students_with_fail_ar_summary(students_with_fail_ar, processed_predelib_df)
        except Exception as e:
            logger.error(f"Error during failed students check: {e}")
            raise

    except Exception as e:
        # logger.exception records the traceback, so the log file really
        # holds the detailed information the console message points to.
        logger.exception(f"Unexpected error in main execution: {e}")
        print(f"An error occurred: {e}")
        print("Check the log file 'predelib_processing.log' for detailed error information.")
        # Signal the failure to the calling shell instead of exiting with 0.
        raise SystemExit(1) from e
    finally:
        logger.info("Failed students check script completed")