import logging
from typing import Any, Dict, List, Optional

import pandas as pd

# Configure logging: every record goes to a log file and to a stream handler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('predelib_processing.log'),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)

# Required source column -> key used in the returned student dictionaries.
# The keys of this mapping double as the list of required columns.
_STUDENT_FIELDS: Dict[str, str] = {
    'ID': 'ID',
    'Achternaam': 'Achternaam',
    'Voornaam': 'Voornaam',
    'E-mail': 'E-mail',
    'Totaal aantal SP': 'Totaal_aantal_SP',
    'Aantal SP vereist': 'Aantal_SP_vereist',
    'Waarschuwing': 'Waarschuwing',
    'Adviesrapport code': 'Adviesrapport_code',
}


def _validate_predelib_df(predelib_df: Optional[pd.DataFrame]) -> None:
    """Raise if the dataframe is None/empty or lacks a required column."""
    if predelib_df is None or predelib_df.empty:
        error_msg = "Predelib dataframe is None or empty"
        logger.error(error_msg)
        raise ValueError(error_msg)

    logger.info("Predelib dataframe shape: %s", predelib_df.shape)

    missing_columns = [
        column for column in _STUDENT_FIELDS
        if column not in predelib_df.columns
    ]
    if missing_columns:
        error_msg = (
            f"Missing required columns in predelib dataframe: {missing_columns}"
        )
        logger.error(error_msg)
        logger.info("Available columns: %s", list(predelib_df.columns))
        raise KeyError(error_msg)

    logger.info("All required columns found in dataframe")


def _select_fail_rows(predelib_df: pd.DataFrame) -> pd.DataFrame:
    """Return a de-duplicated copy of the rows whose code is 'FAIL'."""
    codes = predelib_df['Adviesrapport code']

    # Only compute the unique values when they will actually be logged.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Adviesrapport code column type: %s", codes.dtype)
        logger.debug("Unique Adviesrapport codes: %s", codes.unique())

    try:
        # Compare as text, ignoring case and surrounding whitespace, so that
        # values such as 'fail' or ' FAIL ' are recognised as well.
        fail_mask = codes.astype(str).str.strip().str.upper() == 'FAIL'
        fail_rows = predelib_df[fail_mask].copy()
        logger.info("Found %d students with FAIL status", len(fail_rows))

        # Remove duplicate rows (exact same values in all columns).
        initial_count = len(fail_rows)
        fail_rows = fail_rows.drop_duplicates()
        final_count = len(fail_rows)

        duplicates_removed = initial_count - final_count
        if duplicates_removed > 0:
            logger.info("Removed %d duplicate rows", duplicates_removed)
        else:
            logger.info("No duplicate rows found")
        logger.info(
            "Final count after duplicate removal: %d students with FAIL status",
            final_count,
        )
    except Exception as e:
        error_msg = f"Error filtering for FAIL status: {e}"
        logger.error(error_msg)
        # Chain the original exception so its traceback is not lost.
        raise ValueError(error_msg) from e

    return fail_rows


def _extract_student_details(fail_rows: pd.DataFrame) -> List[Dict[str, Any]]:
    """Convert each row into a details dictionary, mapping NaN to None."""
    students: List[Dict[str, Any]] = []

    for index, row in fail_rows.iterrows():
        # Best effort: a row that cannot be converted is logged and skipped
        # so that it does not prevent the other students from being reported.
        try:
            student_details = {
                key: row[column] for column, key in _STUDENT_FIELDS.items()
            }

            # Replace missing values by None (only values are reassigned, so
            # the dictionary can be updated while iterating over it).
            for key, value in student_details.items():
                if pd.isna(value):
                    student_details[key] = None
                    logger.warning(
                        "NaN value found for %s in student ID: %s",
                        key, row['ID'],
                    )

            students.append(student_details)
            logger.debug(
                "Processed failed student: ID=%s, Name=%s, %s",
                row['ID'], row['Achternaam'], row['Voornaam'],
            )
        except Exception as e:
            logger.error("Error processing student at index %s: %s", index, e)

    logger.info("Successfully processed %d failed students", len(students))
    return students


def check_students_with_fail_adviesrapport(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Check for students with 'FAIL' in 'Adviesrapport code' column and extract their details.

    The code is compared as text, ignoring case and surrounding whitespace.
    Rows that are identical in every column are reported only once, and
    missing (NaN) values are returned as None.

    Args:
        predelib_df (pandas.DataFrame): Processed predeliberation dataframe

    Returns:
        list: List of dictionaries containing failed student details, with
            the keys 'ID', 'Achternaam', 'Voornaam', 'E-mail',
            'Totaal_aantal_SP', 'Aantal_SP_vereist', 'Waarschuwing' and
            'Adviesrapport_code'. Empty when no student has a FAIL code.

    Raises:
        ValueError: If the dataframe is None or empty, or if filtering fails
        KeyError: If required columns are missing
    """
    logger.info("Starting failed students check")

    try:
        _validate_predelib_df(predelib_df)

        fail_rows = _select_fail_rows(predelib_df)
        if fail_rows.empty:
            logger.info("No students with FAIL status found")
            return []

        students_with_fail_ar = _extract_student_details(fail_rows)

        # Log summary
        if students_with_fail_ar:
            logger.warning(
                "Found %d students with FAIL status", len(students_with_fail_ar)
            )
            for student in students_with_fail_ar:
                logger.info(
                    "Failed student - ID: %s, Name: %s, %s, SP: %s/%s",
                    student['ID'], student['Achternaam'], student['Voornaam'],
                    student['Totaal_aantal_SP'], student['Aantal_SP_vereist'],
                )
        else:
            logger.info("No failed students found")

        return students_with_fail_ar

    except (ValueError, KeyError):
        # Validation and filtering failures were already logged where they
        # were raised; they are expected error paths, not unexpected ones.
        raise
    except Exception as e:
        logger.exception(
            "Unexpected error in check_students_with_fail_adviesrapport: %s", e
        )
        raise


def print_students_with_fail_ar_summary(students_with_fail_ar: List[Dict[str, Any]],
                                        predelib_df: pd.DataFrame):
    """
    Print a formatted summary of students with FAIL status.

    Args:
        students_with_fail_ar: Student dictionaries as returned by
            check_students_with_fail_adviesrapport
        predelib_df: Dataframe the students were extracted from; only its
            length is used, for the "total processed" line
    """
    separator = '=' * 80

    print(f"\n{separator}")
    print("Students with FAIL AR status report")
    print(separator)

    print(f"Total students processed: {len(predelib_df)}")
    print(f"Students with FAIL status: {len(students_with_fail_ar)}")

    if students_with_fail_ar:
        print("\nDetailed failed students list:")
        print(f"{'ID':<10} {'Name':<25} {'Email':<30} {'SP':<15} {'Warning':<20}")
        print(f"{'-'*10} {'-'*25} {'-'*30} {'-'*15} {'-'*20}")

        for student in students_with_fail_ar:
            name = f"{student['Achternaam']}, {student['Voornaam']}"
            sp_info = f"{student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}"
            warning = str(student['Waarschuwing']) if student['Waarschuwing'] else "None"

            # Long values are truncated to their column width to keep the
            # table aligned.
            print(f"{str(student['ID']):<10} {name[:25]:<25} {str(student['E-mail'])[:30]:<30} "
                  f"{sp_info:<15} {warning[:20]:<20}")
    else:
        print("\n✅ No students with FAIL status found!")

    print(separator)


def main() -> None:
    """
    Example usage - can be used for testing.

    Reads 'db.xlsx', passes it through check_headers_predelibfile from the
    checkheaders module, then reports the students with a FAIL code. Errors
    are logged and printed rather than propagated.
    """
    logger.info("Starting failed students check script")

    try:
        # Only the import itself is guarded by the ImportError handler, so an
        # ImportError raised later (e.g. by pandas for a missing Excel engine)
        # is not misreported as a missing checkheaders module.
        try:
            from checkheaders import check_headers_predelibfile
        except ImportError as e:
            logger.error("Import error: %s", e)
            print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
            return

        # Read the Excel file
        logger.info("Reading predelib Excel file")
        try:
            df_predelib = pd.read_excel('db.xlsx')
            logger.info(
                "Successfully loaded predelib file with shape: %s",
                df_predelib.shape,
            )
        except FileNotFoundError:
            logger.error("db.xlsx file not found")
            raise
        except Exception as e:
            logger.error("Error reading db.xlsx: %s", e)
            raise

        # Process the dataframe
        logger.info("Processing predelib dataframe")
        try:
            processed_predelib_df = check_headers_predelibfile(df_predelib)
            logger.info(
                "Processed predelib dataframe shape: %s",
                processed_predelib_df.shape,
            )
        except Exception as e:
            logger.error("Error processing predelib file: %s", e)
            raise

        # Check for failed students
        logger.info("Checking for failed students")
        try:
            students_with_fail_ar = check_students_with_fail_adviesrapport(processed_predelib_df)
            logger.info(
                "Failed students check completed. Found %d failed students.",
                len(students_with_fail_ar),
            )

            # Print summary for console output
            print_students_with_fail_ar_summary(students_with_fail_ar, processed_predelib_df)
        except Exception as e:
            logger.error("Error during failed students check: %s", e)
            raise

    except Exception as e:
        logger.error("Unexpected error in main execution: %s", e)
        print(f"An error occurred: {e}")
        print("Check the log file 'predelib_processing.log' for detailed error information.")

    finally:
        logger.info("Failed students check script completed")


if __name__ == "__main__":
    main()