Add functionality to check for students with FAIL status in predeliberation file and log results

2025-07-29 16:18:52 +02:00 · 2025-07-29 16:18:52 +02:00 · 8236038f11
commit 8236038f11
parent c5d356b366
4 changed files with 247 additions and 6 deletions
--- a/.gitignore
+++ b/.gitignore
@ -8,4 +8,8 @@ __pycache__/
 # Ignore Excel files
 *.xlsx

-sisa_crawl/
+# Ignore log files
+*.log
+
+sisa_crawl/
+
--- a/startpakketten/compare_sp.py
+++ b/startpakketten/compare_sp.py
@ -107,6 +107,7 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
                
                predelib_sp = predelib_matches['Totaal aantal SP'].iloc[0]
                dashboard_sp = dashboard_matches['Ingeschr. SP (intern)'].iloc[0]
+                name_student = predelib_matches['Voornaam'].iloc[0] + ' ' + predelib_matches['Achternaam'].iloc[0]
                
                # Handle potential NaN values
                if pd.isna(predelib_sp) or pd.isna(dashboard_sp):
@ -126,8 +127,10 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
                if predelib_sp_num != dashboard_sp_num:
                    mismatch = {
                        'ID': id_val,
+                        'Name': name_student,
                        'Predelib_SP': predelib_sp,
-                        'Dashboard_SP': dashboard_sp
+                        'Dashboard_SP': dashboard_sp,
+                        
                    }
                    mismatches.append(mismatch)
                    logger.debug(f"Mismatch found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")
@ -145,7 +148,7 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
        else:
            logger.warning(f"Found {len(mismatches)} mismatches")
            for mismatch in mismatches:
-                logger.info(f"Mismatch - ID {mismatch['ID']}: Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
+                logger.info(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
        
        return mismatches
        
--- a/startpakketten/process_predelib_file.py
+++ b/startpakketten/process_predelib_file.py
@ -0,0 +1,223 @@
+import pandas as pd
+import logging
+from typing import List, Dict, Any, Optional
+
+# Configure root logging: records at INFO and above go both to the file
+# 'predelib_processing.log' (a relative path, so it is resolved against the
+# current working directory) and to a console stream handler.
+# NOTE(review): this call sits at module level, so it also runs when the
+# module is merely imported. logging.basicConfig() is documented to do nothing
+# when the root logger already has handlers, so whichever module configures
+# logging first wins -- confirm that is intended for importers of this module.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('predelib_processing.log'),
+        logging.StreamHandler()
+    ]
+)
+
+# Module-level logger, named after this module
+logger = logging.getLogger(__name__)
+
+
+def check_students_with_fail_adviesrapport(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
+    """
+    Collect the details of every student whose 'Adviesrapport code' is FAIL.
+
+    The code column is compared as text, ignoring case and surrounding
+    whitespace. Exact duplicate rows among the matches are dropped before the
+    details are extracted, and NaN values are reported and replaced by None.
+
+    Args:
+        predelib_df (pandas.DataFrame): Processed predeliberation dataframe
+
+    Returns:
+        list: One dictionary per failed student with the keys 'ID',
+            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
+            'Aantal_SP_vereist', 'Waarschuwing' and 'Adviesrapport_code'.
+            Empty when no student has FAIL status.
+
+    Raises:
+        ValueError: If the dataframe is None or empty, or if filtering fails
+        KeyError: If required columns are missing
+    """
+    logger.info("Starting failed students check")
+
+    # Validate input dataframe
+    if predelib_df is None or predelib_df.empty:
+        error_msg = "Predelib dataframe is None or empty"
+        logger.error(error_msg)
+        raise ValueError(error_msg)
+
+    logger.info(f"Predelib dataframe shape: {predelib_df.shape}")
+
+    # Required source column -> key used in the returned dictionaries
+    column_to_key = {
+        'ID': 'ID',
+        'Achternaam': 'Achternaam',
+        'Voornaam': 'Voornaam',
+        'E-mail': 'E-mail',
+        'Totaal aantal SP': 'Totaal_aantal_SP',
+        'Aantal SP vereist': 'Aantal_SP_vereist',
+        'Waarschuwing': 'Waarschuwing',
+        'Adviesrapport code': 'Adviesrapport_code',
+    }
+
+    missing_columns = [col for col in column_to_key if col not in predelib_df.columns]
+    if missing_columns:
+        error_msg = f"Missing required columns in predelib dataframe: {missing_columns}"
+        logger.error(error_msg)
+        logger.info(f"Available columns: {list(predelib_df.columns)}")
+        raise KeyError(error_msg)
+
+    logger.info("All required columns found in dataframe")
+
+    # Only compute the unique codes when the debug output is actually emitted
+    if logger.isEnabledFor(logging.DEBUG):
+        logger.debug(f"Adviesrapport code column type: {predelib_df['Adviesrapport code'].dtype}")
+        logger.debug(f"Unique Adviesrapport codes: {predelib_df['Adviesrapport code'].unique()}")
+
+    # Filter for FAIL cases
+    try:
+        # Compare as text, tolerant of case and of stray whitespace around the
+        # code (e.g. 'fail ' typed into a spreadsheet cell)
+        codes = predelib_df['Adviesrapport code'].astype(str).str.strip().str.upper()
+        fail_rows = predelib_df[codes == 'FAIL']
+        initial_count = len(fail_rows)
+        logger.info(f"Found {initial_count} students with FAIL status")
+
+        # Remove duplicate rows (exact same values in all columns)
+        students_with_fail_ar_df = fail_rows.drop_duplicates()
+    except Exception as e:
+        error_msg = f"Error filtering for FAIL status: {e}"
+        logger.error(error_msg)
+        # Keep the original exception attached as the cause
+        raise ValueError(error_msg) from e
+
+    final_count = len(students_with_fail_ar_df)
+    duplicates_removed = initial_count - final_count
+    if duplicates_removed > 0:
+        logger.info(f"Removed {duplicates_removed} duplicate rows")
+    else:
+        logger.info("No duplicate rows found")
+    logger.info(f"Final count after duplicate removal: {final_count} students with FAIL status")
+
+    if final_count == 0:
+        logger.info("No students with FAIL status found")
+        return []
+
+    # Extract details for failed students
+    students_with_fail_ar = []
+    for index, row in students_with_fail_ar_df.iterrows():
+        try:
+            student_details = {key: row[column] for column, key in column_to_key.items()}
+
+            # Handle potential NaN values (replacing a value does not resize
+            # the dict, so this is safe while iterating over it)
+            for key, value in student_details.items():
+                if pd.isna(value):
+                    student_details[key] = None
+                    logger.warning(f"NaN value found for {key} in student ID: {row['ID']}")
+        except Exception as e:
+            # Best effort: one unreadable row must not abort the whole report
+            logger.error(f"Error processing student at index {index}: {e}")
+            continue
+
+        students_with_fail_ar.append(student_details)
+        logger.debug(f"Processed failed student: ID={row['ID']}, "
+                     f"Name={row['Achternaam']}, {row['Voornaam']}")
+
+    logger.info(f"Successfully processed {len(students_with_fail_ar)} failed students")
+
+    skipped_count = final_count - len(students_with_fail_ar)
+    if skipped_count:
+        logger.warning(f"Skipped {skipped_count} FAIL rows that could not be processed")
+
+    # Log summary
+    if students_with_fail_ar:
+        logger.warning(f"Found {len(students_with_fail_ar)} students with FAIL status")
+        for student in students_with_fail_ar:
+            logger.info(f"Failed student - ID: {student['ID']}, "
+                        f"Name: {student['Achternaam']}, {student['Voornaam']}, "
+                        f"SP: {student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}")
+
+    return students_with_fail_ar
+
+
+def print_students_with_fail_ar_summary(students_with_fail_ar: List[Dict[str, Any]], predelib_df: pd.DataFrame) -> None:
+    """
+    Print a formatted console report of the students with FAIL status.
+
+    Args:
+        students_with_fail_ar: Student dictionaries with the keys 'ID',
+            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
+            'Aantal_SP_vereist' and 'Waarschuwing'.
+        predelib_df: Dataframe the students were taken from; only its row
+            count is reported. When something other than a dataframe is
+            passed, the "Total students processed" line is left out.
+    """
+    separator = '=' * 80
+
+    print(f"\n{separator}")
+    print("Students with FAIL AR status report")
+    print(separator)
+    # len() of a non-dataframe argument (e.g. a file path string) would be
+    # printed as a bogus student count, so only report the total for a dataframe
+    if isinstance(predelib_df, pd.DataFrame):
+        print(f"Total students processed: {len(predelib_df)}")
+    print(f"Students with FAIL status: {len(students_with_fail_ar)}")
+
+    if students_with_fail_ar:
+        print("\nDetailed failed students list:")
+        print(f"{'ID':<10} {'Name':<25} {'Email':<30} {'SP':<15} {'Warning':<20}")
+        print(f"{'-'*10} {'-'*25} {'-'*30} {'-'*15} {'-'*20}")
+
+        for student in students_with_fail_ar:
+            name = f"{student['Achternaam']}, {student['Voornaam']}"
+            sp_info = f"{student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}"
+            warning = str(student['Waarschuwing']) if student['Waarschuwing'] else "None"
+
+            # Values are truncated to their column width to keep the table aligned
+            print(f"{str(student['ID']):<10} {name[:25]:<25} {str(student['E-mail'])[:30]:<30} "
+                  f"{sp_info:<15} {warning[:20]:<20}")
+    else:
+        print("\n✅ No students with FAIL status found!")
+
+    print(separator)
+
+
+if __name__ == "__main__":
+    # Example usage - can be used for testing: reads 'db.xlsx' from the current
+    # working directory, normalises its headers and prints the FAIL report.
+    logger.info("Starting failed students check script")
+
+    # Name of the step in progress, so that a failure is logged exactly once
+    # together with the step it happened in
+    stage = "starting up"
+    try:
+        # Only the import itself is guarded by ImportError: pandas can raise
+        # ImportError as well (missing optional Excel engine), and that must
+        # not be reported as a missing checkheaders.py.
+        try:
+            from checkheaders import check_headers_predelibfile
+        except ImportError as e:
+            logger.error(f"Import error: {e}")
+            print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
+            raise SystemExit(1)
+
+        # Read the Excel file
+        stage = "reading db.xlsx"
+        logger.info("Reading predelib Excel file")
+        df_predelib = pd.read_excel('db.xlsx')
+        logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")
+
+        # Process the dataframe
+        stage = "processing predelib file"
+        logger.info("Processing predelib dataframe")
+        processed_predelib_df = check_headers_predelibfile(df_predelib)
+        logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")
+
+        # Check for failed students
+        stage = "checking for failed students"
+        logger.info("Checking for failed students")
+        students_with_fail_ar = check_students_with_fail_adviesrapport(processed_predelib_df)
+        logger.info(f"Failed students check completed. Found {len(students_with_fail_ar)} failed students.")
+
+        # Print summary for console output
+        print_students_with_fail_ar_summary(students_with_fail_ar, processed_predelib_df)
+
+    except Exception as e:
+        # logger.exception records the traceback alongside the message
+        logger.exception(f"Error while {stage}: {e}")
+        print(f"An error occurred: {e}")
+        print("Check the log file 'predelib_processing.log' for detailed error information.")
+        # Signal the failure to the calling shell instead of exiting with 0
+        raise SystemExit(1)
+    finally:
+        logger.info("Failed students check script completed")
--- a/startpakketten/script.py
+++ b/startpakketten/script.py
@ -6,6 +6,7 @@ import os
 from pathlib import Path

 from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
+from process_predelib_file import check_students_with_fail_adviesrapport, print_students_with_fail_ar_summary
 from compare_sp import compare_sp_values

 # Configure logging
@ -99,7 +100,11 @@ def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False
        
        logger.info("Processing dashboard file headers")
        processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
-        
+
+        # Check the predeliberation file for students with a fail in 'Adviesrapport code'
+        logger.info("Checking for students with FAIL status in predeliberation file")
+        students_with_fail = check_students_with_fail_adviesrapport(processed_predelib_df)
+
        # Compare SP values
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
@ -110,6 +115,8 @@ def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False
            'dashboard_file': dashboard_path,
            'predelib_records': len(processed_predelib_df),
            'dashboard_records': len(processed_dashboard_df),
+            'students_with_fail_count': len(students_with_fail),
+            'students_with_fail': students_with_fail,
            'mismatches_count': len(mismatches),
            'mismatches': mismatches,
            'status': 'completed'
@ -144,12 +151,16 @@ def print_summary(results: dict):
    print(f"Dashboard file: {results['dashboard_file']}")
    print(f"Predelib records processed: {results['predelib_records']}")
    print(f"Dashboard records processed: {results['dashboard_records']}")
+    print(f"Students with FAIL adviesrapport found: {results['students_with_fail_count']}")
    print(f"Mismatches found: {results['mismatches_count']}")
    
+    if results['students_with_fail_count'] > 0:
+       print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_file'])
+
    if results['mismatches']:
-        print(f"\nDetailed mismatches:")
+        print(f"\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
        for mismatch in results['mismatches']:
-            print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
+            print(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
    else:
        print("\n✅ All SP values match perfectly!")