Add functionality to check for students with FAIL status in predeliberation file and log results
This commit is contained in:
parent
c5d356b366
commit
8236038f11
6
.gitignore
vendored
6
.gitignore
vendored
|
@ -8,4 +8,8 @@ __pycache__/
|
|||
# Ignore Excel files
|
||||
*.xlsx
|
||||
|
||||
sisa_crawl/
|
||||
# Ignore log files
|
||||
*.log
|
||||
|
||||
sisa_crawl/
|
||||
|
||||
|
|
|
@ -107,6 +107,7 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
|
|||
|
||||
predelib_sp = predelib_matches['Totaal aantal SP'].iloc[0]
|
||||
dashboard_sp = dashboard_matches['Ingeschr. SP (intern)'].iloc[0]
|
||||
name_student = predelib_matches['Voornaam'].iloc[0] + ' ' + predelib_matches['Achternaam'].iloc[0]
|
||||
|
||||
# Handle potential NaN values
|
||||
if pd.isna(predelib_sp) or pd.isna(dashboard_sp):
|
||||
|
@ -126,8 +127,10 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
|
|||
if predelib_sp_num != dashboard_sp_num:
|
||||
mismatch = {
|
||||
'ID': id_val,
|
||||
'Name': name_student,
|
||||
'Predelib_SP': predelib_sp,
|
||||
'Dashboard_SP': dashboard_sp
|
||||
'Dashboard_SP': dashboard_sp,
|
||||
|
||||
}
|
||||
mismatches.append(mismatch)
|
||||
logger.debug(f"Mismatch found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")
|
||||
|
@ -145,7 +148,7 @@ def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) ->
|
|||
else:
|
||||
logger.warning(f"Found {len(mismatches)} mismatches")
|
||||
for mismatch in mismatches:
|
||||
logger.info(f"Mismatch - ID {mismatch['ID']}: Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
|
||||
logger.info(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
|
||||
|
||||
return mismatches
|
||||
|
||||
|
|
223
startpakketten/process_predelib_file.py
Normal file
223
startpakketten/process_predelib_file.py
Normal file
|
@ -0,0 +1,223 @@
|
|||
import pandas as pd
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
# Configure logging: records at INFO level and above are written to
# 'predelib_processing.log' and also sent to a console stream handler.
# NOTE(review): this runs at import time because it sits at module level;
# logging.basicConfig does nothing when the root logger already has handlers,
# so whichever module configures logging first wins - confirm this is intended.
_file_handler = logging.FileHandler('predelib_processing.log')
_console_handler = logging.StreamHandler()

logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[_file_handler, _console_handler],
)

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_students_with_fail_adviesrapport(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Collect the students whose 'Adviesrapport code' is FAIL.

    The code column is compared as text, ignoring case and surrounding
    whitespace. Rows that are exact duplicates of each other are reported
    once, and missing (NaN) cell values are returned as None.

    Args:
        predelib_df (pandas.DataFrame): Processed predeliberation dataframe

    Returns:
        list: One dictionary per failed student with the keys 'ID',
            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
            'Aantal_SP_vereist', 'Waarschuwing' and 'Adviesrapport_code'.
            Empty when no student has a FAIL code.

    Raises:
        ValueError: If the dataframe is None or empty, or filtering fails
        KeyError: If required columns are missing
    """
    logger.info("Starting failed students check")

    # Result key -> source column. The values double as the list of
    # required columns, so both can never drift apart.
    column_by_key = {
        'ID': 'ID',
        'Achternaam': 'Achternaam',
        'Voornaam': 'Voornaam',
        'E-mail': 'E-mail',
        'Totaal_aantal_SP': 'Totaal aantal SP',
        'Aantal_SP_vereist': 'Aantal SP vereist',
        'Waarschuwing': 'Waarschuwing',
        'Adviesrapport_code': 'Adviesrapport code',
    }

    try:
        # Validate input dataframe
        if predelib_df is None or predelib_df.empty:
            error_msg = "Predelib dataframe is None or empty"
            logger.error(error_msg)
            raise ValueError(error_msg)

        logger.info(f"Predelib dataframe shape: {predelib_df.shape}")

        # Check for required columns
        required_columns = list(column_by_key.values())
        missing_columns = [col for col in required_columns if col not in predelib_df.columns]
        if missing_columns:
            error_msg = f"Missing required columns in predelib dataframe: {missing_columns}"
            logger.error(error_msg)
            logger.info(f"Available columns: {list(predelib_df.columns)}")
            raise KeyError(error_msg)

        logger.info("All required columns found in dataframe")

        # Debug Adviesrapport code column
        code_column = predelib_df['Adviesrapport code']
        logger.debug(f"Adviesrapport code column type: {code_column.dtype}")
        logger.debug(f"Unique Adviesrapport codes: {code_column.unique()}")

        # Filter for FAIL cases
        try:
            # Compare as text; strip so values such as ' FAIL ' or 'fail'
            # coming from hand-edited spreadsheets are not silently missed.
            fail_mask = code_column.astype(str).str.strip().str.upper() == 'FAIL'
            students_with_fail_ar_df = predelib_df[fail_mask].copy()

            logger.info(f"Found {len(students_with_fail_ar_df)} students with FAIL status")

            # Remove duplicate rows (exact same values in all columns)
            initial_count = len(students_with_fail_ar_df)
            students_with_fail_ar_df = students_with_fail_ar_df.drop_duplicates()
            final_count = len(students_with_fail_ar_df)

            duplicates_removed = initial_count - final_count
            if duplicates_removed > 0:
                logger.info(f"Removed {duplicates_removed} duplicate rows")
            else:
                logger.info("No duplicate rows found")

            logger.info(f"Final count after duplicate removal: {final_count} students with FAIL status")

        except Exception as e:
            error_msg = f"Error filtering for FAIL status: {e}"
            logger.error(error_msg)
            # Chain the cause so the original traceback is not lost.
            raise ValueError(error_msg) from e

        if students_with_fail_ar_df.empty:
            logger.info("No students with FAIL status found")
            return []

        # Extract details for failed students
        students_with_fail_ar = []

        for index, row in students_with_fail_ar_df.iterrows():
            try:
                student_details = {}
                for key, column in column_by_key.items():
                    value = row[column]
                    # Handle potential NaN values
                    if pd.isna(value):
                        logger.warning(f"NaN value found for {key} in student ID: {row['ID']}")
                        value = None
                    student_details[key] = value
            except Exception as e:
                # Best effort: one malformed row must not hide the others.
                logger.error(f"Error processing student at index {index}: {e}")
                continue

            students_with_fail_ar.append(student_details)
            logger.debug(f"Processed failed student: ID={row['ID']}, "
                         f"Name={row['Achternaam']}, {row['Voornaam']}")

        logger.info(f"Successfully processed {len(students_with_fail_ar)} failed students")

        # Log summary
        if students_with_fail_ar:
            logger.warning(f"Found {len(students_with_fail_ar)} students with FAIL status")
            for student in students_with_fail_ar:
                logger.info(f"Failed student - ID: {student['ID']}, "
                            f"Name: {student['Achternaam']}, {student['Voornaam']}, "
                            f"SP: {student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}")
        else:
            logger.info("No failed students found")

        return students_with_fail_ar

    except (ValueError, KeyError):
        # Validation and filtering errors were already logged where they
        # were raised; do not report them again as "unexpected".
        raise
    except Exception as e:
        logger.error(f"Unexpected error in check_students_with_fail_adviesrapport: {e}")
        raise
|
||||
|
||||
|
||||
def print_students_with_fail_ar_summary(students_with_fail_ar: List[Dict[str, Any]], predelib_df: pd.DataFrame) -> None:
    """
    Print a formatted summary of students with FAIL status.

    Args:
        students_with_fail_ar: Student dictionaries with the keys 'ID',
            'Achternaam', 'Voornaam', 'E-mail', 'Totaal_aantal_SP',
            'Aantal_SP_vereist' and 'Waarschuwing'.
        predelib_df: The processed predeliberation dataframe; its row count
            is shown as the number of students processed. An integer count
            is accepted as well. A string (for example a file path) or None
            carries no record count, so the total is shown as "unknown"
            instead of a misleading number.
    """
    # Work out the total without mistaking the length of a path string for
    # a number of records.
    if isinstance(predelib_df, int):
        total_processed = predelib_df
    elif predelib_df is None or isinstance(predelib_df, (str, bytes)):
        total_processed = None
    else:
        try:
            total_processed = len(predelib_df)
        except TypeError:
            total_processed = None

    separator = '=' * 80

    print(f"\n{separator}")
    print("Students with FAIL AR status report")
    print(separator)
    if total_processed is None:
        print("Total students processed: unknown")
    else:
        print(f"Total students processed: {total_processed}")
    print(f"Students with FAIL status: {len(students_with_fail_ar)}")

    if students_with_fail_ar:
        print("\nDetailed failed students list:")
        print(f"{'ID':<10} {'Name':<25} {'Email':<30} {'SP':<15} {'Warning':<20}")
        print(f"{'-'*10} {'-'*25} {'-'*30} {'-'*15} {'-'*20}")

        for student in students_with_fail_ar:
            name = f"{student['Achternaam']}, {student['Voornaam']}"
            sp_info = f"{student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}"
            warning = str(student['Waarschuwing']) if student['Waarschuwing'] else "None"

            # Values are truncated to their column width to keep the table aligned.
            print(f"{str(student['ID']):<10} {name[:25]:<25} {str(student['E-mail'])[:30]:<30} "
                  f"{sp_info:<15} {warning[:20]:<20}")
    else:
        print("\n✅ No students with FAIL status found!")

    print(separator)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Example usage - can be used for testing. Reads 'db.xlsx' from the
    # current working directory, normalises it with check_headers_predelibfile
    # and prints the FAIL report.
    logger.info("Starting failed students check script")

    try:
        try:
            from checkheaders import check_headers_predelibfile
        except ImportError as e:
            # Only the checkheaders import is guarded here, so an ImportError
            # raised later (e.g. by pandas for a missing Excel engine) is not
            # misreported as a missing checkheaders.py.
            logger.error(f"Import error: {e}")
            print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
        else:
            try:
                # Read the Excel file
                logger.info("Reading predelib Excel file")
                try:
                    df_predelib = pd.read_excel('db.xlsx')
                    logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")
                except FileNotFoundError:
                    logger.error("db.xlsx file not found")
                    raise
                except Exception as e:
                    logger.error(f"Error reading db.xlsx: {e}")
                    raise

                # Process the dataframe
                logger.info("Processing predelib dataframe")
                try:
                    processed_predelib_df = check_headers_predelibfile(df_predelib)
                    logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")
                except Exception as e:
                    logger.error(f"Error processing predelib file: {e}")
                    raise

                # Check for failed students
                logger.info("Checking for failed students")
                try:
                    students_with_fail_ar = check_students_with_fail_adviesrapport(processed_predelib_df)
                    logger.info(f"Failed students check completed. Found {len(students_with_fail_ar)} failed students.")

                    # Print summary for console output
                    print_students_with_fail_ar_summary(students_with_fail_ar, processed_predelib_df)
                except Exception as e:
                    logger.error(f"Error during failed students check: {e}")
                    raise

            except Exception as e:
                # logger.exception records the traceback, so the log file the
                # user is pointed to below really contains the details.
                logger.exception(f"Unexpected error in main execution: {e}")
                print(f"An error occurred: {e}")
                print("Check the log file 'predelib_processing.log' for detailed error information.")
    finally:
        logger.info("Failed students check script completed")
|
|
@ -6,6 +6,7 @@ import os
|
|||
from pathlib import Path
|
||||
|
||||
from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
|
||||
from process_predelib_file import check_students_with_fail_adviesrapport, print_students_with_fail_ar_summary
|
||||
from compare_sp import compare_sp_values
|
||||
|
||||
# Configure logging
|
||||
|
@ -99,7 +100,11 @@ def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False
|
|||
|
||||
logger.info("Processing dashboard file headers")
|
||||
processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
|
||||
|
||||
|
||||
# Check the predeliberation file for students with a fail in 'Adviesrapport code'
|
||||
logger.info("Checking for students with FAIL status in predeliberation file")
|
||||
students_with_fail = check_students_with_fail_adviesrapport(processed_predelib_df)
|
||||
|
||||
# Compare SP values
|
||||
logger.info("Comparing SP values between files")
|
||||
mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
|
||||
|
@ -110,6 +115,8 @@ def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False
|
|||
'dashboard_file': dashboard_path,
|
||||
'predelib_records': len(processed_predelib_df),
|
||||
'dashboard_records': len(processed_dashboard_df),
|
||||
'students_with_fail_count': len(students_with_fail),
|
||||
'students_with_fail': students_with_fail,
|
||||
'mismatches_count': len(mismatches),
|
||||
'mismatches': mismatches,
|
||||
'status': 'completed'
|
||||
|
@ -144,12 +151,16 @@ def print_summary(results: dict):
|
|||
print(f"Dashboard file: {results['dashboard_file']}")
|
||||
print(f"Predelib records processed: {results['predelib_records']}")
|
||||
print(f"Dashboard records processed: {results['dashboard_records']}")
|
||||
print(f"Students with FAIL adviesrapport found: {results['students_with_fail_count']}")
|
||||
print(f"Mismatches found: {results['mismatches_count']}")
|
||||
|
||||
if results['students_with_fail_count'] > 0:
|
||||
print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_file'])
|
||||
|
||||
if results['mismatches']:
|
||||
print(f"\nDetailed mismatches:")
|
||||
print(f"\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
|
||||
for mismatch in results['mismatches']:
|
||||
print(f" ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
|
||||
print(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
|
||||
else:
|
||||
print("\n✅ All SP values match perfectly!")
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user