ows-master/startpakketten/data_processor.py

74 lines
2.8 KiB
Python

"""
Core data processing functions for the startpakket processing script.
"""
import pandas as pd
import logging
from typing import Dict, Any, List
from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from process_predelib_file import check_students_with_fail_adviesrapport
from compare_sp import compare_sp_values
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False) -> Dict[str, Any]:
    """
    Load both Excel exports, run the header checks, and summarise the comparison.

    Each workbook is read with ``pd.read_excel`` and passed through its
    header-check function. The checked predeliberation frame is then handed to
    ``check_students_with_fail_adviesrapport``, and both checked frames are
    handed to ``compare_sp_values``.

    Args:
        predelib_path: Path to the predeliberation Excel file
        dashboard_path: Path to the dashboard Excel file
        verbose: Verbose-logging flag. NOTE(review): this function never reads
            it; presumably the caller configures log levels - confirm.

    Returns:
        Dictionary with the keys ``predelib_file``, ``dashboard_file``,
        ``predelib_records``, ``dashboard_records``,
        ``students_with_fail_count``, ``students_with_fail``,
        ``mismatches_count``, ``mismatches`` and ``status`` (set to
        ``'completed'`` whenever the function returns).

    Raises:
        Exception: Any error raised while reading or processing the files is
            logged at ERROR level and then re-raised unchanged.
    """
    try:
        # Load the two workbooks, logging the shape of each once it is read.
        logger.info(f"Reading predeliberation file: {predelib_path}")
        df_predelib = pd.read_excel(predelib_path)
        logger.info(f"Predelib file loaded successfully. Shape: {df_predelib.shape}")

        logger.info(f"Reading dashboard file: {dashboard_path}")
        df_dashboard = pd.read_excel(dashboard_path)
        logger.info(f"Dashboard file loaded successfully. Shape: {df_dashboard.shape}")

        # Header checks: every later step works on what these calls return.
        logger.info("Processing predeliberation file headers")
        predelib_checked = check_headers_predelibfile(df_predelib)
        logger.info("Processing dashboard file headers")
        dashboard_checked = check_headers_dashboard_inschrijvingenfile(df_dashboard)

        # Scan the predeliberation data for a fail in 'Adviesrapport code'.
        logger.info("Checking for students with FAIL status in predeliberation file")
        failing_students = check_students_with_fail_adviesrapport(predelib_checked)

        # Cross-check the SP values of the two checked frames.
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(predelib_checked, dashboard_checked)

        # Sizes are measured only after every processing step has finished.
        predelib_total = len(predelib_checked)
        dashboard_total = len(dashboard_checked)
        fail_total = len(failing_students)
        mismatch_total = len(mismatches)

        summary = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': predelib_total,
            'dashboard_records': dashboard_total,
            'students_with_fail_count': fail_total,
            'students_with_fail': failing_students,
            'mismatches_count': mismatch_total,
            'mismatches': mismatches,
            'status': 'completed'
        }
        logger.info(f"Processing completed successfully. Found {len(mismatches)} mismatches.")
    except Exception as e:
        # Record the failure here, then let the caller decide how to react.
        logger.error(f"Error processing files: {e}")
        raise

    # Only reached on success: the handler above always re-raises.
    return summary