74 lines
2.8 KiB
Python
74 lines
2.8 KiB
Python
"""
|
|
Core data processing functions for the startpakket processing script.
|
|
"""
|
|
import pandas as pd
|
|
import logging
|
|
from typing import Dict, Any, List
|
|
|
|
from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
|
|
from process_predelib_file import check_students_with_fail_adviesrapport
|
|
from compare_sp import compare_sp_values
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False) -> Dict[str, Any]:
    """
    Process the Excel files and return results.

    Both workbooks are read with pandas and passed through their respective
    header checks. The checked predeliberation data is then scanned for
    students with a fail in 'Adviesrapport code', and the SP values of the
    two files are compared.

    Args:
        predelib_path: Path to the predeliberation Excel file
        dashboard_path: Path to the dashboard Excel file
        verbose: Enable verbose logging. When true, the column names of both
            loaded files are additionally logged at DEBUG level (whether
            they are shown depends on the caller's logging configuration).

    Returns:
        Dictionary containing processing results, with the keys
        'predelib_file', 'dashboard_file', 'predelib_records',
        'dashboard_records', 'students_with_fail_count',
        'students_with_fail', 'mismatches_count', 'mismatches' and
        'status' (set to 'completed' on success).

    Raises:
        Exception: If file processing fails. The error is logged together
            with its traceback and then re-raised unchanged.
    """
    try:
        # Read Excel files
        logger.info("Reading predeliberation file: %s", predelib_path)
        df_predelib = pd.read_excel(predelib_path)
        logger.info("Predelib file loaded successfully. Shape: %s", df_predelib.shape)

        logger.info("Reading dashboard file: %s", dashboard_path)
        df_dashboard = pd.read_excel(dashboard_path)
        logger.info("Dashboard file loaded successfully. Shape: %s", df_dashboard.shape)

        if verbose:
            # Extra detail for troubleshooting header problems; only emitted
            # on request so normal runs keep their log output unchanged.
            logger.debug("Predelib file columns: %s", list(df_predelib.columns))
            logger.debug("Dashboard file columns: %s", list(df_dashboard.columns))

        # Process the dataframes
        logger.info("Processing predeliberation file headers")
        processed_predelib_df = check_headers_predelibfile(df_predelib)

        logger.info("Processing dashboard file headers")
        processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

        # Check the predeliberation file for students with a fail in 'Adviesrapport code'
        logger.info("Checking for students with FAIL status in predeliberation file")
        students_with_fail = check_students_with_fail_adviesrapport(processed_predelib_df)

        # Compare SP values
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)

        # Prepare results
        results = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(processed_predelib_df),
            'dashboard_records': len(processed_dashboard_df),
            'students_with_fail_count': len(students_with_fail),
            'students_with_fail': students_with_fail,
            'mismatches_count': len(mismatches),
            'mismatches': mismatches,
            'status': 'completed',
        }

        logger.info("Processing completed successfully. Found %s mismatches.", len(mismatches))
        return results

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # so the failing step is visible in the log before re-raising.
        logger.exception("Error processing files: %s", e)
        raise
|