"""Process and compare student data from predeliberation and dashboard Excel files."""
import argparse
import json
import logging
import os
import sys
from pathlib import Path

import pandas as pd

from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from compare_sp import compare_sp_values
from process_predelib_file import check_students_with_fail_adviesrapport, print_students_with_fail_ar_summary
|
|
|
|
# Configure logging
|
|
logging.basicConfig(
|
|
level=logging.INFO,
|
|
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
|
handlers=[
|
|
logging.FileHandler('startpakket_processing.log'),
|
|
logging.StreamHandler()
|
|
]
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def validate_file_path(file_path: str) -> str:
    """Validate that *file_path* names an existing Excel file.

    Intended as an argparse ``type=`` callable: returns the path unchanged
    on success and raises ``argparse.ArgumentTypeError`` otherwise.
    """
    exists = os.path.exists(file_path)
    if not exists:
        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")

    is_excel = file_path.lower().endswith(('.xlsx', '.xls'))
    if not is_excel:
        raise argparse.ArgumentTypeError(f"File '{file_path}' is not an Excel file (.xlsx or .xls)")

    return file_path
|
|
|
|
|
|
def parse_arguments():
    """Parse the command-line arguments for this script.

    Returns:
        argparse.Namespace with attributes: ``predelib``, ``dashboard``,
        ``output``, ``verbose`` and ``log_file``.
    """
    epilog = """
Examples:
  %(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
  %(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json
  %(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose
"""
    parser = argparse.ArgumentParser(
        description='Process and compare student data from predeliberation and dashboard Excel files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )

    # Required input files; both are validated for existence and extension.
    parser.add_argument('--predelib', '-p', type=validate_file_path, required=True,
                        help='Path to the predeliberation Excel file (db.xlsx)')
    parser.add_argument('--dashboard', '-d', type=validate_file_path, required=True,
                        help='Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)')

    # Optional behaviour switches.
    parser.add_argument('--output', '-o', type=str,
                        help='Output file path for results (optional, prints to console if not specified)')
    parser.add_argument('--verbose', '-v', action='store_true',
                        help='Enable verbose logging')
    parser.add_argument('--log-file', type=str, default='startpakket_processing.log',
                        help='Path to log file (default: startpakket_processing.log)')

    return parser.parse_args()
|
|
|
|
|
|
def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False):
    """Read, validate and cross-check the two Excel inputs.

    Args:
        predelib_path: Path to the predeliberation Excel file.
        dashboard_path: Path to the dashboard Excel file.
        verbose: When True, log extra per-file detail at DEBUG level.
            (Fix: this parameter was previously accepted but never used.)

    Returns:
        dict summarising both inputs, the students flagged with a FAIL
        'Adviesrapport code', and the SP mismatches between the files.

    Raises:
        Exception: re-raised after logging if any processing step fails.
    """
    try:
        # Read Excel files (lazy %-style logging args instead of f-strings).
        logger.info("Reading predeliberation file: %s", predelib_path)
        df_predelib = pd.read_excel(predelib_path)
        logger.info("Predelib file loaded successfully. Shape: %s", df_predelib.shape)
        if verbose:
            logger.debug("Predelib columns: %s", list(df_predelib.columns))

        logger.info("Reading dashboard file: %s", dashboard_path)
        df_dashboard = pd.read_excel(dashboard_path)
        logger.info("Dashboard file loaded successfully. Shape: %s", df_dashboard.shape)
        if verbose:
            logger.debug("Dashboard columns: %s", list(df_dashboard.columns))

        # Normalise/verify headers (project-specific checks).
        logger.info("Processing predeliberation file headers")
        processed_predelib_df = check_headers_predelibfile(df_predelib)

        logger.info("Processing dashboard file headers")
        processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

        # Check the predeliberation file for students with a fail in 'Adviesrapport code'.
        logger.info("Checking for students with FAIL status in predeliberation file")
        students_with_fail = check_students_with_fail_adviesrapport(processed_predelib_df)

        # Compare SP values between the two processed frames.
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)

        # Prepare results
        results = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(processed_predelib_df),
            'dashboard_records': len(processed_dashboard_df),
            'students_with_fail_count': len(students_with_fail),
            'students_with_fail': students_with_fail,
            'mismatches_count': len(mismatches),
            'mismatches': mismatches,
            'status': 'completed',
        }

        logger.info("Processing completed successfully. Found %d mismatches.", len(mismatches))
        return results

    except Exception as e:
        # Log with traceback, then let the caller decide how to exit.
        logger.exception("Error processing files: %s", e)
        raise
|
|
|
|
|
|
def save_results(results: dict, output_path: str):
    """Serialise *results* to *output_path* as pretty-printed UTF-8 JSON.

    Raises:
        Exception: re-raised after logging if writing or serialisation fails
            (e.g. a non-JSON-serialisable value inside *results*).
    """
    # Fix: `json` was imported at function scope; it now comes from the
    # top-of-file imports per PEP 8.
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info("Results saved to: %s", output_path)
    except Exception as e:
        # logger.exception keeps the traceback in the log file.
        logger.exception("Error saving results to %s: %s", output_path, e)
        raise
|
|
|
|
|
|
def print_summary(results: dict):
    """Print a human-readable summary of *results* to stdout."""
    separator = '=' * 60
    print(f"\n{separator}")
    print("STARTPAKKET PROCESSING SUMMARY")
    print(separator)
    print(f"Predelib file: {results['predelib_file']}")
    print(f"Dashboard file: {results['dashboard_file']}")
    print(f"Predelib records processed: {results['predelib_records']}")
    print(f"Dashboard records processed: {results['dashboard_records']}")
    print(f"Students with FAIL adviesrapport found: {results['students_with_fail_count']}")
    print(f"Mismatches found: {results['mismatches_count']}")

    if results['students_with_fail_count'] > 0:
        # Per-student FAIL detail is delegated to the predelib module helper.
        print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_file'])

    mismatches = results['mismatches']
    if mismatches:
        print("\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
        for m in mismatches:
            print(
                f"Mismatch - ID {m['ID']} ({m['Name']}): "
                f"Predeliberatierapport SP={m['Predelib_SP']}, "
                f"Dashboard Inschrijvingen SP={m['Dashboard_SP']}"
            )
    else:
        print("\n✅ All SP values match perfectly!")

    print(f"Status: {results['status']}")
    print(separator)
|
|
|
|
|
|
def main():
    """Entry point: parse args, process the files, report and exit.

    Exit codes: 0 = no SP mismatches, 1 = mismatches found or fatal error,
    130 = interrupted by user.
    """
    try:
        # Parse arguments
        args = parse_arguments()

        # Configure logging level
        if args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            logger.debug("Verbose logging enabled")

        # Fix: honour --log-file. The module-level basicConfig already attached
        # a FileHandler for the default path, so when a custom path is given we
        # swap the file handler out (previously the option was parsed but
        # silently ignored).
        if args.log_file != 'startpakket_processing.log':
            root = logging.getLogger()
            for handler in list(root.handlers):
                if isinstance(handler, logging.FileHandler):
                    root.removeHandler(handler)
                    handler.close()
            file_handler = logging.FileHandler(args.log_file)
            file_handler.setFormatter(
                logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            )
            root.addHandler(file_handler)

        logger.info("Starting startpakket processing")
        logger.info("Predelib file: %s", args.predelib)
        logger.info("Dashboard file: %s", args.dashboard)

        # Process files
        results = process_files(args.predelib, args.dashboard, args.verbose)

        # Save results if an output path was specified
        if args.output:
            save_results(results, args.output)

        # Print summary
        print_summary(results)

        # Exit non-zero when mismatches were found so callers can react.
        exit_code = 0 if results['mismatches_count'] == 0 else 1
        logger.info("Processing completed with exit code: %s", exit_code)
        sys.exit(exit_code)

    except KeyboardInterrupt:
        logger.info("Processing interrupted by user")
        sys.exit(130)
    except Exception as e:
        # Keep the traceback in the log; the console gets a short message.
        logger.exception("Fatal error: %s", e)
        print(f"Error: {e}")
        sys.exit(1)
|
|
|
|
|
|
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|