ows-master/startpakketten/script.py

212 lines
7.3 KiB
Python

import pandas as pd
import argparse
import logging
import sys
import os
from pathlib import Path
from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from process_predelib_file import check_students_with_fail_adviesrapport, print_students_with_fail_ar_summary
from compare_sp import compare_sp_values
# Logging setup: every record is written both to a log file in the current
# working directory and to the console stream, at INFO level and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler('startpakket_processing.log'),
        logging.StreamHandler(),
    ],
)

# Module-level logger shared by all functions in this script.
logger = logging.getLogger(__name__)
def validate_file_path(file_path: str) -> str:
    """Validate that a path points to an existing Excel file.

    Suitable as an ``argparse`` ``type=`` callable: it returns the value on
    success and raises ``ArgumentTypeError`` on failure.

    Args:
        file_path: Path to check.

    Returns:
        ``file_path`` unchanged when it passes validation.

    Raises:
        argparse.ArgumentTypeError: If the path does not exist, is not a
            regular file, or does not end in ``.xlsx`` / ``.xls``
            (compared case-insensitively).
    """
    if not os.path.exists(file_path):
        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")
    # A directory can carry an Excel-looking name; reject it here so the
    # problem is reported as a bad argument instead of failing later on read.
    if not os.path.isfile(file_path):
        raise argparse.ArgumentTypeError(f"'{file_path}' is not a file")
    if not file_path.lower().endswith(('.xlsx', '.xls')):
        raise argparse.ArgumentTypeError(f"File '{file_path}' is not an Excel file (.xlsx or .xls)")
    return file_path
def parse_arguments():
    """Build the command-line parser and parse ``sys.argv``.

    Options defined:
        --predelib / -p   required, validated by ``validate_file_path``
        --dashboard / -d  required, validated by ``validate_file_path``
        --output / -o     optional string
        --verbose / -v    boolean flag
        --log-file        string, defaults to ``startpakket_processing.log``

    Returns:
        The ``argparse.Namespace`` produced by ``parser.parse_args()``.
    """
    usage_examples = (
        "\n"
        "Examples:\n"
        "%(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx\n"
        "%(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json\n"
        "%(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose\n"
    )
    parser = argparse.ArgumentParser(
        description='Process and compare student data from predeliberation and dashboard Excel files',
        epilog=usage_examples,
        # Raw formatter keeps the line breaks of the examples in --help output.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

    # The two input workbooks share the same option shape; only the flags
    # and the help text differ.
    required_inputs = (
        ('--predelib', '-p', 'Path to the predeliberation Excel file (db.xlsx)'),
        ('--dashboard', '-d', 'Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)'),
    )
    for long_flag, short_flag, help_text in required_inputs:
        parser.add_argument(
            long_flag, short_flag,
            required=True,
            type=validate_file_path,
            help=help_text,
        )

    parser.add_argument(
        '--output', '-o',
        type=str,
        help='Output file path for results (optional, prints to console if not specified)',
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging',
    )
    parser.add_argument(
        '--log-file',
        type=str,
        default='startpakket_processing.log',
        help='Path to log file (default: startpakket_processing.log)',
    )

    return parser.parse_args()
def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False):
    """Load both Excel workbooks, run the checks and collect the outcome.

    Steps, in order: read each workbook with ``pd.read_excel``, pass each
    frame through its header-check function, look up students with a failing
    'Adviesrapport code' in the predeliberation data, then compare SP values
    between the two processed frames.

    Args:
        predelib_path: Path of the predeliberation workbook.
        dashboard_path: Path of the dashboard workbook.
        verbose: Accepted for interface compatibility; not used in this
            function.

    Returns:
        A dict with the input paths, the record counts of both processed
        frames, the failing students and their count, the SP mismatches and
        their count, and ``'status': 'completed'``.

    Raises:
        Exception: Any error raised while reading or processing is logged
            and re-raised unchanged.
    """
    try:
        # --- Load the raw workbooks -------------------------------------
        logger.info(f"Reading predeliberation file: {predelib_path}")
        raw_predelib = pd.read_excel(predelib_path)
        logger.info(f"Predelib file loaded successfully. Shape: {raw_predelib.shape}")

        logger.info(f"Reading dashboard file: {dashboard_path}")
        raw_dashboard = pd.read_excel(dashboard_path)
        logger.info(f"Dashboard file loaded successfully. Shape: {raw_dashboard.shape}")

        # --- Header checks ----------------------------------------------
        logger.info("Processing predeliberation file headers")
        predelib = check_headers_predelibfile(raw_predelib)

        logger.info("Processing dashboard file headers")
        dashboard = check_headers_dashboard_inschrijvingenfile(raw_dashboard)

        # --- Students with a fail in 'Adviesrapport code' ---------------
        logger.info("Checking for students with FAIL status in predeliberation file")
        failing_students = check_students_with_fail_adviesrapport(predelib)

        # --- SP comparison between the two files ------------------------
        logger.info("Comparing SP values between files")
        sp_mismatches = compare_sp_values(predelib, dashboard)

        mismatch_total = len(sp_mismatches)
        outcome = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(predelib),
            'dashboard_records': len(dashboard),
            'students_with_fail_count': len(failing_students),
            'students_with_fail': failing_students,
            'mismatches_count': mismatch_total,
            'mismatches': sp_mismatches,
            'status': 'completed',
        }

        logger.info(f"Processing completed successfully. Found {mismatch_total} mismatches.")
        return outcome
    except Exception as e:
        # Record the failure here, then let the caller decide how to exit.
        logger.error(f"Error processing files: {e}")
        raise
def _json_fallback(value):
    """Convert a value the ``json`` encoder rejects into an encodable one.

    Used as the ``default=`` hook of ``json.dump``; it is only called for
    objects the encoder cannot handle natively. It covers the kinds of
    objects pandas-based processing commonly produces:

    * date/time objects (anything with ``isoformat``) become ISO strings;
    * NumPy scalars and arrays, and pandas Series (anything with
      ``tolist``) become plain Python values or lists;
    * a pandas DataFrame becomes a list of row dicts.

    Raises:
        TypeError: For any other type, matching the encoder's own behavior
            so unexpected objects are never silently stringified.
    """
    if isinstance(value, pd.DataFrame):
        return value.to_dict(orient='records')
    if hasattr(value, 'isoformat'):
        return value.isoformat()
    if hasattr(value, 'tolist'):
        return value.tolist()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_results(results: dict, output_path: str):
    """Write *results* to *output_path* as indented UTF-8 JSON.

    Values that the standard encoder cannot serialize are converted by
    ``_json_fallback``.
    NOTE(review): the exact types returned by the project's check/compare
    helpers are not visible here; the fallback assumes they may contain
    NumPy scalars, timestamps or DataFrames — confirm against those helpers.

    Args:
        results: Mapping to serialize.
        output_path: Destination file; it is created or overwritten.

    Raises:
        Exception: Any error while opening or writing the file (including a
            ``TypeError`` for values that remain unserializable) is logged
            and re-raised.
    """
    try:
        import json
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False, default=_json_fallback)
        logger.info(f"Results saved to: {output_path}")
    except Exception as e:
        logger.error(f"Error saving results to {output_path}: {e}")
        raise
def print_summary(results: dict):
    """Print a human-readable report of *results* to standard output.

    The report consists of a banner, the two input paths, the record and
    finding counts, an optional section on students with a failing
    adviesrapport (delegated to ``print_students_with_fail_ar_summary``),
    then either one line per SP mismatch or a confirmation that all SP
    values match, and finally the status and a closing rule.

    Args:
        results: Result mapping; the keys read here are ``predelib_file``,
            ``dashboard_file``, ``predelib_records``, ``dashboard_records``,
            ``students_with_fail_count``, ``students_with_fail``,
            ``mismatches_count``, ``mismatches`` and ``status``.
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print("STARTPAKKET PROCESSING SUMMARY")
    print(rule)

    # (label, key) pairs printed as "label: value", in display order.
    overview = (
        ("Predelib file", 'predelib_file'),
        ("Dashboard file", 'dashboard_file'),
        ("Predelib records processed", 'predelib_records'),
        ("Dashboard records processed", 'dashboard_records'),
        ("Students with FAIL adviesrapport found", 'students_with_fail_count'),
        ("Mismatches found", 'mismatches_count'),
    )
    for label, key in overview:
        print(f"{label}: {results[key]}")

    if results['students_with_fail_count'] > 0:
        print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_file'])

    found = results['mismatches']
    if not found:
        print("\n✅ All SP values match perfectly!")
    else:
        print("\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
        for entry in found:
            print(f"Mismatch - ID {entry['ID']} ({entry['Name']}): Predeliberatierapport SP={entry['Predelib_SP']}, Dashboard Inschrijvingen SP={entry['Dashboard_SP']}")

    print(f"Status: {results['status']}")
    print(rule)
def _apply_log_file(log_path: str) -> None:
    """Point the root logger's file output at *log_path*.

    Any ``logging.FileHandler`` attached to the root logger is replaced by
    one writing to *log_path*, reusing the existing formatter. Nothing
    changes when a file handler already targets that path. The replacement
    is opened before the old handlers are removed, so a failure to open the
    new file leaves the current logging setup intact.

    Args:
        log_path: Destination log file requested by the user.

    Raises:
        OSError: If the new log file cannot be opened.
    """
    root_logger = logging.getLogger()
    # FileHandler stores its target as an absolute path in ``baseFilename``.
    target = os.path.abspath(log_path)
    file_handlers = [
        handler for handler in root_logger.handlers
        if isinstance(handler, logging.FileHandler)
    ]
    if any(handler.baseFilename == target for handler in file_handlers):
        return

    replacement = logging.FileHandler(target)
    template = file_handlers or root_logger.handlers
    if template:
        replacement.setFormatter(template[0].formatter)

    for stale in file_handlers:
        root_logger.removeHandler(stale)
        stale.close()
    root_logger.addHandler(replacement)


def main():
    """Run the command-line workflow and terminate with an exit code.

    Parses the arguments, applies the ``--log-file`` and ``--verbose``
    settings, processes both workbooks, optionally saves the results to the
    ``--output`` path, and prints the summary.

    Exit codes:
        0   processing finished and no SP mismatches were found
        1   SP mismatches were found, or a fatal error occurred
        130 interrupted by the user (Ctrl+C)
    """
    try:
        # Parse arguments
        args = parse_arguments()

        # Honour --log-file; previously the option was parsed but ignored
        # and logs always went to the default file.
        _apply_log_file(args.log_file)

        # Configure logging level
        if args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            logger.debug("Verbose logging enabled")

        logger.info("Starting startpakket processing")
        logger.info(f"Predelib file: {args.predelib}")
        logger.info(f"Dashboard file: {args.dashboard}")

        # Process files
        results = process_files(args.predelib, args.dashboard, args.verbose)

        # Save results if output path specified
        if args.output:
            save_results(results, args.output)

        # Print summary
        print_summary(results)

        # Exit with appropriate code. SystemExit is not an Exception
        # subclass, so it passes through the handlers below.
        exit_code = 0 if results['mismatches_count'] == 0 else 1
        logger.info(f"Processing completed with exit code: {exit_code}")
        sys.exit(exit_code)
    except KeyboardInterrupt:
        logger.info("Processing interrupted by user")
        sys.exit(130)
    except Exception as e:
        logger.error(f"Fatal error: {e}")
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()