"""Command-line tool comparing SP values between two Excel exports.

The script reads a predeliberation file and a dashboard file, passes each
through its header check from ``checkheaders``, compares them with
``compare_sp.compare_sp_values`` and reports the mismatches on the console
and, optionally, in a JSON file.

Exit codes: 0 when no mismatches are found, 1 when mismatches are found or a
fatal error occurs, 130 when interrupted from the keyboard.
"""
import argparse
import json
import logging
import os
import sys
from pathlib import Path

import pandas as pd

from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from compare_sp import compare_sp_values

DEFAULT_LOG_FILE = 'startpakket_processing.log'
LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Shown verbatim below the option list (RawDescriptionHelpFormatter keeps the
# line breaks), so every example sits on its own line.
_USAGE_EXAMPLES = """
Examples:
  %(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
  %(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json
  %(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose
"""

# Kept as a module-level reference so main() can swap out exactly this handler
# when --log-file is given. delay=True postpones opening the file until the
# first record is written, so redirecting does not leave an empty default log.
_default_file_handler = logging.FileHandler(DEFAULT_LOG_FILE, delay=True)

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format=LOG_FORMAT,
    handlers=[
        _default_file_handler,
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger(__name__)


def validate_file_path(file_path: str) -> str:
    """Validate that the path is an existing Excel file.

    Used as an argparse ``type=`` callable.

    Args:
        file_path: Path given on the command line.

    Returns:
        The unchanged ``file_path`` when it passes all checks.

    Raises:
        argparse.ArgumentTypeError: If the path does not exist, is not a
            regular file, or does not end in ``.xlsx`` / ``.xls``
            (case-insensitive).
    """
    if not os.path.exists(file_path):
        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")
    # A directory that happens to be named "*.xlsx" would otherwise pass and
    # only fail later inside pandas with a less helpful error.
    if not os.path.isfile(file_path):
        raise argparse.ArgumentTypeError(f"'{file_path}' is not a file")
    if not file_path.lower().endswith(('.xlsx', '.xls')):
        raise argparse.ArgumentTypeError(
            f"File '{file_path}' is not an Excel file (.xlsx or .xls)"
        )
    return file_path


def parse_arguments() -> argparse.Namespace:
    """Parse command line arguments.

    Returns:
        Namespace with ``predelib``, ``dashboard``, ``output``, ``verbose``
        and ``log_file`` attributes.
    """
    parser = argparse.ArgumentParser(
        description='Process and compare student data from predeliberation and dashboard Excel files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=_USAGE_EXAMPLES,
    )

    parser.add_argument(
        '--predelib', '-p',
        type=validate_file_path,
        required=True,
        help='Path to the predeliberation Excel file (db.xlsx)',
    )
    parser.add_argument(
        '--dashboard', '-d',
        type=validate_file_path,
        required=True,
        help='Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)',
    )
    parser.add_argument(
        '--output', '-o',
        type=str,
        help='Output file path for results (optional, prints to console if not specified)',
    )
    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging',
    )
    parser.add_argument(
        '--log-file',
        type=str,
        default=DEFAULT_LOG_FILE,
        help='Path to log file (default: startpakket_processing.log)',
    )

    return parser.parse_args()


def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False) -> dict:
    """Load both Excel files, run the header checks and compare SP values.

    Args:
        predelib_path: Path to the predeliberation Excel file.
        dashboard_path: Path to the dashboard Excel file.
        verbose: Accepted for interface compatibility; currently unused
            (the log level is set by the caller).

    Returns:
        A dict with the keys ``predelib_file``, ``dashboard_file``,
        ``predelib_records``, ``dashboard_records``, ``mismatches_count``,
        ``mismatches`` and ``status``.

    Raises:
        Exception: Any error raised while reading or processing the files is
            logged and re-raised unchanged.
    """
    try:
        # Read Excel files
        logger.info("Reading predeliberation file: %s", predelib_path)
        df_predelib = pd.read_excel(predelib_path)
        logger.info("Predelib file loaded successfully. Shape: %s", df_predelib.shape)

        logger.info("Reading dashboard file: %s", dashboard_path)
        df_dashboard = pd.read_excel(dashboard_path)
        logger.info("Dashboard file loaded successfully. Shape: %s", df_dashboard.shape)

        # Process the dataframes
        logger.info("Processing predeliberation file headers")
        processed_predelib_df = check_headers_predelibfile(df_predelib)

        logger.info("Processing dashboard file headers")
        processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

        # Compare SP values
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
        mismatch_count = len(mismatches)

        # Prepare results
        results = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(processed_predelib_df),
            'dashboard_records': len(processed_dashboard_df),
            'mismatches_count': mismatch_count,
            'mismatches': mismatches,
            'status': 'completed',
        }

        logger.info("Processing completed successfully. Found %s mismatches.", mismatch_count)
        return results

    except Exception as e:
        logger.error("Error processing files: %s", e)
        raise


def _json_default(value):
    """Convert values ``json`` cannot encode natively.

    numpy scalars and arrays (and pandas containers) expose ``tolist()``,
    which yields plain Python objects. Anything else is rejected with the
    same ``TypeError`` the encoder would raise on its own.
    """
    to_list = getattr(value, 'tolist', None)
    if callable(to_list):
        return to_list()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_results(results: dict, output_path: str) -> None:
    """Save results to a UTF-8 encoded JSON file.

    Args:
        results: The dict produced by ``process_files``.
        output_path: Destination file; overwritten if it already exists.

    Raises:
        Exception: Any error while writing or serializing is logged and
            re-raised unchanged.
    """
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            # NOTE(review): mismatch values presumably originate from
            # DataFrames and may be numpy integer scalars, which json.dump
            # rejects; _json_default converts those to plain Python values.
            json.dump(results, f, indent=2, ensure_ascii=False, default=_json_default)
        logger.info("Results saved to: %s", output_path)
    except Exception as e:
        logger.error("Error saving results to %s: %s", output_path, e)
        raise


def print_summary(results: dict) -> None:
    """Print a summary of the results to console.

    Args:
        results: The dict produced by ``process_files``. Each entry of
            ``results['mismatches']`` must provide the keys ``ID``,
            ``Predelib_SP`` and ``Dashboard_SP``.
    """
    separator = '=' * 60

    print(f"\n{separator}")
    print("STARTPAKKET PROCESSING SUMMARY")
    print(separator)
    print(f"Predelib file: {results['predelib_file']}")
    print(f"Dashboard file: {results['dashboard_file']}")
    print(f"Predelib records processed: {results['predelib_records']}")
    print(f"Dashboard records processed: {results['dashboard_records']}")
    print(f"Mismatches found: {results['mismatches_count']}")

    if results['mismatches']:
        print("\nDetailed mismatches:")
        for mismatch in results['mismatches']:
            print(
                f" ID {mismatch['ID']}: "
                f"Predelib={mismatch['Predelib_SP']}, "
                f"Dashboard={mismatch['Dashboard_SP']}"
            )
    else:
        print("\n✅ All SP values match perfectly!")

    print(f"Status: {results['status']}")
    print(separator)


def _use_log_file(log_file: str) -> None:
    """Write file logs to ``log_file`` instead of the default log file.

    The replacement handler is created first, so if the new path cannot be
    opened the default handler stays in place and the error is still logged.
    """
    replacement = logging.FileHandler(log_file)
    replacement.setFormatter(logging.Formatter(LOG_FORMAT))

    root_logger = logging.getLogger()
    # removeHandler is a no-op when the handler is not attached, e.g. when
    # logging had already been configured before this module was imported.
    root_logger.removeHandler(_default_file_handler)
    _default_file_handler.close()
    root_logger.addHandler(replacement)


def main():
    """Run the command-line workflow and exit with a status code.

    Exits with 0 when no mismatches are found, 1 when mismatches are found
    or a fatal error occurs, and 130 on ``KeyboardInterrupt``.
    """
    try:
        # Parse arguments
        args = parse_arguments()

        # Honour --log-file before anything is logged
        if args.log_file != DEFAULT_LOG_FILE:
            _use_log_file(args.log_file)

        # Configure logging level
        if args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            logger.debug("Verbose logging enabled")

        logger.info("Starting startpakket processing")
        logger.info("Predelib file: %s", args.predelib)
        logger.info("Dashboard file: %s", args.dashboard)

        # Process files
        results = process_files(args.predelib, args.dashboard, args.verbose)

        # Save results if output path specified
        if args.output:
            save_results(results, args.output)

        # Print summary
        print_summary(results)

        # Exit with appropriate code
        exit_code = 0 if results['mismatches_count'] == 0 else 1
        logger.info("Processing completed with exit code: %s", exit_code)
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.
        sys.exit(exit_code)

    except KeyboardInterrupt:
        logger.info("Processing interrupted by user")
        sys.exit(130)
    except Exception as e:
        logger.error("Fatal error: %s", e)
        # Full traceback only at DEBUG level, i.e. when --verbose is given.
        logger.debug("Traceback for fatal error:", exc_info=True)
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()