# ows-master/startpakketten/script.py

import pandas as pd
import argparse
import logging
import sys
import os
from pathlib import Path

from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from process_predelib_file import check_students_with_fail_adviesrapport, print_students_with_fail_ar_summary
from compare_sp import compare_sp_values

# Root logging setup, executed at import time: records at INFO level and above
# go to a log file (relative path, so it lands in the current working
# directory) and to a stream handler for console output.
logging.basicConfig(
    handlers=[
        logging.FileHandler('startpakket_processing.log'),
        logging.StreamHandler(),
    ],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)


def validate_file_path(file_path: str) -> str:
    """Validate that a path points to an existing Excel file.

    Used as an ``argparse`` ``type=`` callable for the input-file options.

    Args:
        file_path: Path as supplied on the command line.

    Returns:
        ``file_path`` unchanged when it passes every check.

    Raises:
        argparse.ArgumentTypeError: If the path does not exist, is not a
            regular file (for example a directory), or does not end in
            ``.xlsx`` / ``.xls`` (compared case-insensitively).
    """
    if not os.path.exists(file_path):
        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")

    # A directory named e.g. "reports.xlsx" exists and has a matching suffix,
    # but can never be read as a workbook; reject it here so the user gets a
    # clear argument error instead of a failure while reading the file.
    if not os.path.isfile(file_path):
        raise argparse.ArgumentTypeError(f"'{file_path}' is not a regular file")

    if not file_path.lower().endswith(('.xlsx', '.xls')):
        raise argparse.ArgumentTypeError(f"File '{file_path}' is not an Excel file (.xlsx or .xls)")

    return file_path


def parse_arguments():
    """Build the command-line parser and parse the process arguments.

    Options:
        --predelib / -p   required; validated by ``validate_file_path``
        --dashboard / -d  required; validated by ``validate_file_path``
        --output / -o     optional path for the results file
        --verbose / -v    flag enabling verbose logging
        --log-file        log file path (default: startpakket_processing.log)

    Returns:
        The ``argparse.Namespace`` produced by ``parse_args()``.
    """
    # Shown verbatim below the option list (RawDescriptionHelpFormatter keeps
    # the line breaks and indentation as written).
    usage_examples = """
Examples:
  %(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
  %(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json
  %(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose
        """

    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='Process and compare student data from predeliberation and dashboard Excel files',
        epilog=usage_examples,
    )

    # The two Excel inputs share the same validation and are both mandatory;
    # only the flags and help text differ.
    excel_inputs = (
        (('--predelib', '-p'), 'Path to the predeliberation Excel file (db.xlsx)'),
        (('--dashboard', '-d'), 'Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)'),
    )
    for flags, help_text in excel_inputs:
        cli.add_argument(*flags, type=validate_file_path, required=True, help=help_text)

    cli.add_argument(
        '--output', '-o',
        type=str,
        help='Output file path for results (optional, prints to console if not specified)',
    )
    cli.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging',
    )
    cli.add_argument(
        '--log-file',
        type=str,
        default='startpakket_processing.log',
        help='Path to log file (default: startpakket_processing.log)',
    )

    return cli.parse_args()


def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False):
    """Load both Excel files, run the checks and collect the outcome.

    Steps, in order: read both workbooks with pandas, pass each through its
    header check, look for students with a fail in the predeliberation data,
    then compare SP values between the two processed frames.

    Args:
        predelib_path: Path to the predeliberation Excel file.
        dashboard_path: Path to the dashboard Excel file.
        verbose: Accepted for the caller's convenience; not used here.

    Returns:
        A dict with the input paths, record counts of both processed frames,
        the failing students and their count, the mismatches and their count,
        and a ``'status'`` of ``'completed'``.

    Raises:
        Exception: Any error raised along the way is logged and re-raised.
    """
    try:
        # --- Load the raw workbooks -------------------------------------
        logger.info(f"Reading predeliberation file: {predelib_path}")
        df_predelib = pd.read_excel(predelib_path)
        logger.info(f"Predelib file loaded successfully. Shape: {df_predelib.shape}")

        logger.info(f"Reading dashboard file: {dashboard_path}")
        df_dashboard = pd.read_excel(dashboard_path)
        logger.info(f"Dashboard file loaded successfully. Shape: {df_dashboard.shape}")

        # --- Header checks ----------------------------------------------
        logger.info("Processing predeliberation file headers")
        predelib_checked = check_headers_predelibfile(df_predelib)

        logger.info("Processing dashboard file headers")
        dashboard_checked = check_headers_dashboard_inschrijvingenfile(df_dashboard)

        # --- Students with a fail in 'Adviesrapport code' ---------------
        logger.info("Checking for students with FAIL status in predeliberation file")
        failed_students = check_students_with_fail_adviesrapport(predelib_checked)

        # --- SP comparison between the two files ------------------------
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(predelib_checked, dashboard_checked)

        # Key order is kept stable because it is the order written to the
        # output file.
        results = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(predelib_checked),
            'dashboard_records': len(dashboard_checked),
        }
        results.update({
            'students_with_fail_count': len(failed_students),
            'students_with_fail': failed_students,
            'mismatches_count': len(mismatches),
            'mismatches': mismatches,
            'status': 'completed',
        })

        logger.info(f"Processing completed successfully. Found {len(mismatches)} mismatches.")
        return results

    except Exception as exc:
        logger.error(f"Error processing files: {exc}")
        raise


def save_results(results: dict, output_path: str):
    """Write ``results`` to ``output_path`` as indented UTF-8 JSON.

    Args:
        results: JSON-serializable mapping to store.
        output_path: Destination file; overwritten if it already exists.

    Raises:
        Exception: Any serialization or I/O error is logged and re-raised.
            When serialization fails the destination file is not touched.
    """
    import json

    try:
        # Serialize completely before opening the file. json.dump() streams
        # into the open file, so a value that cannot be serialized would
        # otherwise leave a truncated, invalid JSON file behind (and wipe any
        # previous results stored at the same path).
        payload = json.dumps(results, indent=2, ensure_ascii=False)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        logger.info(f"Results saved to: {output_path}")
    except Exception as e:
        logger.error(f"Error saving results to {output_path}: {e}")
        raise


def print_summary(results: dict):
    """Write a human-readable report of ``results`` to standard output.

    Prints a banner, the input paths and counts, then (when the fail count is
    positive) the failing-student summary, then either one line per mismatch
    or a confirmation that all SP values match, and finally the status.

    Args:
        results: Mapping with the keys read below: ``predelib_file``,
            ``dashboard_file``, ``predelib_records``, ``dashboard_records``,
            ``students_with_fail_count``, ``students_with_fail``,
            ``mismatches_count``, ``mismatches`` and ``status``.
    """
    rule = '=' * 60

    # Header and headline numbers.
    print(f"\n{rule}")
    print("STARTPAKKET PROCESSING SUMMARY")
    print(f"{rule}")
    print(f"Predelib file: {results['predelib_file']}")
    print(f"Dashboard file: {results['dashboard_file']}")
    print(f"Predelib records processed: {results['predelib_records']}")
    print(f"Dashboard records processed: {results['dashboard_records']}")
    print(f"Students with FAIL adviesrapport found: {results['students_with_fail_count']}")
    print(f"Mismatches found: {results['mismatches_count']}")

    # Detailed fail report is delegated to the predelib helper.
    if results['students_with_fail_count'] > 0:
        print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_file'])

    # SP comparison outcome.
    found = results['mismatches']
    if not found:
        print("\n✅ All SP values match perfectly!")
    else:
        print(f"\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
        for entry in found:
            print(f"Mismatch - ID {entry['ID']} ({entry['Name']}): Predeliberatierapport SP={entry['Predelib_SP']}, Dashboard Inschrijvingen SP={entry['Dashboard_SP']}")

    print(f"Status: {results['status']}")
    print(f"{rule}")


def _use_log_file(log_file: str) -> None:
    """Make the root logger write its file output to ``log_file``.

    Does nothing when a root file handler already targets that path, so the
    default invocation keeps the handler installed at import time.
    """
    root = logging.getLogger()
    wanted = os.path.abspath(log_file)
    file_handlers = [h for h in root.handlers if isinstance(h, logging.FileHandler)]
    if any(h.baseFilename == wanted for h in file_handlers):
        return

    # Open the new file first: if that fails, the existing handlers are still
    # in place to record the error.
    replacement = logging.FileHandler(log_file)
    template = file_handlers[0] if file_handlers else next(iter(root.handlers), None)
    if template is not None:
        # Reuse the existing formatter so both outputs keep the same layout.
        replacement.setFormatter(template.formatter)
    root.addHandler(replacement)

    for stale in file_handlers:
        root.removeHandler(stale)
        stale.close()


def main():
    """Run the command-line workflow and exit with a status code.

    Parses the arguments, applies the logging options (``--log-file`` and
    ``--verbose``), processes both files, optionally saves the results, and
    prints the summary.

    Exit codes:
        0   processing finished and no mismatches were found
        1   mismatches were found, or a fatal error occurred
        130 interrupted by the user (KeyboardInterrupt)
    """
    try:
        # Parse arguments
        args = parse_arguments()

        # Honour --log-file. The module-level logging setup has already opened
        # its default log file by the time arguments are parsed, so that file
        # may still be created (empty) when a different path is requested.
        if args.log_file:
            _use_log_file(args.log_file)

        # Configure logging level
        if args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            logger.debug("Verbose logging enabled")

        logger.info("Starting startpakket processing")
        logger.info(f"Predelib file: {args.predelib}")
        logger.info(f"Dashboard file: {args.dashboard}")

        # Process files
        results = process_files(args.predelib, args.dashboard, args.verbose)

        # Save results if output path specified
        if args.output:
            save_results(results, args.output)

        # Print summary
        print_summary(results)

        # A non-zero exit code lets calling scripts detect mismatches.
        exit_code = 0 if results['mismatches_count'] == 0 else 1
        logger.info(f"Processing completed with exit code: {exit_code}")
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.
        sys.exit(exit_code)

    except KeyboardInterrupt:
        logger.info("Processing interrupted by user")
        sys.exit(130)
    except Exception as e:
        logger.error(f"Fatal error: {e}")
        print(f"Error: {e}")
        sys.exit(1)


# Run the command-line workflow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()