import pandas as pd
import argparse
import logging
import sys
import os
from pathlib import Path

from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from compare_sp import compare_sp_values

# Configure logging
# Root logger: records at INFO level and above are sent to two handlers,
# a file handler and a stream handler, sharing the format string below.
# NOTE: the FileHandler is constructed when this module is imported, i.e.
# before any command-line arguments have been parsed, so the path given
# here is fixed at import time and is relative to the working directory.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('startpakket_processing.log'),
        logging.StreamHandler()
    ]
)

# Module-level logger named after this module
logger = logging.getLogger(__name__)


def validate_file_path(file_path: str) -> str:
    """Validate that *file_path* names an existing Excel file.

    Written to be used as an ``argparse`` ``type=`` callable, which is why
    failures are reported as ``argparse.ArgumentTypeError``.

    Args:
        file_path: Path as given on the command line.

    Returns:
        The same ``file_path`` string, unchanged.

    Raises:
        argparse.ArgumentTypeError: If the path does not exist, is not a
            regular file (for example a directory), or does not end in
            ``.xlsx`` or ``.xls`` (compared case-insensitively).
    """
    if not os.path.exists(file_path):
        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")

    # A directory called "something.xlsx" exists and has the right suffix,
    # but cannot be opened as a workbook, so reject it here with a clear
    # message instead of failing later while reading.
    if not os.path.isfile(file_path):
        raise argparse.ArgumentTypeError(f"'{file_path}' is not a file")

    if not file_path.lower().endswith(('.xlsx', '.xls')):
        raise argparse.ArgumentTypeError(f"File '{file_path}' is not an Excel file (.xlsx or .xls)")

    return file_path


def parse_arguments(argv=None):
    """Parse command line arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, so
            existing ``parse_arguments()`` calls behave as before. Passing
            an explicit list allows the parser to be driven from code.

    Returns:
        argparse.Namespace with the attributes ``predelib``, ``dashboard``,
        ``output``, ``verbose`` and ``log_file``.

    Note:
        On invalid arguments argparse prints a usage message and exits the
        process (raises ``SystemExit``).
    """
    parser = argparse.ArgumentParser(
        description='Process and compare student data from predeliberation and dashboard Excel files',
        # Raw formatter keeps the line breaks of the examples in the epilog
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
  %(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json
  %(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose
        """
    )

    # Both input files are mandatory and are checked by validate_file_path
    parser.add_argument(
        '--predelib', '-p',
        type=validate_file_path,
        required=True,
        help='Path to the predeliberation Excel file (db.xlsx)'
    )

    parser.add_argument(
        '--dashboard', '-d',
        type=validate_file_path,
        required=True,
        help='Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)'
    )

    parser.add_argument(
        '--output', '-o',
        type=str,
        help='Output file path for results (optional, prints to console if not specified)'
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging'
    )

    parser.add_argument(
        '--log-file',
        type=str,
        default='startpakket_processing.log',
        help='Path to log file (default: startpakket_processing.log)'
    )

    return parser.parse_args(argv)


def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False):
    """Read both Excel files, run the header checks and compare SP values.

    Args:
        predelib_path: Path to the predeliberation Excel file.
        dashboard_path: Path to the dashboard Excel file.
        verbose: Accepted for interface compatibility; not read by this function.

    Returns:
        dict with the keys ``predelib_file``, ``dashboard_file``,
        ``predelib_records``, ``dashboard_records``, ``mismatches_count``,
        ``mismatches`` and ``status``.

    Raises:
        Exception: Any error raised while reading or processing is logged
            and then re-raised unchanged.
    """
    try:
        # Load the predeliberation workbook
        logger.info(f"Reading predeliberation file: {predelib_path}")
        predelib_raw = pd.read_excel(predelib_path)
        logger.info(f"Predelib file loaded successfully. Shape: {predelib_raw.shape}")

        # Load the dashboard workbook
        logger.info(f"Reading dashboard file: {dashboard_path}")
        dashboard_raw = pd.read_excel(dashboard_path)
        logger.info(f"Dashboard file loaded successfully. Shape: {dashboard_raw.shape}")

        # Hand each frame to its header-check helper
        logger.info("Processing predeliberation file headers")
        predelib_checked = check_headers_predelibfile(predelib_raw)

        logger.info("Processing dashboard file headers")
        dashboard_checked = check_headers_dashboard_inschrijvingenfile(dashboard_raw)

        # Compare the SP values of the two checked frames
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(predelib_checked, dashboard_checked)

        # Assemble the result record
        results = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(predelib_checked),
            'dashboard_records': len(dashboard_checked),
            'mismatches_count': len(mismatches),
            'mismatches': mismatches,
            'status': 'completed',
        }

        logger.info(f"Processing completed successfully. Found {results['mismatches_count']} mismatches.")
        return results

    except Exception as exc:
        logger.error(f"Error processing files: {exc}")
        raise


def _json_default(value):
    """Convert a NumPy scalar to its native Python equivalent for ``json``.

    Used as the ``default=`` hook of ``json.dumps``; any other unsupported
    type still raises ``TypeError`` so that nothing is stringified silently.
    """
    import numpy as np

    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")


def save_results(results: dict, output_path: str):
    """Save results to a file as UTF-8 encoded, indented JSON.

    Args:
        results: Result dictionary to serialize.
        output_path: Destination file path; an existing file is overwritten.

    Raises:
        Exception: Any serialization or I/O error is logged and re-raised.
    """
    import json

    try:
        # Serialize before opening the file so that a serialization error
        # cannot truncate or half-write an existing output file.
        # NOTE(review): values taken from pandas objects are often NumPy
        # scalars (e.g. int64), which json rejects without a default hook —
        # confirm against what compare_sp_values actually returns.
        payload = json.dumps(results, indent=2, ensure_ascii=False, default=_json_default)
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        logger.info(f"Results saved to: {output_path}")
    except Exception as e:
        logger.error(f"Error saving results to {output_path}: {e}")
        raise


def print_summary(results: dict):
    """Write a human-readable report of *results* to standard output.

    Args:
        results: Dictionary with the keys ``predelib_file``,
            ``dashboard_file``, ``predelib_records``, ``dashboard_records``,
            ``mismatches_count``, ``mismatches`` and ``status``. Each entry
            of ``mismatches`` is indexed by ``'ID'``, ``'Predelib_SP'`` and
            ``'Dashboard_SP'``.
    """
    rule = '=' * 60

    # Banner
    print(f"\n{rule}")
    print("STARTPAKKET PROCESSING SUMMARY")
    print(rule)

    # Headline figures, printed in a fixed order
    headline_fields = (
        ("Predelib file", 'predelib_file'),
        ("Dashboard file", 'dashboard_file'),
        ("Predelib records processed", 'predelib_records'),
        ("Dashboard records processed", 'dashboard_records'),
        ("Mismatches found", 'mismatches_count'),
    )
    for label, key in headline_fields:
        print(f"{label}: {results[key]}")

    # Either the success line or one line per mismatch
    if not results['mismatches']:
        print("\n✅ All SP values match perfectly!")
    else:
        print("\nDetailed mismatches:")
        for entry in results['mismatches']:
            print(f"  ID {entry['ID']}: Predelib={entry['Predelib_SP']}, Dashboard={entry['Dashboard_SP']}")

    print(f"Status: {results['status']}")
    print(rule)


def _redirect_file_logging(log_file: str) -> None:
    """Make the root logger write its file output to *log_file*.

    Does nothing when a file handler for that path is already attached.
    Otherwise the existing file handlers on the root logger are replaced by
    one for ``log_file``, reusing the formatter already in use.
    """
    root = logging.getLogger()
    target = os.path.abspath(log_file)
    file_handlers = [h for h in root.handlers if isinstance(h, logging.FileHandler)]

    if any(h.baseFilename == target for h in file_handlers):
        return

    # Open the new file first: if that fails, the current handlers stay intact
    replacement = logging.FileHandler(log_file)
    format_source = file_handlers or root.handlers
    if format_source:
        replacement.setFormatter(format_source[0].formatter)

    for stale in file_handlers:
        root.removeHandler(stale)
        stale.close()
    root.addHandler(replacement)


def main():
    """Command line entry point.

    Parses the arguments, runs the comparison, optionally saves the results
    and prints a summary. Always terminates the process via ``sys.exit``:

    * ``0``   - processing finished and no mismatches were found
    * ``1``   - mismatches were found, or a fatal error occurred
    * ``130`` - interrupted by the user (Ctrl+C)
    """
    try:
        # Parse arguments
        args = parse_arguments()

        # Honour --log-file; a no-op when the default path is used
        _redirect_file_logging(args.log_file)

        # Configure logging level
        if args.verbose:
            logging.getLogger().setLevel(logging.DEBUG)
            logger.debug("Verbose logging enabled")

        logger.info("Starting startpakket processing")
        logger.info(f"Predelib file: {args.predelib}")
        logger.info(f"Dashboard file: {args.dashboard}")

        # Process files
        results = process_files(args.predelib, args.dashboard, args.verbose)

        # Save results if output path specified
        if args.output:
            save_results(results, args.output)

        # Print summary
        print_summary(results)

        # Exit with appropriate code. SystemExit does not derive from
        # Exception, so it passes through the handlers below untouched.
        exit_code = 0 if results['mismatches_count'] == 0 else 1
        logger.info(f"Processing completed with exit code: {exit_code}")
        sys.exit(exit_code)

    except KeyboardInterrupt:
        logger.info("Processing interrupted by user")
        sys.exit(130)
    except Exception as e:
        logger.error(f"Fatal error: {e}")
        print(f"Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()