Enhance SP comparison script with logging, error handling, and command-line argument parsing

2025-07-29 14:07:38 +02:00 · 2025-07-29 14:07:38 +02:00 · c5d356b366
commit c5d356b366
parent 248417c4b8
4 changed files with 465 additions and 86 deletions
--- a/startpakketten/compare_sp.py
+++ b/startpakketten/compare_sp.py
@ -1,6 +1,20 @@
 import pandas as pd
+import logging
+from typing import List, Dict, Any, Optional

-def compare_sp_values(predelib_df, dashboard_df):
+# Configure root logging only when this module is executed directly.
+# logging.basicConfig() is a no-op once the root logger has handlers, so
+# configuring at import time would pre-empt the logging setup of any script
+# that imports compare_sp_values and redirect its output to sp_comparison.log.
+if __name__ == "__main__":
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        handlers=[
+            logging.FileHandler('sp_comparison.log'),
+            logging.StreamHandler()
+        ]
+    )
+
+# Module-level logger; handlers and level are inherited from the root logger.
+logger = logging.getLogger(__name__)
+
+def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Compare 'Totaal aantal SP' from predelib_df with 'Ingeschr. SP (intern)' from dashboard_df
    for matching IDs between the two dataframes.
@ -11,85 +25,212 @@ def compare_sp_values(predelib_df, dashboard_df):
    
    Returns:
        list: List of dictionaries containing mismatches, or empty list if all match
+        
+    Raises:
+        ValueError: If input dataframes are invalid
+        KeyError: If required columns are missing
    """
-    if 'ID' not in predelib_df.columns:
-        print("Warning: 'ID' column not found in predelib dataframe")
-        return []
+    logger.info("Starting SP values comparison")
    
-    if 'ID' not in dashboard_df.columns:
-        print("Warning: 'ID' column not found in dashboard dataframe")
-        return []
-    
-    if 'Totaal aantal SP' not in predelib_df.columns:
-        print("Warning: 'Totaal aantal SP' column not found in predelib dataframe")
-        return []
-    
-    if 'Ingeschr. SP (intern)' not in dashboard_df.columns:
-        print("Warning: 'Ingeschr. SP (intern)' column not found in dashboard dataframe")
-        return []
-    
-    # Find matching IDs
-    # First, let's debug the ID columns
-    print(f"Predelib ID column type: {predelib_df['ID'].dtype}")
-    print(f"Dashboard ID column type: {dashboard_df['ID'].dtype}")
-    print(f"Sample predelib IDs: {list(predelib_df['ID'].head())}")
-    print(f"Sample dashboard IDs: {list(dashboard_df['ID'].head())}")
-    
-    # Convert IDs to strings to ensure consistent comparison
-    predelib_ids = set(str(x) for x in predelib_df['ID'] if pd.notna(x))
-    dashboard_ids = set(str(x) for x in dashboard_df['ID'] if pd.notna(x))
-    
-    matching_ids = predelib_ids.intersection(dashboard_ids)
-    print(f"Found {len(matching_ids)} matching IDs between the two dataframes")
-    
-    if len(matching_ids) == 0:
-        print("No matching IDs found between the dataframes")
-        print(f"Total predelib IDs: {len(predelib_ids)}")
-        print(f"Total dashboard IDs: {len(dashboard_ids)}")
-        return []
-    
-    # Compare SP values for matching IDs
-    mismatches = []
-    for id_val in matching_ids:
-        # Convert back to original type for filtering (try both string and original)
-        predelib_matches = predelib_df[predelib_df['ID'].astype(str) == id_val]
-        dashboard_matches = dashboard_df[dashboard_df['ID'].astype(str) == id_val]
-        
-        if len(predelib_matches) == 0 or len(dashboard_matches) == 0:
-            continue
+    try:
+        # Validate input dataframes
+        if predelib_df is None or predelib_df.empty:
+            error_msg = "Predelib dataframe is None or empty"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
            
-        predelib_sp = predelib_matches['Totaal aantal SP'].iloc[0]
-        dashboard_sp = dashboard_matches['Ingeschr. SP (intern)'].iloc[0]
+        if dashboard_df is None or dashboard_df.empty:
+            error_msg = "Dashboard dataframe is None or empty"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
        
-        if predelib_sp != dashboard_sp:
-            mismatches.append({
-                'ID': id_val,
-                'Predelib_SP': predelib_sp,
-                'Dashboard_SP': dashboard_sp
-            })
-    
-    if len(mismatches) == 0:
-        print("All SP values match between the two dataframes!")
-    else:
-        print(f"Found {len(mismatches)} mismatches:")
-        for mismatch in mismatches:
-            print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
-    
-    return mismatches
+        # Check for required columns
+        required_predelib_columns = ['ID', 'Totaal aantal SP']
+        required_dashboard_columns = ['ID', 'Ingeschr. SP (intern)']
+        
+        missing_predelib_cols = [col for col in required_predelib_columns if col not in predelib_df.columns]
+        missing_dashboard_cols = [col for col in required_dashboard_columns if col not in dashboard_df.columns]
+        
+        if missing_predelib_cols:
+            error_msg = f"Missing required columns in predelib dataframe: {missing_predelib_cols}"
+            logger.error(error_msg)
+            raise KeyError(error_msg)
+            
+        if missing_dashboard_cols:
+            error_msg = f"Missing required columns in dashboard dataframe: {missing_dashboard_cols}"
+            logger.error(error_msg)
+            raise KeyError(error_msg)
+        
+        logger.info("All required columns found in both dataframes")
+        
+        # Debug ID columns
+        logger.debug(f"Predelib ID column type: {predelib_df['ID'].dtype}")
+        logger.debug(f"Dashboard ID column type: {dashboard_df['ID'].dtype}")
+        logger.debug(f"Sample predelib IDs: {list(predelib_df['ID'].head())}")
+        logger.debug(f"Sample dashboard IDs: {list(dashboard_df['ID'].head())}")
+        
+        # Convert IDs to strings to ensure consistent comparison
+        try:
+            predelib_ids = set(str(x) for x in predelib_df['ID'] if pd.notna(x))
+            dashboard_ids = set(str(x) for x in dashboard_df['ID'] if pd.notna(x))
+        except Exception as e:
+            error_msg = f"Error converting IDs to strings: {e}"
+            logger.error(error_msg)
+            raise ValueError(error_msg)
+        
+        matching_ids = predelib_ids.intersection(dashboard_ids)
+        logger.info(f"Found {len(matching_ids)} matching IDs between the two dataframes")
+        logger.info(f"Total predelib IDs: {len(predelib_ids)}")
+        logger.info(f"Total dashboard IDs: {len(dashboard_ids)}")
+        
+        if len(matching_ids) == 0:
+            logger.warning("No matching IDs found between the dataframes")
+            return []
+        
+        # Compare SP values for matching IDs
+        mismatches = []
+        processed_count = 0
+        
+        for id_val in matching_ids:
+            try:
+                # Convert back to original type for filtering
+                predelib_matches = predelib_df[predelib_df['ID'].astype(str) == id_val]
+                dashboard_matches = dashboard_df[dashboard_df['ID'].astype(str) == id_val]
+                
+                if len(predelib_matches) == 0:
+                    logger.warning(f"No predelib records found for ID: {id_val}")
+                    continue
+                    
+                if len(dashboard_matches) == 0:
+                    logger.warning(f"No dashboard records found for ID: {id_val}")
+                    continue
+                
+                predelib_sp = predelib_matches['Totaal aantal SP'].iloc[0]
+                dashboard_sp = dashboard_matches['Ingeschr. SP (intern)'].iloc[0]
+                
+                # Handle potential NaN values
+                if pd.isna(predelib_sp) or pd.isna(dashboard_sp):
+                    logger.warning(f"NaN values found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")
+                    continue
+                
+                # Convert to comparable types
+                try:
+                    predelib_sp_num = float(predelib_sp) if not pd.isna(predelib_sp) else 0
+                    dashboard_sp_num = float(dashboard_sp) if not pd.isna(dashboard_sp) else 0
+                except (ValueError, TypeError) as e:
+                    logger.warning(f"Error converting SP values to numbers for ID {id_val}: {e}")
+                    # Fall back to string comparison
+                    predelib_sp_num = str(predelib_sp)
+                    dashboard_sp_num = str(dashboard_sp)
+                
+                if predelib_sp_num != dashboard_sp_num:
+                    mismatch = {
+                        'ID': id_val,
+                        'Predelib_SP': predelib_sp,
+                        'Dashboard_SP': dashboard_sp
+                    }
+                    mismatches.append(mismatch)
+                    logger.debug(f"Mismatch found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")
+                
+                processed_count += 1
+                
+            except Exception as e:
+                logger.error(f"Error processing ID {id_val}: {e}")
+                continue
+        
+        logger.info(f"Successfully processed {processed_count} matching records")
+        
+        if len(mismatches) == 0:
+            logger.info("All SP values match between the two dataframes!")
+        else:
+            logger.warning(f"Found {len(mismatches)} mismatches")
+            for mismatch in mismatches:
+                logger.info(f"Mismatch - ID {mismatch['ID']}: Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
+        
+        return mismatches
+        
+    except Exception as e:
+        logger.error(f"Unexpected error in compare_sp_values: {e}")
+        raise


 if __name__ == "__main__":
    # Example usage - can be used for testing
-    from checkheaders import check_headers_predelibfile, check_headers_dashboard_inschrijvingenfile
+    # Standalone run against the fixed file names db.xlsx and
+    # dashboard_inschrijvingen.xlsx. Exits with status 1 when the run fails so
+    # that shells and schedulers can detect the failure.
+    logger.info("Starting SP comparison script")
    
-    # Read the Excel files
-    df_predelib = pd.read_excel('db.xlsx')
-    df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')
-    
-    # Process the dataframes
-    processed_predelib_df = check_headers_predelibfile(df_predelib)
-    processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
-    
-    # Compare SP values between the two processed dataframes
-    print("\nComparing SP values between predelib and dashboard files:")
-    mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
+    try:
+        # The import gets its own handler so that only a failed checkheaders
+        # import produces the "same directory" hint; an ImportError raised
+        # later (for example by the Excel reader) is reported as it is.
+        try:
+            from checkheaders import check_headers_predelibfile, check_headers_dashboard_inschrijvingenfile
+        except ImportError as e:
+            logger.error(f"Import error: {e}")
+            print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
+            raise SystemExit(1)
+
+        # Read the Excel files
+        logger.info("Reading Excel files")
+        df_predelib = pd.read_excel('db.xlsx')
+        logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")
+        df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')
+        logger.info(f"Successfully loaded dashboard file with shape: {df_dashboard.shape}")
+
+        # Process the dataframes
+        logger.info("Processing dataframes")
+        processed_predelib_df = check_headers_predelibfile(df_predelib)
+        logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")
+        processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
+        logger.info(f"Processed dashboard dataframe shape: {processed_dashboard_df.shape}")
+
+        # Compare SP values between the two processed dataframes.
+        # compare_sp_values logs its own start message, so none is logged here.
+        mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
+        logger.info(f"SP comparison completed successfully. Found {len(mismatches)} mismatches.")
+
+        # Print summary for console output
+        separator = '=' * 50
+        print(f"\n{separator}")
+        print("SP COMPARISON SUMMARY")
+        print(separator)
+        print(f"Predelib records processed: {len(processed_predelib_df)}")
+        print(f"Dashboard records processed: {len(processed_dashboard_df)}")
+        print(f"Mismatches found: {len(mismatches)}")
+
+        if mismatches:
+            print("\nDetailed mismatches:")
+            for mismatch in mismatches:
+                print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
+        else:
+            print("\nAll SP values match perfectly!")
+        print(separator)
+
+    except Exception as e:
+        # Single top-level handler: each failure is logged once, with its
+        # traceback, instead of once per nested log-and-re-raise block.
+        logger.exception(f"Unexpected error in main execution: {e}")
+        print(f"An error occurred: {e}")
+        print("Check the log file 'sp_comparison.log' for detailed error information.")
+        raise SystemExit(1)
+    finally:
+        # Runs on success, on failure and on the SystemExit raised above.
+        logger.info("SP comparison script completed")
--- a/startpakketten/script.py
+++ b/startpakketten/script.py
@ -1,20 +1,200 @@
 import pandas as pd
+import argparse
+import logging
+import sys
+import os
+from pathlib import Path

 from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
 from compare_sp import compare_sp_values

-# Read the Excel file
-df_predelib = pd.read_excel('db.xlsx')
-df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')
-processed_predelib_df = check_headers_predelibfile(df_predelib)
-processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
+# Configure logging.
+# A plain logging.basicConfig() is silently ignored when the root logger
+# already has handlers, e.g. ones attached by a module imported above.
+# force=True (Python 3.8+) removes and closes those handlers first, so this
+# script's messages reliably go to startpakket_processing.log.
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler('startpakket_processing.log'),
+        logging.StreamHandler()
+    ],
+    force=True
+)
+
+# Module-level logger; handlers and level are inherited from the root logger.
+logger = logging.getLogger(__name__)


-# Further processing can be done with processed_predelib_df and processed_dashboard_df
-print("Processed Predelib DataFrame:")
-print(processed_predelib_df)
-print("\nProcessed Dashboard DataFrame:")
-print(processed_dashboard_df)
+def validate_file_path(file_path: str) -> str:
+    """Validate a path given on the command line (argparse ``type=`` callable).
+
+    Args:
+        file_path: Path string as typed by the user.
+
+    Returns:
+        The unchanged ``file_path`` when it names an existing regular file
+        whose name ends in ``.xlsx`` or ``.xls`` (case-insensitive).
+
+    Raises:
+        argparse.ArgumentTypeError: If the path does not exist, is not a
+            regular file, or does not have an Excel extension.
+    """
+    if not os.path.exists(file_path):
+        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")
+
+    # A directory called e.g. "data.xlsx" passes the existence check; reject it
+    # here instead of failing later inside the Excel reader.
+    if not os.path.isfile(file_path):
+        raise argparse.ArgumentTypeError(f"'{file_path}' is not a regular file")
+
+    if not file_path.lower().endswith(('.xlsx', '.xls')):
+        raise argparse.ArgumentTypeError(f"File '{file_path}' is not an Excel file (.xlsx or .xls)")
+
+    return file_path

-compare_sp_values(processed_predelib_df, processed_dashboard_df)
-print("\nComparison of the predelib file with the dashboard file on SP values complete.")
+
+def parse_arguments():
+    """Build the command-line interface and parse the process arguments.
+
+    Options defined here:
+        --predelib / -p   required, checked by validate_file_path
+        --dashboard / -d  required, checked by validate_file_path
+        --output / -o     optional output file path
+        --verbose / -v    boolean flag
+        --log-file        log file path, default 'startpakket_processing.log'
+
+    Returns:
+        The namespace produced by ``ArgumentParser.parse_args()``.
+    """
+    # Shown verbatim below the option list (RawDescriptionHelpFormatter keeps
+    # the line breaks).
+    usage_examples = """
+Examples:
+  %(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
+  %(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json
+  %(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose
+        """
+    cli = argparse.ArgumentParser(
+        description='Process and compare student data from predeliberation and dashboard Excel files',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_examples
+    )
+
+    # The two input workbooks share the same validation and are both mandatory.
+    required_inputs = (
+        ('--predelib', '-p', 'Path to the predeliberation Excel file (db.xlsx)'),
+        ('--dashboard', '-d', 'Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)'),
+    )
+    for long_flag, short_flag, help_text in required_inputs:
+        cli.add_argument(
+            long_flag, short_flag,
+            type=validate_file_path,
+            required=True,
+            help=help_text
+        )
+
+    cli.add_argument(
+        '--output', '-o',
+        type=str,
+        help='Output file path for results (optional, prints to console if not specified)'
+    )
+    cli.add_argument(
+        '--verbose', '-v',
+        action='store_true',
+        help='Enable verbose logging'
+    )
+    cli.add_argument(
+        '--log-file',
+        type=str,
+        default='startpakket_processing.log',
+        help='Path to log file (default: startpakket_processing.log)'
+    )
+
+    return cli.parse_args()
+
+
+def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False):
+    """Load both Excel files, normalise them and compare their SP values.
+
+    Args:
+        predelib_path: Path of the predeliberation Excel file.
+        dashboard_path: Path of the dashboard Excel file.
+        verbose: Accepted but not read anywhere in this function.
+
+    Returns:
+        dict with the keys 'predelib_file', 'dashboard_file',
+        'predelib_records', 'dashboard_records', 'mismatches_count',
+        'mismatches' and 'status' (always 'completed' on return).
+
+    Raises:
+        Exception: Any error from reading, header processing or comparison
+            is logged and re-raised unchanged.
+    """
+    try:
+        # Load the raw workbooks, predeliberation first.
+        logger.info(f"Reading predeliberation file: {predelib_path}")
+        raw_predelib = pd.read_excel(predelib_path)
+        logger.info(f"Predelib file loaded successfully. Shape: {raw_predelib.shape}")
+
+        logger.info(f"Reading dashboard file: {dashboard_path}")
+        raw_dashboard = pd.read_excel(dashboard_path)
+        logger.info(f"Dashboard file loaded successfully. Shape: {raw_dashboard.shape}")
+
+        # Hand each frame to its header-processing helper.
+        logger.info("Processing predeliberation file headers")
+        predelib_frame = check_headers_predelibfile(raw_predelib)
+
+        logger.info("Processing dashboard file headers")
+        dashboard_frame = check_headers_dashboard_inschrijvingenfile(raw_dashboard)
+
+        # Compare the two processed frames.
+        logger.info("Comparing SP values between files")
+        found = compare_sp_values(predelib_frame, dashboard_frame)
+
+        # Assemble the report before logging completion.
+        report = dict(
+            predelib_file=predelib_path,
+            dashboard_file=dashboard_path,
+            predelib_records=len(predelib_frame),
+            dashboard_records=len(dashboard_frame),
+            mismatches_count=len(found),
+            mismatches=found,
+            status='completed',
+        )
+
+        logger.info(f"Processing completed successfully. Found {len(found)} mismatches.")
+        return report
+
+    except Exception as err:
+        logger.error(f"Error processing files: {err}")
+        raise
+
+
+def save_results(results: dict, output_path: str):
+    """Write *results* to *output_path* as UTF-8 encoded, indented JSON.
+
+    Values read from a DataFrame are usually numpy scalars (e.g. numpy.int64),
+    which the json module cannot encode by itself; they are converted to the
+    equivalent built-in Python values first.
+
+    Args:
+        results: Result dictionary to serialise.
+        output_path: Destination file; an existing file is overwritten.
+
+    Raises:
+        Exception: Any serialisation or I/O error is logged and re-raised.
+    """
+    import json
+
+    def _to_builtin(value):
+        """json ``default`` hook: unwrap numpy/pandas values via ``tolist()``."""
+        converter = getattr(value, "tolist", None)
+        if callable(converter):
+            return converter()
+        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
+
+    try:
+        # Serialise completely before opening the file, so a value that cannot
+        # be encoded does not leave a truncated output file behind.
+        payload = json.dumps(results, indent=2, ensure_ascii=False, default=_to_builtin)
+        with open(output_path, 'w', encoding='utf-8') as f:
+            f.write(payload)
+        logger.info(f"Results saved to: {output_path}")
+    except Exception as e:
+        logger.error(f"Error saving results to {output_path}: {e}")
+        raise
+
+
+def print_summary(results: dict):
+    """Print a human-readable report of *results* to standard output.
+
+    Args:
+        results: Mapping with the keys 'predelib_file', 'dashboard_file',
+            'predelib_records', 'dashboard_records', 'mismatches_count',
+            'mismatches' (iterable of dicts with 'ID', 'Predelib_SP' and
+            'Dashboard_SP') and 'status'.
+    """
+    rule = '=' * 60
+    print(f"\n{rule}")
+    print("STARTPAKKET PROCESSING SUMMARY")
+    print(rule)
+
+    # Headline figures, printed in a fixed order as "<label>: <value>".
+    headline_fields = (
+        ("Predelib file", 'predelib_file'),
+        ("Dashboard file", 'dashboard_file'),
+        ("Predelib records processed", 'predelib_records'),
+        ("Dashboard records processed", 'dashboard_records'),
+        ("Mismatches found", 'mismatches_count'),
+    )
+    for label, key in headline_fields:
+        print(f"{label}: {results[key]}")
+
+    mismatch_rows = results['mismatches']
+    if not mismatch_rows:
+        print("\n✅ All SP values match perfectly!")
+    else:
+        print("\nDetailed mismatches:")
+        for row in mismatch_rows:
+            print(f"  ID {row['ID']}: Predelib={row['Predelib_SP']}, Dashboard={row['Dashboard_SP']}")
+
+    print(f"Status: {results['status']}")
+    print(rule)
+
+
+def main():
+    """Run the startpakket comparison as a command-line program.
+
+    Parses the arguments, configures logging from ``--log-file`` and
+    ``--verbose``, processes both Excel files, optionally saves the results
+    as JSON and prints a summary.
+
+    Exit status (via ``sys.exit``): 0 when no mismatches were found, 1 when
+    mismatches were found or a fatal error occurred, 130 when interrupted
+    from the keyboard.
+    """
+    try:
+        # Parse arguments
+        args = parse_arguments()
+
+        # Configure logging from the command line. force=True (Python 3.8+)
+        # replaces any handlers already attached to the root logger (a plain
+        # basicConfig() would be ignored in that case), so --log-file and
+        # --verbose always take effect.
+        logging.basicConfig(
+            level=logging.DEBUG if args.verbose else logging.INFO,
+            format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+            handlers=[
+                logging.FileHandler(args.log_file),
+                logging.StreamHandler()
+            ],
+            force=True
+        )
+        if args.verbose:
+            logger.debug("Verbose logging enabled")
+
+        logger.info("Starting startpakket processing")
+        logger.info(f"Predelib file: {args.predelib}")
+        logger.info(f"Dashboard file: {args.dashboard}")
+
+        # Process files
+        results = process_files(args.predelib, args.dashboard, args.verbose)
+
+        # Save results if output path specified
+        if args.output:
+            save_results(results, args.output)
+
+        # Print summary
+        print_summary(results)
+
+        # Exit with appropriate code. SystemExit is not an Exception subclass,
+        # so it passes through the handlers below.
+        exit_code = 0 if results['mismatches_count'] == 0 else 1
+        logger.info(f"Processing completed with exit code: {exit_code}")
+        sys.exit(exit_code)
+
+    except KeyboardInterrupt:
+        logger.info("Processing interrupted by user")
+        sys.exit(130)
+    except Exception as e:
+        logger.error(f"Fatal error: {e}")
+        print(f"Error: {e}")
+        sys.exit(1)
+
+
+# Entry-point guard: run the command-line program only when this file is
+# executed as a script, not when it is imported.
+if __name__ == "__main__":
+    main()
--- a/startpakketten/sp_comparison.log
+++ b/startpakketten/sp_comparison.log
@ -0,0 +1,58 @@
+2025-07-29 11:56:36,048 - __main__ - INFO - Starting SP comparison script
+2025-07-29 11:56:36,086 - __main__ - INFO - Reading Excel files
+2025-07-29 11:56:36,757 - __main__ - INFO - Successfully loaded predelib file with shape: (123, 22)
+2025-07-29 11:56:36,776 - __main__ - INFO - Successfully loaded dashboard file with shape: (40, 36)
+2025-07-29 11:56:36,776 - __main__ - INFO - Processing dataframes
+2025-07-29 11:56:36,778 - __main__ - INFO - Processed predelib dataframe shape: (123, 22)
+2025-07-29 11:56:36,780 - __main__ - INFO - Processed dashboard dataframe shape: (37, 36)
+2025-07-29 11:56:36,781 - __main__ - INFO - Starting SP values comparison
+2025-07-29 11:56:36,781 - __main__ - INFO - Starting SP values comparison
+2025-07-29 11:56:36,781 - __main__ - INFO - Predelib dataframe shape: (123, 22)
+2025-07-29 11:56:36,782 - __main__ - INFO - Dashboard dataframe shape: (37, 36)
+2025-07-29 11:56:36,782 - __main__ - INFO - All required columns found in both dataframes
+2025-07-29 11:56:36,782 - __main__ - INFO - Found 37 matching IDs between the two dataframes
+2025-07-29 11:56:36,783 - __main__ - INFO - Total predelib IDs: 37
+2025-07-29 11:56:36,783 - __main__ - INFO - Total dashboard IDs: 37
+2025-07-29 11:56:36,798 - __main__ - INFO - Successfully processed 37 matching records
+2025-07-29 11:56:36,798 - __main__ - WARNING - Found 1 mismatches
+2025-07-29 11:56:36,798 - __main__ - INFO - Mismatch - ID 20250706: Predelib=39, Dashboard=45
+2025-07-29 11:56:36,798 - __main__ - INFO - SP comparison completed successfully. Found 1 mismatches.
+2025-07-29 11:56:36,801 - __main__ - INFO - SP comparison script completed
+2025-07-29 13:29:44,971 - __main__ - INFO - Starting SP comparison script
+2025-07-29 13:29:45,011 - __main__ - INFO - Reading Excel files
+2025-07-29 13:29:48,429 - __main__ - INFO - Successfully loaded predelib file with shape: (123, 22)
+2025-07-29 13:29:48,456 - __main__ - INFO - Successfully loaded dashboard file with shape: (40, 36)
+2025-07-29 13:29:48,456 - __main__ - INFO - Processing dataframes
+2025-07-29 13:29:48,459 - __main__ - INFO - Processed predelib dataframe shape: (123, 22)
+2025-07-29 13:29:48,460 - __main__ - INFO - Processed dashboard dataframe shape: (37, 36)
+2025-07-29 13:29:48,460 - __main__ - INFO - Starting SP values comparison
+2025-07-29 13:29:48,460 - __main__ - INFO - Starting SP values comparison
+2025-07-29 13:29:48,460 - __main__ - INFO - All required columns found in both dataframes
+2025-07-29 13:29:48,460 - __main__ - INFO - Found 37 matching IDs between the two dataframes
+2025-07-29 13:29:48,460 - __main__ - INFO - Total predelib IDs: 37
+2025-07-29 13:29:48,461 - __main__ - INFO - Total dashboard IDs: 37
+2025-07-29 13:29:48,486 - __main__ - INFO - Successfully processed 37 matching records
+2025-07-29 13:29:48,487 - __main__ - WARNING - Found 1 mismatches
+2025-07-29 13:29:48,487 - __main__ - INFO - Mismatch - ID 20250706: Predeliberatierapport SP=39, Dashboard Inschrijvingen SP=45
+2025-07-29 13:29:48,487 - __main__ - INFO - SP comparison completed successfully. Found 1 mismatches.
+2025-07-29 13:29:48,488 - __main__ - INFO - SP comparison script completed
+2025-07-29 14:06:13,452 - __main__ - INFO - Starting startpakket processing
+2025-07-29 14:06:13,453 - __main__ - INFO - Predelib file: db.xlsx
+2025-07-29 14:06:13,453 - __main__ - INFO - Dashboard file: dashboard_inschrijvingen.xlsx
+2025-07-29 14:06:13,453 - __main__ - INFO - Reading predeliberation file: db.xlsx
+2025-07-29 14:06:14,888 - __main__ - INFO - Predelib file loaded successfully. Shape: (123, 22)
+2025-07-29 14:06:14,888 - __main__ - INFO - Reading dashboard file: dashboard_inschrijvingen.xlsx
+2025-07-29 14:06:14,948 - __main__ - INFO - Dashboard file loaded successfully. Shape: (40, 36)
+2025-07-29 14:06:14,948 - __main__ - INFO - Processing predeliberation file headers
+2025-07-29 14:06:14,952 - __main__ - INFO - Processing dashboard file headers
+2025-07-29 14:06:14,953 - __main__ - INFO - Comparing SP values between files
+2025-07-29 14:06:14,953 - compare_sp - INFO - Starting SP values comparison
+2025-07-29 14:06:14,953 - compare_sp - INFO - All required columns found in both dataframes
+2025-07-29 14:06:14,954 - compare_sp - INFO - Found 37 matching IDs between the two dataframes
+2025-07-29 14:06:14,955 - compare_sp - INFO - Total predelib IDs: 37
+2025-07-29 14:06:14,955 - compare_sp - INFO - Total dashboard IDs: 37
+2025-07-29 14:06:14,967 - compare_sp - INFO - Successfully processed 37 matching records
+2025-07-29 14:06:14,967 - compare_sp - WARNING - Found 1 mismatches
+2025-07-29 14:06:14,968 - compare_sp - INFO - Mismatch - ID 20250706: Predeliberatierapport SP=39, Dashboard Inschrijvingen SP=45
+2025-07-29 14:06:14,968 - __main__ - INFO - Processing completed successfully. Found 1 mismatches.
+2025-07-29 14:06:14,970 - __main__ - INFO - Processing completed with exit code: 1
--- a/startpakketten/startpakket_processing.log
+++ b/startpakketten/startpakket_processing.log