import pandas as pd

# Column names shared by the validation and comparison steps.
_ID_COLUMN = 'ID'
_PREDELIB_SP_COLUMN = 'Totaal aantal SP'
_DASHBOARD_SP_COLUMN = 'Ingeschr. SP (intern)'


def _normalize_id(raw_id):
    """Return a canonical string key for an ID value.

    Whole-number floats are rendered without the trailing ``.0`` so that an
    integer ID column and a float ID column (which pandas produces as soon as
    the column contains an empty cell) yield the same key for the same ID.
    """
    if isinstance(raw_id, float) and raw_id.is_integer():
        return str(int(raw_id))
    return str(raw_id)


def _is_missing(value):
    """Return True when *value* is a scalar missing marker (None, NaN, NA, NaT)."""
    return bool(pd.api.types.is_scalar(value) and pd.isna(value))


def _first_value_by_id(df, value_column):
    """Map each normalized, non-missing ID to the value of its first row.

    Insertion order follows the row order of *df*; later rows with an already
    seen ID are ignored, so the first occurrence wins.
    """
    lookup = {}
    for raw_id, value in zip(df[_ID_COLUMN], df[value_column]):
        if _is_missing(raw_id):
            continue
        lookup.setdefault(_normalize_id(raw_id), value)
    return lookup


def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) -> list:
    """
    Compare 'Totaal aantal SP' from predelib_df with 'Ingeschr. SP (intern)'
    from dashboard_df for the IDs that occur in both dataframes.

    IDs are compared as strings; whole-number floats are treated as the
    matching integer (``123.0`` matches ``123``). Rows whose ID is missing are
    ignored. When an ID occurs several times in a dataframe, the first row is
    used. Two missing SP values are considered equal; a missing value on one
    side only is reported as a mismatch. Progress and results are printed to
    stdout.

    Args:
        predelib_df (pandas.DataFrame): Dataframe from predeliberation file
            with 'ID' and 'Totaal aantal SP' columns
        dashboard_df (pandas.DataFrame): Dataframe from dashboard file with
            'ID' and 'Ingeschr. SP (intern)' columns

    Returns:
        list: List of dictionaries with keys 'ID' (normalized string),
            'Predelib_SP' and 'Dashboard_SP', one per mismatch, in the row
            order of predelib_df. Empty if everything matches, if no IDs are
            shared, or if a required column is missing.
    """
    required_columns = (
        (predelib_df, _ID_COLUMN, 'predelib'),
        (dashboard_df, _ID_COLUMN, 'dashboard'),
        (predelib_df, _PREDELIB_SP_COLUMN, 'predelib'),
        (dashboard_df, _DASHBOARD_SP_COLUMN, 'dashboard'),
    )
    for frame, column, label in required_columns:
        if column not in frame.columns:
            print(f"Warning: '{column}' column not found in {label} dataframe")
            return []

    # Diagnostic output: differing ID dtypes are the usual reason for
    # unexpectedly few matches.
    print(f"Predelib ID column type: {predelib_df[_ID_COLUMN].dtype}")
    print(f"Dashboard ID column type: {dashboard_df[_ID_COLUMN].dtype}")
    print(f"Sample predelib IDs: {list(predelib_df[_ID_COLUMN].head())}")
    print(f"Sample dashboard IDs: {list(dashboard_df[_ID_COLUMN].head())}")

    # Build each lookup once instead of re-casting and scanning the ID columns
    # for every matching ID.
    predelib_sp_by_id = _first_value_by_id(predelib_df, _PREDELIB_SP_COLUMN)
    dashboard_sp_by_id = _first_value_by_id(dashboard_df, _DASHBOARD_SP_COLUMN)

    # Iterating the dict keeps predelib row order, so the result order is
    # reproducible between runs.
    matching_ids = [id_val for id_val in predelib_sp_by_id if id_val in dashboard_sp_by_id]
    print(f"Found {len(matching_ids)} matching IDs between the two dataframes")

    if not matching_ids:
        print("No matching IDs found between the dataframes")
        print(f"Total predelib IDs: {len(predelib_sp_by_id)}")
        print(f"Total dashboard IDs: {len(dashboard_sp_by_id)}")
        return []

    mismatches = []
    for id_val in matching_ids:
        predelib_sp = predelib_sp_by_id[id_val]
        dashboard_sp = dashboard_sp_by_id[id_val]

        predelib_missing = _is_missing(predelib_sp)
        dashboard_missing = _is_missing(dashboard_sp)
        if predelib_missing or dashboard_missing:
            # NaN != NaN is always true, so missing values need their own
            # rule: only a one-sided gap counts as a difference.
            differs = predelib_missing != dashboard_missing
        else:
            differs = predelib_sp != dashboard_sp

        if differs:
            mismatches.append({
                'ID': id_val,
                'Predelib_SP': predelib_sp,
                'Dashboard_SP': dashboard_sp,
            })

    if not mismatches:
        print("All SP values match between the two dataframes!")
    else:
        print(f"Found {len(mismatches)} mismatches:")
        for mismatch in mismatches:
            print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")

    return mismatches


if __name__ == "__main__":
    # Example usage - can be used for testing.
    # Imported here so that importing this module does not require the
    # project-local checkheaders module.
    from checkheaders import check_headers_predelibfile, check_headers_dashboard_inschrijvingenfile

    # Read the Excel files
    df_predelib = pd.read_excel('db.xlsx')
    df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')

    # Process the dataframes
    processed_predelib_df = check_headers_predelibfile(df_predelib)
    processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

    # Compare SP values between the two processed dataframes
    print("\nComparing SP values between predelib and dashboard files:")
    mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)