"""Compare SP values between the predeliberation and dashboard dataframes."""
import pandas as pd
|
|
|
|
def _normalize_id(value):
    """Return a canonical string key for one ID cell, or None if the cell is empty.

    A column of whole-number IDs that contains at least one blank cell is
    stored by pandas as float64, so the same ID can appear as ``123`` in one
    dataframe and ``123.0`` in the other. Integral floats are therefore
    rendered without the trailing ``.0`` so both spellings map to ``"123"``.
    """
    if pd.isna(value):
        return None
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


def _first_value_by_id(df, value_column):
    """Map each normalized ID in ``df`` to ``value_column`` of its first row."""
    lookup = {}
    for raw_id, value in zip(df['ID'], df[value_column]):
        key = _normalize_id(raw_id)
        # Keep only the first occurrence of a duplicated ID.
        if key is not None and key not in lookup:
            lookup[key] = value
    return lookup


def _values_differ(left, right):
    """Return True when two SP cells differ; two missing cells count as equal."""
    left_missing = pd.isna(left)
    right_missing = pd.isna(right)
    if left_missing or right_missing:
        # NaN != NaN is always True, so missing values need an explicit rule:
        # only "one side missing, the other present" is a real difference.
        return not (left_missing and right_missing)
    return bool(left != right)


def compare_sp_values(predelib_df, dashboard_df):
    """
    Compare 'Totaal aantal SP' from predelib_df with 'Ingeschr. SP (intern)'
    from dashboard_df for every ID that occurs in both dataframes.

    IDs are compared as strings; whole-number floats are normalized first, so
    ``123`` and ``123.0`` refer to the same ID. Rows with a missing ID are
    ignored. When an ID occurs more than once in a dataframe, its first row is
    used. Two SP cells that are both missing are treated as matching.

    Progress and diagnostic information is printed to stdout.

    Args:
        predelib_df (pandas.DataFrame): Dataframe from the predeliberation
            file with 'ID' and 'Totaal aantal SP' columns.
        dashboard_df (pandas.DataFrame): Dataframe from the dashboard file
            with 'ID' and 'Ingeschr. SP (intern)' columns.

    Returns:
        list: One dictionary per mismatch with the keys 'ID' (normalized
        string), 'Predelib_SP' and 'Dashboard_SP', in the row order of
        predelib_df. Empty when everything matches, when no IDs are shared,
        or when a required column is absent (a warning is printed then).
    """
    # Checked in this order so the first missing column is the one reported.
    required_columns = (
        (predelib_df, 'ID', 'predelib'),
        (dashboard_df, 'ID', 'dashboard'),
        (predelib_df, 'Totaal aantal SP', 'predelib'),
        (dashboard_df, 'Ingeschr. SP (intern)', 'dashboard'),
    )
    for frame, column, label in required_columns:
        if column not in frame.columns:
            print(f"Warning: '{column}' column not found in {label} dataframe")
            return []

    # Diagnostics: differing ID dtypes are the usual reason for "no matches".
    print(f"Predelib ID column type: {predelib_df['ID'].dtype}")
    print(f"Dashboard ID column type: {dashboard_df['ID'].dtype}")
    print(f"Sample predelib IDs: {list(predelib_df['ID'].head())}")
    print(f"Sample dashboard IDs: {list(dashboard_df['ID'].head())}")

    # Build each lookup once instead of re-scanning both frames per ID.
    predelib_sp_by_id = _first_value_by_id(predelib_df, 'Totaal aantal SP')
    dashboard_sp_by_id = _first_value_by_id(dashboard_df, 'Ingeschr. SP (intern)')

    # Dict order follows predelib row order, which keeps the result deterministic.
    matching_ids = [key for key in predelib_sp_by_id if key in dashboard_sp_by_id]
    print(f"Found {len(matching_ids)} matching IDs between the two dataframes")

    if not matching_ids:
        print("No matching IDs found between the dataframes")
        print(f"Total predelib IDs: {len(predelib_sp_by_id)}")
        print(f"Total dashboard IDs: {len(dashboard_sp_by_id)}")
        return []

    mismatches = []
    for id_val in matching_ids:
        predelib_sp = predelib_sp_by_id[id_val]
        dashboard_sp = dashboard_sp_by_id[id_val]
        if _values_differ(predelib_sp, dashboard_sp):
            mismatches.append({
                'ID': id_val,
                'Predelib_SP': predelib_sp,
                'Dashboard_SP': dashboard_sp,
            })

    if not mismatches:
        print("All SP values match between the two dataframes!")
    else:
        print(f"Found {len(mismatches)} mismatches:")
        for mismatch in mismatches:
            print(f" ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")

    return mismatches
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual test run: load both Excel exports, pass each through its
    # header-check helper and print the SP comparison.
    # NOTE(review): the helpers live in the project-local `checkheaders`
    # module, which is not visible here; they presumably return a cleaned
    # dataframe with the expected column names. Confirm against that module.
    from checkheaders import (
        check_headers_predelibfile,
        check_headers_dashboard_inschrijvingenfile,
    )

    # Both workbooks are read before either one is processed.
    raw_predelib = pd.read_excel('db.xlsx')
    raw_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')

    predelib_ready = check_headers_predelibfile(raw_predelib)
    dashboard_ready = check_headers_dashboard_inschrijvingenfile(raw_dashboard)

    # compare_sp_values prints its own report; the result is kept for inspection.
    print("\nComparing SP values between predelib and dashboard files:")
    mismatches = compare_sp_values(predelib_ready, dashboard_ready)
|