Compare commits
No commits in common. "248417c4b8cb109a2364c882c6da84510691e605" and "c547a74bba943859fa549df1f8600bf895b0f167" have entirely different histories.
248417c4b8
...
c547a74bba
|
@ -1,85 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
def check_headers_predelibfile(df):
    """Promote the embedded header row of a predeliberation sheet and trim columns.

    If 'Achternaam' and 'Voornaam' are already column names, the frame is
    returned unchanged. Otherwise the first row containing both values is
    located, every row above it is dropped, that row becomes the column
    header, and only the known predeliberation columns that are actually
    present are kept.

    Args:
        df: DataFrame as loaded from the spreadsheet.

    Returns:
        The cleaned DataFrame, or ``df`` itself when the headers are already
        in place or no header row could be found.
    """
    # Headers already in place: treat the file as processed and leave it alone.
    if 'Achternaam' in df.columns and 'Voornaam' in df.columns:
        print("Headers found in first row - file already processed, returning unchanged")
        return df

    # Search by *position*: iterrows() yields index labels, which only equal
    # positions for a default RangeIndex, while the slicing below uses iloc.
    header_row = next(
        (
            position
            for position, (_, row) in enumerate(df.iterrows())
            if 'Achternaam' in row.values and 'Voornaam' in row.values
        ),
        None,
    )

    if header_row is None:
        print("Headers 'Achternaam' and 'Voornaam' not found in the file")
        return df

    # Drop everything above the header row, then promote that row to header.
    df = df.iloc[header_row:].reset_index(drop=True)
    df.columns = df.iloc[0]
    df = df.iloc[1:].reset_index(drop=True)

    columns_to_keep = [
        'ID', 'Achternaam', 'Voornaam', 'E-mail', 'Loopbaan',
        'Drempelteller omschrijving', 'Programma status omschrijving',
        'OO Periode', 'OO Studiegidsnummer', 'OO Lange omschrijving',
        'OO Eenheden', 'OO Sessie', 'OO Credit (Y/N)', 'OO Periode credit',
        'OO Programma code', 'OO Programma korte omschr.', 'Totaal aantal SP',
        'Aantal SP vereist', 'Aantal SP zonder VZP', 'Adviesrapport code',
        'Waarschuwing', 'Lijsttype',
    ]

    # Keep only the wanted columns that exist, in the order listed above.
    existing_columns = [col for col in columns_to_keep if col in df.columns]
    df = df[existing_columns]

    print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
    return df
|
|
||||||
|
|
||||||
def check_headers_dashboard_inschrijvingenfile(df):
    """Promote the embedded header row of a dashboard_inschrijvingen sheet.

    If 'Naam' and 'Voornaam' are already column names, the frame is returned
    unchanged. Otherwise the first row containing both values is located,
    every row above it is dropped and that row becomes the column header.

    Args:
        df: DataFrame as loaded from the spreadsheet.

    Returns:
        The cleaned DataFrame, or ``df`` itself when the headers are already
        in place or no header row could be found.
    """
    # Headers already in place: return immediately so the row-trimming below
    # can never run on a frame that does not need it.
    if 'Naam' in df.columns and 'Voornaam' in df.columns:
        print("Headers found in first row of dashboard_inschrijvingen - no need to search for header row")
        print("Headers were already correct in dashboard_file.")
        return df

    # Search by *position*: iterrows() yields index labels, which only equal
    # positions for a default RangeIndex, while the slicing below uses iloc.
    header_row = next(
        (
            position
            for position, (_, row) in enumerate(df.iterrows())
            if 'Naam' in row.values and 'Voornaam' in row.values
        ),
        None,
    )

    if header_row is None:
        print("Headers 'Naam' and 'Voornaam' not found in the file")
        return df

    # Drop everything above the header row, then promote that row to header.
    df = df.iloc[header_row:].reset_index(drop=True)
    df.columns = df.iloc[0]
    df = df.iloc[1:].reset_index(drop=True)

    print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
    return df
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Load both workbooks first, so a missing file fails before any processing.
    raw_predelib, raw_dashboard = (
        pd.read_excel(workbook)
        for workbook in ('db.xlsx', 'dashboard_inschrijvingen.xlsx')
    )

    # Normalise the headers of each sheet.
    processed_predelib_df = check_headers_predelibfile(raw_predelib)
    processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(raw_dashboard)
|
|
||||||
|
|
|
@ -1,95 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
def _normalize_id(value):
    """Return a canonical string key for an ID cell, or None for an empty cell."""
    if pd.isna(value):
        return None
    # An ID column that contains blanks is loaded as float, so 123 shows up
    # as 123.0; fold it back so it matches the integer form in the other file.
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def _first_sp_by_id(frame, sp_column):
    """Map each normalised ID to the SP value of its first occurrence in frame."""
    lookup = {}
    for raw_id, sp_value in zip(frame['ID'], frame[sp_column]):
        key = _normalize_id(raw_id)
        if key is not None and key not in lookup:
            lookup[key] = sp_value
    return lookup


def _sp_values_differ(left, right):
    """Tell whether two SP cells hold different values (two empty cells are equal)."""
    left_missing, right_missing = pd.isna(left), pd.isna(right)
    if left_missing or right_missing:
        return not (left_missing and right_missing)
    try:
        # Compare numerically so 60, 60.0 and '60' count as the same amount.
        return float(left) != float(right)
    except (TypeError, ValueError):
        return left != right


def compare_sp_values(predelib_df, dashboard_df):
    """Compare 'Totaal aantal SP' with 'Ingeschr. SP (intern)' per shared ID.

    IDs are matched on a normalised string form. When an ID occurs several
    times in a frame, only its first row is compared. Mismatches are
    reported in the order the IDs first appear in ``predelib_df``.

    Args:
        predelib_df: DataFrame with 'ID' and 'Totaal aantal SP' columns.
        dashboard_df: DataFrame with 'ID' and 'Ingeschr. SP (intern)' columns.

    Returns:
        A list of dicts with keys 'ID', 'Predelib_SP' and 'Dashboard_SP', one
        per mismatch. The list is empty when everything matches, when no IDs
        are shared, or when a required column is missing.
    """
    required_columns = (
        (predelib_df, 'ID', 'predelib'),
        (dashboard_df, 'ID', 'dashboard'),
        (predelib_df, 'Totaal aantal SP', 'predelib'),
        (dashboard_df, 'Ingeschr. SP (intern)', 'dashboard'),
    )
    for frame, column, label in required_columns:
        if column not in frame.columns:
            print(f"Warning: '{column}' column not found in {label} dataframe")
            return []

    # Diagnostic output to help spot ID type differences between the files.
    print(f"Predelib ID column type: {predelib_df['ID'].dtype}")
    print(f"Dashboard ID column type: {dashboard_df['ID'].dtype}")
    print(f"Sample predelib IDs: {list(predelib_df['ID'].head())}")
    print(f"Sample dashboard IDs: {list(dashboard_df['ID'].head())}")

    # Build each lookup once instead of re-scanning both frames per ID.
    predelib_sp_by_id = _first_sp_by_id(predelib_df, 'Totaal aantal SP')
    dashboard_sp_by_id = _first_sp_by_id(dashboard_df, 'Ingeschr. SP (intern)')

    # Dicts keep insertion order, so results follow the predelib row order.
    matching_ids = [key for key in predelib_sp_by_id if key in dashboard_sp_by_id]
    print(f"Found {len(matching_ids)} matching IDs between the two dataframes")

    if not matching_ids:
        print("No matching IDs found between the dataframes")
        print(f"Total predelib IDs: {len(predelib_sp_by_id)}")
        print(f"Total dashboard IDs: {len(dashboard_sp_by_id)}")
        return []

    mismatches = [
        {
            'ID': key,
            'Predelib_SP': predelib_sp_by_id[key],
            'Dashboard_SP': dashboard_sp_by_id[key],
        }
        for key in matching_ids
        if _sp_values_differ(predelib_sp_by_id[key], dashboard_sp_by_id[key])
    ]

    if not mismatches:
        print("All SP values match between the two dataframes!")
    else:
        print(f"Found {len(mismatches)} mismatches:")
        for mismatch in mismatches:
            print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")

    return mismatches
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Manual smoke test: run the full pipeline on the two local workbooks.
    from checkheaders import check_headers_predelibfile, check_headers_dashboard_inschrijvingenfile

    # Load both workbooks first, so a missing file fails before any processing.
    raw_predelib, raw_dashboard = (
        pd.read_excel(workbook)
        for workbook in ('db.xlsx', 'dashboard_inschrijvingen.xlsx')
    )

    # Normalise the headers of each sheet.
    processed_predelib_df = check_headers_predelibfile(raw_predelib)
    processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(raw_dashboard)

    # Report SP differences between the two cleaned frames.
    print("\nComparing SP values between predelib and dashboard files:")
    mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
|
|
|
@ -1,20 +0,0 @@
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
|
|
||||||
from compare_sp import compare_sp_values
|
|
||||||
|
|
||||||
# Load both workbooks before any processing starts.
df_predelib, df_dashboard = (
    pd.read_excel(workbook)
    for workbook in ('db.xlsx', 'dashboard_inschrijvingen.xlsx')
)

# Normalise the headers of each sheet.
processed_predelib_df = check_headers_predelibfile(df_predelib)
processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

# Show the cleaned frames; each title is followed by its frame on a new line.
print("Processed Predelib DataFrame:", processed_predelib_df, sep="\n")
print("\nProcessed Dashboard DataFrame:", processed_dashboard_df, sep="\n")

# Run the SP comparison; the function prints its own findings.
compare_sp_values(processed_predelib_df, processed_dashboard_df)
print("\nComparison of the predelib file with the dashboard file on SP values complete.")
|
|
|
@ -1,4 +0,0 @@
|
||||||
Extracurriculaire vakken komen niet uit de wizard; daarvoor is een aparte Excel nodig, nl. dashboard inschrijvingen.
|
|
||||||
Voor de check of ze extra keuzevakken opnemen en de vereiste dus hoger moet komen te staan: vergelijk de kolommen 'Totaal aantal SP' en 'Aantal SP vereist'.
|
|
||||||
|
|
||||||
Deze kolom moet ook worden gecheckt als ze eigenlijk minder opnemen; de twee moeten dus altijd aan elkaar gelijk zijn.
|
|
Loading…
Reference in New Issue
Block a user