4 changed files with 0 additions and 204 deletions
--- a/startpakketten/checkheaders.py
+++ b/startpakketten/checkheaders.py
@ -1,85 +0,0 @@
 import pandas as pd
 def check_headers_predelibfile(df):
     """Promote the embedded header row of a predeliberation sheet to column names.

     If 'Achternaam' and 'Voornaam' are already column names, the dataframe is
     returned unchanged. Otherwise the first row containing both values is
     treated as the header row: every row up to and including it is dropped,
     its values become the column names, and only the known columns listed in
     ``columns_to_keep`` (those that are actually present) are retained.

     Args:
         df (pandas.DataFrame): Raw or already-processed predeliberation data.

     Returns:
         pandas.DataFrame: The cleaned dataframe, or ``df`` itself when the
         headers are already in place or no header row could be found.
     """
     if 'Achternaam' in df.columns and 'Voornaam' in df.columns:
         print("Headers found in first row - file already processed, returning unchanged")
         return df

     # Search by row *position* rather than index label: the result is used
     # with ``iloc`` below, and labels only coincide with positions when the
     # frame still has its default RangeIndex.
     header_row = next(
         (
             position
             for position, values in enumerate(df.itertuples(index=False, name=None))
             if 'Achternaam' in values and 'Voornaam' in values
         ),
         None,
     )
     if header_row is None:
         print("Headers 'Achternaam' and 'Voornaam' not found in the file")
         return df

     # Use a plain list for the new labels so the header row's own index label
     # does not end up as the name of the columns index.
     header_values = df.iloc[header_row].tolist()
     df = df.iloc[header_row + 1:].reset_index(drop=True)
     df.columns = header_values

     columns_to_keep = [
         'ID', 'Achternaam', 'Voornaam', 'E-mail', 'Loopbaan',
         'Drempelteller omschrijving', 'Programma status omschrijving',
         'OO Periode', 'OO Studiegidsnummer', 'OO Lange omschrijving',
         'OO Eenheden', 'OO Sessie', 'OO Credit (Y/N)', 'OO Periode credit',
         'OO Programma code', 'OO Programma korte omschr.', 'Totaal aantal SP',
         'Aantal SP vereist', 'Aantal SP zonder VZP', 'Adviesrapport code',
         'Waarschuwing', 'Lijsttype',
     ]
     # Tolerate exports that lack some of the expected columns.
     existing_columns = [col for col in columns_to_keep if col in df.columns]
     df = df[existing_columns]
     print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
     return df
 def check_headers_dashboard_inschrijvingenfile(df):
     """Promote the embedded header row of a dashboard sheet to column names.

     If 'Naam' and 'Voornaam' are already column names, the dataframe is
     returned unchanged. Otherwise the first row containing both values is
     treated as the header row: every row up to and including it is dropped
     and its values become the column names.

     Args:
         df (pandas.DataFrame): Raw or already-processed dashboard data.

     Returns:
         pandas.DataFrame: The cleaned dataframe, or ``df`` itself when the
         headers are already in place or no header row could be found.
     """
     if 'Naam' in df.columns and 'Voornaam' in df.columns:
         # Return immediately: slicing from a sentinel position such as -1
         # would turn the last data row into the header and drop all data.
         print("Headers found in first row  of dashboard_inschrijvingen - no need to search for header row")
         print("Headers were already correct in dashboard_file.")
         return df

     # Search by row *position* rather than index label: the result is used
     # with ``iloc`` below, and labels only coincide with positions when the
     # frame still has its default RangeIndex.
     header_row = next(
         (
             position
             for position, values in enumerate(df.itertuples(index=False, name=None))
             if 'Naam' in values and 'Voornaam' in values
         ),
         None,
     )
     if header_row is None:
         # Name the headers this function actually searches for.
         print("Headers 'Naam' and 'Voornaam' not found in the file")
         return df

     # Use a plain list for the new labels so the header row's own index label
     # does not end up as the name of the columns index.
     header_values = df.iloc[header_row].tolist()
     df = df.iloc[header_row + 1:].reset_index(drop=True)
     df.columns = header_values
     print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
     return df
 if __name__ == "__main__":
     # Manual run: load both workbooks from the current working directory,
     # predeliberation export first, dashboard export second.
     df_predelib, df_dashboard = (
         pd.read_excel(workbook)
         for workbook in ('db.xlsx', 'dashboard_inschrijvingen.xlsx')
     )
     # Normalise the headers of each frame with its matching helper.
     processed_predelib_df = check_headers_predelibfile(df_predelib)
     processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
--- a/startpakketten/compare_sp.py
+++ b/startpakketten/compare_sp.py
@ -1,95 +0,0 @@
 import pandas as pd
 def compare_sp_values(predelib_df, dashboard_df):
     """
     Compare 'Totaal aantal SP' from predelib_df with 'Ingeschr. SP (intern)'
     from dashboard_df for every ID that occurs in both dataframes.

     IDs are compared as strings; whole-number floats are rendered without
     the trailing '.0' so that 123 and 123.0 refer to the same ID. When an ID
     occurs on several rows, the value of its first row is used. Two missing
     SP values count as equal. Mismatches are reported in the order in which
     the IDs first appear in predelib_df.

     Args:
         predelib_df (pandas.DataFrame): Dataframe from predeliberation file with 'ID' and 'Totaal aantal SP' columns
         dashboard_df (pandas.DataFrame): Dataframe from dashboard file with 'ID' and 'Ingeschr. SP (intern)' columns
     Returns:
         list: List of dictionaries with keys 'ID', 'Predelib_SP' and
         'Dashboard_SP', one per mismatch; empty list if all values match,
         a required column is missing, or no IDs are shared.
     """
     required_columns = (
         (predelib_df, 'ID', 'predelib'),
         (dashboard_df, 'ID', 'dashboard'),
         (predelib_df, 'Totaal aantal SP', 'predelib'),
         (dashboard_df, 'Ingeschr. SP (intern)', 'dashboard'),
     )
     for frame, column, label in required_columns:
         if column not in frame.columns:
             print(f"Warning: '{column}' column not found in {label} dataframe")
             return []

     # Diagnostic output to help spot ID type differences between the files.
     print(f"Predelib ID column type: {predelib_df['ID'].dtype}")
     print(f"Dashboard ID column type: {dashboard_df['ID'].dtype}")
     print(f"Sample predelib IDs: {list(predelib_df['ID'].head())}")
     print(f"Sample dashboard IDs: {list(dashboard_df['ID'].head())}")

     # One pass per dataframe builds an ID -> SP lookup, instead of
     # re-scanning both frames for every matching ID.
     predelib_sp_by_id = _first_sp_by_id(predelib_df, 'Totaal aantal SP')
     dashboard_sp_by_id = _first_sp_by_id(dashboard_df, 'Ingeschr. SP (intern)')

     # Dicts keep insertion order, so the report order is reproducible.
     matching_ids = [id_val for id_val in predelib_sp_by_id if id_val in dashboard_sp_by_id]
     print(f"Found {len(matching_ids)} matching IDs between the two dataframes")
     if not matching_ids:
         print("No matching IDs found between the dataframes")
         print(f"Total predelib IDs: {len(predelib_sp_by_id)}")
         print(f"Total dashboard IDs: {len(dashboard_sp_by_id)}")
         return []

     mismatches = []
     for id_val in matching_ids:
         predelib_sp = predelib_sp_by_id[id_val]
         dashboard_sp = dashboard_sp_by_id[id_val]
         if _sp_values_differ(predelib_sp, dashboard_sp):
             mismatches.append({
                 'ID': id_val,
                 'Predelib_SP': predelib_sp,
                 'Dashboard_SP': dashboard_sp,
             })

     if not mismatches:
         print("All SP values match between the two dataframes!")
     else:
         print(f"Found {len(mismatches)} mismatches:")
         for mismatch in mismatches:
             print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
     return mismatches


 def _normalize_sp_id(value):
     """Return the string form of an ID, dropping '.0' from whole-number floats."""
     if isinstance(value, float) and value.is_integer():
         return str(int(value))
     return str(value)


 def _first_sp_by_id(df, value_column):
     """Map each non-missing normalised ID to the value_column entry of its first row."""
     lookup = {}
     for raw_id, value in zip(df['ID'], df[value_column]):
         if pd.isna(raw_id):
             continue
         lookup.setdefault(_normalize_sp_id(raw_id), value)
     return lookup


 def _sp_values_differ(left, right):
     """Return True when two SP values differ; two missing values count as equal."""
     left_missing, right_missing = pd.isna(left), pd.isna(right)
     if left_missing or right_missing:
         # NaN != NaN is True in Python, which would flag a false mismatch.
         return not (left_missing and right_missing)
     return bool(left != right)
 if __name__ == "__main__":
     # Manual test run: the header helpers are imported here, inside the
     # guard, so they are only loaded when this file is executed directly.
     from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile

     # Load the two workbooks from the current working directory.
     df_predelib = pd.read_excel('db.xlsx')
     df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')

     # Normalise the headers of both frames before comparing them.
     processed_predelib_df = check_headers_predelibfile(df_predelib)
     processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

     # Report SP differences between the two cleaned frames.
     print("\nComparing SP values between predelib and dashboard files:")
     mismatches = compare_sp_values(
         processed_predelib_df,
         processed_dashboard_df,
     )
--- a/startpakketten/script.py
+++ b/startpakketten/script.py
@ -1,20 +0,0 @@
 import pandas as pd
 from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
 from compare_sp import compare_sp_values
 # Load the predeliberation and dashboard workbooks (in that order) from the
 # current working directory.
 df_predelib, df_dashboard = map(
     pd.read_excel,
     ('db.xlsx', 'dashboard_inschrijvingen.xlsx'),
 )

 # Normalise the headers of each frame with its matching helper.
 processed_predelib_df = check_headers_predelibfile(df_predelib)
 processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

 # Show both cleaned frames before running the comparison.
 print("Processed Predelib DataFrame:", processed_predelib_df, sep="\n")
 print("\nProcessed Dashboard DataFrame:", processed_dashboard_df, sep="\n")

 # The comparison prints its own report; its return value is not used here.
 compare_sp_values(processed_predelib_df, processed_dashboard_df)
 print("\nComparison of the predelib file with the dashboard file on SP values complete.")
--- a/startpakketten/todo.md
+++ b/startpakketten/todo.md
@ -1,4 +0,0 @@
 Extracurriculaire vakken komen niet uit de wizard. Daarvoor is een aparte Excel nodig, nl. dashboard inschrijvingen.
 Voor de check of ze extra keuzevakken opnemen en de vereiste dus hoger moet komen te staan: vergelijk de kolommen 'Totaal aantal SP' en 'Aantal SP vereist'.
 Deze kolom moet ook worden gecheckt als ze eigenlijk minder opnemen; die moeten dus altijd aan elkaar gelijk zijn.