commit f3b5792fe55fdcfe94bb2076d06814999d2038f9
Author: bdaneels <brecht.daneels@hotmail.com>
Date:   Mon Oct 14 15:20:17 2024 +0200

    first commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8b123cd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/Mentoraat_2024-2025.xlsx
+/reinoud.xlsx
+/sisa.xlsx
\ No newline at end of file
diff --git a/script.py b/script.py
new file mode 100644
index 0000000..cc3a650
--- /dev/null
+++ b/script.py
@@ -0,0 +1,82 @@
+import pandas as pd
+import logging
+from typing import List, Optional
+
+# Configure the root logger once at import time: INFO and above, rendered as "LEVEL: message".
+logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
+
+def load_excel(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
+    """Load one sheet as a DataFrame (a None sheet_name needs a single-sheet workbook); errors are logged and re-raised."""
+    try:
+        df = pd.read_excel(file_path, sheet_name=sheet_name)
+        if isinstance(df, dict) and len(df) != 1:  # sheet_name=None yields {sheet name: DataFrame}
+            raise ValueError(f"Multiple sheets found in {file_path}. Please specify a sheet name.")
+        return next(iter(df.values())) if isinstance(df, dict) else df  # unwrap the only sheet
+    except FileNotFoundError:
+        logging.error(f"File not found: {file_path}")
+        raise
+    except Exception as e:
+        logging.error(f"Error loading file {file_path}: {e}")
+        raise
+
+def check_duplicates(df: pd.DataFrame, column: str) -> List[str]:
+    """Return, as strings, the values of `column` that repeat an earlier row (first occurrences are not listed)."""
+    values = df[column]
+    return values[values.duplicated()].astype(str).tolist()
+
+def find_missing_ids(df1: pd.DataFrame, df2: pd.DataFrame, column: str) -> List[str]:
+    """Return each distinct non-blank ID (as a string, in df2 order) present in df2[column] but not in df1[column]."""
+    known = set(df1[column].dropna().astype(str))
+    candidates = df2[column].dropna().astype(str).drop_duplicates()
+    # Blank cells are dropped first: astype(str) would otherwise turn NaN into the bogus ID "nan".
+    return candidates[~candidates.isin(known)].tolist()
+
+def append_missing_ids(reinoud_df: pd.DataFrame, sisa_df: pd.DataFrame, column: str, reinoud_file: str) -> pd.DataFrame:
+    """Return reinoud_df with the sisa_df rows whose ID in `column` is absent from reinoud_df appended.
+
+    Only the ID column plus Naam, Voornaam, Plan and Campus emailadres (renamed to 'mail') are copied;
+    any other reinoud_df column is left empty (NaN) for the new rows. reinoud_file is used for logging only.
+    """
+    missing_ids = find_missing_ids(reinoud_df, sisa_df, column)
+    if not missing_ids:
+        logging.info("No missing IDs to append.")
+        return reinoud_df
+
+    # Select the caller's ID column instead of a hard-coded 'Rolnummer'; otherwise row[column] in the
+    # log loop below raises KeyError (and the ID is not copied) whenever another ID column is passed.
+    detail_columns = [c for c in ('Naam', 'Voornaam', 'Plan', 'Campus emailadres') if c != column]
+    missing_rows = sisa_df.loc[sisa_df[column].astype(str).isin(missing_ids), [column] + detail_columns]
+    missing_rows = missing_rows.rename(columns={'Campus emailadres': 'mail'})
+
+    logging.info(f"Appended missing IDs to {reinoud_file}:")
+    for _, row in missing_rows.iterrows():
+        logging.info(f"ID: {row[column]}, Naam: {row['Naam']}, Voornaam: {row['Voornaam']}, Plan: {row['Plan']}, mail: {row['mail']}")
+    return pd.concat([reinoud_df, missing_rows], ignore_index=True)
+
+def main(reinoud_file: str, sisa_file: str, column: str, reinoud_sheet: Optional[str] = None, sisa_sheet: Optional[str] = None):
+    """Report duplicate IDs in the reinoud sheet, append the sisa rows it lacks, and save that sheet back in place.
+
+    `column` is the ID column looked up in both sheets; the sheet names are passed straight to load_excel.
+    """
+    reinoud_df = load_excel(reinoud_file, sheet_name=reinoud_sheet)
+    sisa_df = load_excel(sisa_file, sheet_name=sisa_sheet)
+    logging.info(f"Columns in {sisa_file}: {sisa_df.columns.tolist()}")  # debug aid: show sisa's columns
+
+    duplicates = check_duplicates(reinoud_df, column)
+    if duplicates:
+        logging.info(f"Duplicate IDs in {reinoud_file}:")
+        logging.info(duplicates)
+    else:
+        logging.info(f"No duplicates found in {reinoud_file}.")
+
+    reinoud_df = append_missing_ids(reinoud_df, sisa_df, column, reinoud_file)
+    # Replace only the loaded sheet: a plain to_excel(path) rewrites the workbook and drops every other sheet.
+    with pd.ExcelWriter(reinoud_file, engine='openpyxl', mode='a', if_sheet_exists='replace') as writer:
+        target_sheet = reinoud_sheet if reinoud_sheet is not None else list(writer.sheets)[0]
+        reinoud_df.to_excel(writer, sheet_name=target_sheet, index=False)
+    logging.info(f"Updated {reinoud_file} saved.")
+
+if __name__ == "__main__":
+    # Example invocation: adjust the workbook paths, the ID column and the sheet names to your data.
+    main(reinoud_file='reinoud.xlsx', sisa_file='sisa.xlsx', column='Rolnummer',
+         reinoud_sheet='Actief', sisa_sheet='sheet1')
\ No newline at end of file