Refactor code structure and remove redundant code blocks

This commit is contained in:
bdaneels
2025-10-21 14:20:04 +02:00
parent bcf8f3acae
commit 468df81386
10 changed files with 2054 additions and 19 deletions

View File

@@ -0,0 +1,4 @@
/Mentoraat_2024-2025.xlsx
/reinoud.xlsx
/sisa.xlsx
*.xlsx

View File

@@ -0,0 +1,90 @@
# Script Documentation
## Overview
This script processes two Excel files (
reinoud.xlsx
and
sisa.xlsx
) to find and append missing IDs from
sisa.xlsx
to
reinoud.xlsx
. It also checks for duplicate IDs in
reinoud.xlsx
.
## Functions
### [`load_excel(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A7%2C%22character%22%3A4%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition")
Loads an Excel file into a DataFrame.
### [`check_duplicates(df: pd.DataFrame, column: str) -> List[str]`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A21%2C%22character%22%3A4%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition")
Checks for duplicate values in a specified column.
### [`find_missing_ids(df1: pd.DataFrame, df2: pd.DataFrame, column: str) -> List[str]`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A26%2C%22character%22%3A4%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition")
Finds IDs in [`df2`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A26%2C%22character%22%3A40%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition") that are not in [`df1`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A26%2C%22character%22%3A21%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition").
### [`append_missing_ids(reinoud_df: pd.DataFrame, sisa_df: pd.DataFrame, column: str, reinoud_file: str) -> pd.DataFrame`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A33%2C%22character%22%3A4%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition")
Appends missing IDs and corresponding details from [`sisa_df`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A33%2C%22character%22%3A49%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition") to [`reinoud_df`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A33%2C%22character%22%3A23%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition").
### [`main(reinoud_file: str, sisa_file: str, column: str, reinoud_sheet: Optional[str] = None, sisa_sheet: Optional[str] = None)`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A55%2C%22character%22%3A4%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition")
Main function to load the Excel files, check for duplicates, append missing IDs, and save the updated DataFrame back to the Excel file.
## Usage
Run the script with the following command:
```sh
python script.py
```
Example usage within the script:
```python
if __name__ == "__main__":
main('reinoud.xlsx', 'sisa.xlsx', 'Rolnummer', reinoud_sheet='Actief', sisa_sheet='sheet1')
```
## Logging
The script uses the [`logging`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A1%2C%22character%22%3A7%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition") module to log information and errors. The log level is set to [`INFO`](command:_github.copilot.openSymbolFromReferences?%5B%22%22%2C%5B%7B%22uri%22%3A%7B%22scheme%22%3A%22file%22%2C%22authority%22%3A%22%22%2C%22path%22%3A%22%2Fc%3A%2FUsers%2Fbrech%2FDocuments%2FlocalReps%2Fows-mentoraat%2Fscript.py%22%2C%22query%22%3A%22%22%2C%22fragment%22%3A%22%22%7D%2C%22pos%22%3A%7B%22line%22%3A5%2C%22character%22%3A34%7D%7D%5D%2C%22723f84da-7613-47af-a432-459fba37ba55%22%5D "Go to definition").
## File Structure
```
.gitignore
reinoud.xlsx
script.py
sisa.xlsx
```
## Dependencies
- pandas
- logging
Install dependencies using:
```sh
pip install pandas
```
## License
This script is provided "as-is" without any warranty. Use at your own risk.

View File

@@ -0,0 +1,82 @@
import pandas as pd
import logging
from typing import List, Optional
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')
def load_excel(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
"""Load an Excel file into a DataFrame."""
try:
df = pd.read_excel(file_path, sheet_name=sheet_name)
if isinstance(df, dict):
raise ValueError(f"Multiple sheets found in {file_path}. Please specify a sheet name.")
return df
except FileNotFoundError:
logging.error(f"File not found: {file_path}")
raise
except Exception as e:
logging.error(f"Error loading file {file_path}: {e}")
raise
def check_duplicates(df: pd.DataFrame, column: str) -> List[str]:
"""Check for duplicate values in a specified column."""
duplicates = df[column].astype(str)[df[column].duplicated()]
return duplicates.tolist()
def find_missing_ids(df1: pd.DataFrame, df2: pd.DataFrame, column: str) -> List[str]:
"""Find IDs in df2 that are not in df1."""
ids1 = df1[column].astype(str)
ids2 = df2[column].astype(str)
missing_ids = ids2[~ids2.isin(ids1)]
return missing_ids.tolist()
def append_missing_ids(reinoud_df: pd.DataFrame, sisa_df: pd.DataFrame, column: str, reinoud_file: str) -> pd.DataFrame:
"""Append missing IDs and corresponding Naam, Voornaam, Plan, and Campus emailadres to reinoud_df."""
missing_ids = find_missing_ids(reinoud_df, sisa_df, column)
if missing_ids:
missing_rows = sisa_df[sisa_df[column].astype(str).isin(missing_ids)]
# Select only the specified columns
selected_columns = ['Rolnummer', 'Naam', 'Voornaam', 'Plan', 'Campus emailadres']
missing_rows = missing_rows[selected_columns]
# Rename 'Campus emailadres' to 'mail' for reinoud_df
missing_rows = missing_rows.rename(columns={'Campus emailadres': 'mail'})
# Append missing rows to reinoud_df
reinoud_df = pd.concat([reinoud_df, missing_rows], ignore_index=True)
logging.info(f"Appended missing IDs to {reinoud_file}:")
for _, row in missing_rows.iterrows():
logging.info(f"ID: {row[column]}, Naam: {row['Naam']}, Voornaam: {row['Voornaam']}, Plan: {row['Plan']}, mail: {row['mail']}")
else:
logging.info("No missing IDs to append.")
return reinoud_df
def main(reinoud_file: str, sisa_file: str, column: str, reinoud_sheet: Optional[str] = None, sisa_sheet: Optional[str] = None):
# Load the Excel files
reinoud_df = load_excel(reinoud_file, sheet_name=reinoud_sheet)
sisa_df = load_excel(sisa_file, sheet_name=sisa_sheet)
# Debug: Print columns of sisa_df
logging.info(f"Columns in {sisa_file}: {sisa_df.columns.tolist()}")
# Check for duplicates in reinoud
duplicates = check_duplicates(reinoud_df, column)
if duplicates:
logging.info("Duplicate IDs in reinoud.xlsx:")
logging.info(duplicates)
else:
logging.info("No duplicates found in reinoud.xlsx.")
# Append missing IDs from sisa to reinoud
reinoud_df = append_missing_ids(reinoud_df, sisa_df, column, reinoud_file)
# Save the updated reinoud_df back to the Excel file
reinoud_df.to_excel(reinoud_file, sheet_name=reinoud_sheet, index=False)
logging.info(f"Updated {reinoud_file} saved.")
if __name__ == "__main__":
# Example usage
# change the file names, column name, and sheet names as needed
main('reinoud.xlsx', 'sisa.xlsx', 'Rolnummer', reinoud_sheet='Actief', sisa_sheet='sheet1')