Compare commits
11 Commits
b021eabdab ... main

| Author | SHA1 | Date |
|---|---|---|
|  | a2e9c6376e |  |
|  | 468df81386 |  |
|  | bcf8f3acae |  |
|  | 067a450bfd |  |
|  | 8236038f11 |  |
|  | c5d356b366 |  |
|  | 248417c4b8 |  |
|  | eac9bad134 |  |
|  | 94ae88e756 |  |
|  | c547a74bba |  |
|  | b912de4c44 |  |
6 .gitignore (vendored)
@@ -8,4 +8,8 @@ __pycache__/
 # Ignore Excel files
 *.xlsx
+
+sisa_crawl/
 # Ignore log files
 *.log
+
+sisa_crawl/
104 bascriptie studiepunten controle/script.py (Normal file)
@@ -0,0 +1,104 @@
import os
import sys
import pandas as pd
import numpy as np

SCRIPT_DIR = os.path.dirname(__file__)
SCRIPTIE_XLSX = os.path.join(SCRIPT_DIR, "scriptie.xlsx")
DASH_XLSX = os.path.join(SCRIPT_DIR, "dashboard_inschrijvingen.xlsx")


def find_common_id_col(df1, df2):
    common = set(df1.columns).intersection(df2.columns)
    # prefer obvious id-like names; compare lower-case so "ID" matches "id"
    for kw in ("id",):
        for c in common:
            if kw in c.lower():
                return c
    if len(common) == 1:
        return next(iter(common))
    if common:
        return sorted(common)[0]
    raise ValueError(f"No common column found between files.\nFile1 cols: {df1.columns.tolist()}\nFile2 cols: {df2.columns.tolist()}")


def find_col_by_keywords(df, keywords):
    for k in keywords:
        for c in df.columns:
            if k in c.lower():
                return c
    return None


def main():
    if not os.path.exists(SCRIPTIE_XLSX) or not os.path.exists(DASH_XLSX):
        print("Missing files. Make sure scriptie.xlsx and dashboard_inschrijvingen.xlsx are in the script folder.")
        sys.exit(1)

    df_scriptie = pd.read_excel(SCRIPTIE_XLSX)
    df_dash = pd.read_excel(DASH_XLSX)

    try:
        id_col = find_common_id_col(df_scriptie, df_dash)
    except ValueError as e:
        print(e)
        sys.exit(1)

    # find relevant columns (best-effort)
    col_target_oo = find_col_by_keywords(df_scriptie, ["target oo", "target_oo", "targetoo"])
    col_target_plan = find_col_by_keywords(df_scriptie, ["target plan", "target_plan", "targetplan", "target"])
    col_sp = find_col_by_keywords(df_dash, ["sp", "punten", "ects", "study points"])

    if col_target_oo is None or col_target_plan is None or col_sp is None:
        print("Could not locate required columns. Detected:")
        print("scriptie columns:", df_scriptie.columns.tolist())
        print("dashboard columns:", df_dash.columns.tolist())
        print(f"Found -> target_oo: {col_target_oo}, target_plan: {col_target_plan}, SP: {col_sp}")
        sys.exit(1)

    merged = df_scriptie.merge(df_dash, on=id_col, how="inner", suffixes=("_scriptie", "_dash"))

    # filter rows where Target OO contains 1070FLWGES
    mask_oo = merged[col_target_oo].astype(str).str.contains("1070FLWGES", na=False)

    subset = merged[mask_oo].copy()
    if subset.empty:
        print("No rows with Target OO containing '1070FLWGES'. No violations.")
        return

    # determine thresholds per row: default 180-9 = 171; if Target plan contains
    # 'Ba geschiedenis (major)' then 180-12 = 168
    # regex=False so the parentheses are matched literally, not as a capture group
    plan_contains = subset[col_target_plan].astype(str).str.contains("ba geschiedenis (major)", case=False, na=False, regex=False)
    subset["threshold"] = np.where(plan_contains, 180 - 12, 180 - 9)

    # coerce SP to numeric (non-numeric become NaN)
    subset["SP_value"] = pd.to_numeric(subset[col_sp], errors="coerce")

    # violation: SP < threshold
    violations = subset[subset["SP_value"] < subset["threshold"]]

    # also consider NaN as violation
    nan_viol = subset[subset["SP_value"].isna()]
    violations = pd.concat([violations, nan_viol]).drop_duplicates()

    if violations.empty:
        print("No violations found for entries with Target OO == 1070FLWGES.")
        return

    report_cols = [id_col, col_sp, "SP_value", "threshold", col_target_plan, col_target_oo]
    report = violations.loc[:, report_cols]
    report = report.rename(columns={col_sp: "SP_raw", col_target_plan: "Target_plan", col_target_oo: "Target_OO"})

    out_csv = os.path.join(SCRIPT_DIR, "violations_report.csv")
    out_xlsx = os.path.join(SCRIPT_DIR, "violations_report.xlsx")
    report.to_csv(out_csv, index=False)
    try:
        report.to_excel(out_xlsx, index=False)
    except Exception:
        pass

    print(f"Found {len(report)} violation(s). Saved to: {out_csv} (and {out_xlsx} if Excel write succeeded).")
    print(report.to_string(index=False))


if __name__ == "__main__":
    main()
3 bascriptie studiepunten controle/violations_report.csv (Normal file)
@@ -0,0 +1,3 @@
ID,SP_raw,SP_value,threshold,Target_plan,Target_OO
20224729,168,168,171,Ba geschiedenis,1070FLWGES
20224915,162,162,171,Ba geschiedenis,1070FLWGES
104 check sociologie inleiding soc/script.py (Normal file)
@@ -0,0 +1,104 @@
from pathlib import Path
import pandas as pd


def find_duplicates(base_file: Path, ps_files: list[Path], id_col_candidates=None, grade_col_candidates=None):
    """Read the base registration file and several ps files, then find IDs that appear in both.

    Returns a DataFrame with columns: ID, Cijfer, SourceFile
    """
    if id_col_candidates is None:
        id_col_candidates = ["ID", "Id", "id", "inschrijving_id"]
    if grade_col_candidates is None:
        grade_col_candidates = ["Cijfer", "cijfer", "Grade", "grade"]

    # Read base IDs
    print(f"Reading base file: {base_file}")
    base_df = pd.read_excel(base_file)

    # find ID column in base
    base_id_col = next((c for c in base_df.columns if c in id_col_candidates), None)
    if base_id_col is None:
        raise ValueError(f"Could not find an ID column in {base_file}. Tried: {id_col_candidates}")

    base_ids = set(base_df[base_id_col].dropna().astype(str).str.strip())
    print(f"Found {len(base_ids)} IDs in base file (column '{base_id_col}').")

    duplicates = []

    for pf in ps_files:
        print(f"Processing ps file: {pf}")
        try:
            df = pd.read_excel(pf)
        except Exception as e:
            print(f"  Skipping {pf} - failed to read: {e}")
            continue

        # guess ID column
        id_col = next((c for c in df.columns if c in id_col_candidates), None)
        if id_col is None:
            # try fuzzy: column name contains 'id'
            id_col = next((c for c in df.columns if 'id' in str(c).lower()), None)
        if id_col is None:
            print(f"  No ID column found in {pf}; skipping.")
            continue

        grade_col = next((c for c in df.columns if c in grade_col_candidates), None)
        if grade_col is None:
            # try fuzzy: column name contains 'cij' or 'grade'
            grade_col = next((c for c in df.columns if any(k in str(c).lower() for k in ['cij', 'grade'])), None)

        # normalize IDs to string; build the mask on the full frame so the
        # boolean index aligns with df (a mask built on a dropna() subset
        # would not be alignable)
        ids = df[id_col].astype(str).str.strip()

        # find intersection with the base IDs
        mask = ids.isin(base_ids)
        matched = df.loc[mask]
        if matched.empty:
            print(f"  No duplicates found in {pf}.")
            continue

        # collect results
        for _, row in matched.iterrows():
            id_val = str(row[id_col]).strip()
            grade_val = row[grade_col] if (grade_col is not None and pd.notna(row[grade_col])) else None
            duplicates.append({"ID": id_val, "Cijfer": grade_val, "SourceFile": pf.name})

        print(f"  Found {len(matched)} duplicates in {pf}.")

    dup_df = pd.DataFrame(duplicates)
    return dup_df


def main():
    base = Path(__file__).parent / "inschrijvingslijst sociologie.xlsx"
    # match files like: ps (82).xls.xlsx
    ps_files = sorted(Path(__file__).parent.glob('ps *.xls.xlsx'))

    if not base.exists():
        print(f"Base file not found: {base}")
        return

    if not ps_files:
        print("No ps files found matching pattern 'ps *.xls.xlsx'")
        return

    dup_df = find_duplicates(base, ps_files)

    if dup_df.empty:
        print("No duplicates found across provided files.")
    else:
        # print duplicates
        print("Duplicates found (ID - Cijfer - SourceFile):")
        for _, r in dup_df.iterrows():
            print(f"{r['ID']} - {r['Cijfer']} - {r['SourceFile']}")

        out_csv = Path(__file__).parent / 'duplicates_summary.csv'
        dup_df.to_csv(out_csv, index=False)
        print(f"Wrote summary to {out_csv}")


if __name__ == '__main__':
    main()
32 eindwerken ba controle/main.py (Normal file)
@@ -0,0 +1,32 @@
import pandas as pd

promotor_assessor = {
    "De Munck": ["De Groot"],
    "Gelderblom": ["Van Laer"],
    "Blondé": ["Schepers", "Kole"],
    "Puttevils": ["Heijmans"],
    "Greefs": ["Tanis"],
    "Wynants": ["Samoy", "Welslau"]
}

df = pd.read_excel('sisa.xlsx', sheet_name='sheet1')

for idx, row in df.iterrows():
    promotor_cell = str(row.get('Promotor', '')).strip()
    assessors_cell = [a.strip() for a in str(row.get('Assessor(en)', '')).split(',')]
    matched_promotor = None
    for key in promotor_assessor:
        # substring match in either direction tolerates initials and extra words
        if key in promotor_cell or promotor_cell in key:
            matched_promotor = key
            break
    if matched_promotor:
        expected = promotor_assessor[matched_promotor]
        missing = []
        for e in expected:
            if not any(e in a or a in e for a in assessors_cell):
                missing.append(e)
        if missing:
            print(f"Row {idx+1}: ERROR: Expected assessors {expected}, found {assessors_cell}")

print("Check completed.")
32 eindwerken ma controle/eindwerken controle/main.py (Normal file)
@@ -0,0 +1,32 @@
import pandas as pd
import openpyxl  # engine pandas uses to read/write .xlsx files

# Step 1: Load both files
sisa_file = pd.read_excel("sisa file.xlsx", sheet_name="Sheet1")
reinoud_file = pd.read_excel("reinoud file.xlsx", sheet_name="Sheet1")

# Step 2: Strip the trailing token from the Promotor and build a Full Name to join on
sisa_file["Promotor"] = sisa_file["Promotor"].str.rsplit(" ", n=1).str[0]

sisa_file["Full Name"] = sisa_file["Achternaam"] + " " + sisa_file["Voornaam"]

# Step 3: Check if the Full Name exists in the Naam column of the reinoud file
merged = sisa_file.merge(reinoud_file, left_on="Full Name", right_on="Naam", how="inner")

# Step 4: Find divergent Promotor values
divergent_promotors = merged[merged["Promotor_x"] != merged["Promotor_y"]]

# Step 5: Check if Assessor(en) contains Lector 1 and Lector 2
def check_assessors(row):
    assessors = row["Assessor(en)"]
    return all(lector in assessors for lector in [row["Lector 1"], row["Lector 2"]])

merged["Assessors Match"] = merged.apply(check_assessors, axis=1)

# Save results
divergent_promotors.to_excel("divergent_promotors.xlsx", index=False)
merged.to_excel("merged_results.xlsx", index=False)

print("Processing complete. Results saved.")
@@ -2,7 +2,7 @@ import pandas as pd
 
 # Constants
 FILE_PATH = 'file.xlsx'
-SHEET_NAME = 'ps (32)'
+SHEET_NAME = 'ps (53)'
 OUTPUT_FILE_PATH = 'filtered_grote_lokalen.xlsx'
 EXAM_FORM_COLUMN = 'Examenvorm'
 REGISTRATION_COLUMN = 'Aant. inschr.'
@@ -10,6 +10,7 @@ BEGIN_TIME_COLUMN = 'Beginuur S+'
 END_TIME_COLUMN = 'Einduur S+'
 TEACHERS_COLUMN = 'Docenten'
 LOCATION_COLUMNS = ['Datum S+', BEGIN_TIME_COLUMN, END_TIME_COLUMN, 'Studiegidsnr.', 'Omschrijving', TEACHERS_COLUMN, REGISTRATION_COLUMN]
+AANTAL_STUDENTEN = 65
 
 # Read the Excel file
 def read_excel(file_path, sheet_name):
@@ -18,7 +19,7 @@ def read_excel(file_path, sheet_name):
 # Filter DataFrame
 def filter_dataframe(df):
     df = df[df[EXAM_FORM_COLUMN] == 'Schriftelijk']
-    df = df[df[REGISTRATION_COLUMN] > 65]
+    df = df[df[REGISTRATION_COLUMN] > AANTAL_STUDENTEN]
     return df[LOCATION_COLUMNS]
 
 # Format time strings
1 link-weaver-dashboard-view (Submodule)
Submodule link-weaver-dashboard-view added at 846d0f6fb4
@@ -0,0 +1,4 @@
+/Mentoraat_2024-2025.xlsx
+/reinoud.xlsx
+/sisa.xlsx
+*.xlsx
@@ -0,0 +1,90 @@
# Script Documentation

## Overview

This script processes two Excel files (`reinoud.xlsx` and `sisa.xlsx`) to find and append missing IDs from `sisa.xlsx` to `reinoud.xlsx`. It also checks for duplicate IDs in `reinoud.xlsx`.
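At its core the script performs an anti-join on the ID column: every ID from `sisa.xlsx` that is not already present in `reinoud.xlsx` is selected and appended. A minimal sketch of that idea (the file, sheet, and column names follow the example call documented below and are placeholders for your own data):

```python
import pandas as pd

# Load both workbooks (sheet names are assumptions; adjust to your files).
reinoud = pd.read_excel("reinoud.xlsx", sheet_name="Actief")
sisa = pd.read_excel("sisa.xlsx", sheet_name="sheet1")

# Compare IDs as strings so numeric and text cells match each other.
known = set(reinoud["Rolnummer"].astype(str))
missing = sisa[~sisa["Rolnummer"].astype(str).isin(known)]

print(f"{len(missing)} row(s) in sisa.xlsx are missing from reinoud.xlsx")
```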
## Functions

### `load_excel(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame`

Loads an Excel file into a DataFrame.

### `check_duplicates(df: pd.DataFrame, column: str) -> List[str]`

Checks for duplicate values in a specified column.

### `find_missing_ids(df1: pd.DataFrame, df2: pd.DataFrame, column: str) -> List[str]`

Finds IDs in `df2` that are not in `df1`.
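For example, the comparison reduces to a set difference on the stringified column (a hypothetical call, assuming `script.py` is importable from the working directory):

```python
import pandas as pd
from script import find_missing_ids  # assumes script.py sits on the import path

df1 = pd.DataFrame({"Rolnummer": [1, 2, 3]})
df2 = pd.DataFrame({"Rolnummer": [2, 3, 4]})

# IDs are cast to str internally, so the result is a list of strings.
print(find_missing_ids(df1, df2, "Rolnummer"))  # ['4']
```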
### `append_missing_ids(reinoud_df: pd.DataFrame, sisa_df: pd.DataFrame, column: str, reinoud_file: str) -> pd.DataFrame`

Appends missing IDs and corresponding details from `sisa_df` to `reinoud_df`.

### `main(reinoud_file: str, sisa_file: str, column: str, reinoud_sheet: Optional[str] = None, sisa_sheet: Optional[str] = None)`

Main function to load the Excel files, check for duplicates, append missing IDs, and save the updated DataFrame back to the Excel file.

## Usage

Run the script with the following command:

```sh
python script.py
```

Example usage within the script:

```python
if __name__ == "__main__":
    main('reinoud.xlsx', 'sisa.xlsx', 'Rolnummer', reinoud_sheet='Actief', sisa_sheet='sheet1')
```

## Logging

The script uses the `logging` module to log information and errors. The log level is set to `INFO`.

## File Structure

```
.gitignore
reinoud.xlsx
script.py
sisa.xlsx
```

## Dependencies

- pandas
- logging (standard library)

Install dependencies using:

```sh
pip install pandas
```

## License

This script is provided "as-is" without any warranty. Use at your own risk.
@@ -0,0 +1,82 @@
import pandas as pd
import logging
from typing import List, Optional

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s')

def load_excel(file_path: str, sheet_name: Optional[str] = None) -> pd.DataFrame:
    """Load an Excel file into a DataFrame."""
    try:
        df = pd.read_excel(file_path, sheet_name=sheet_name)
        if isinstance(df, dict):
            raise ValueError(f"Multiple sheets found in {file_path}. Please specify a sheet name.")
        return df
    except FileNotFoundError:
        logging.error(f"File not found: {file_path}")
        raise
    except Exception as e:
        logging.error(f"Error loading file {file_path}: {e}")
        raise

def check_duplicates(df: pd.DataFrame, column: str) -> List[str]:
    """Check for duplicate values in a specified column."""
    duplicates = df[column].astype(str)[df[column].duplicated()]
    return duplicates.tolist()

def find_missing_ids(df1: pd.DataFrame, df2: pd.DataFrame, column: str) -> List[str]:
    """Find IDs in df2 that are not in df1."""
    ids1 = df1[column].astype(str)
    ids2 = df2[column].astype(str)
    missing_ids = ids2[~ids2.isin(ids1)]
    return missing_ids.tolist()

def append_missing_ids(reinoud_df: pd.DataFrame, sisa_df: pd.DataFrame, column: str, reinoud_file: str) -> pd.DataFrame:
    """Append missing IDs and corresponding Naam, Voornaam, Plan, and Campus emailadres to reinoud_df."""
    missing_ids = find_missing_ids(reinoud_df, sisa_df, column)
    if missing_ids:
        missing_rows = sisa_df[sisa_df[column].astype(str).isin(missing_ids)]
        # Select only the specified columns
        selected_columns = ['Rolnummer', 'Naam', 'Voornaam', 'Plan', 'Campus emailadres']
        missing_rows = missing_rows[selected_columns]

        # Rename 'Campus emailadres' to 'mail' for reinoud_df
        missing_rows = missing_rows.rename(columns={'Campus emailadres': 'mail'})

        # Append missing rows to reinoud_df
        reinoud_df = pd.concat([reinoud_df, missing_rows], ignore_index=True)

        logging.info(f"Appended missing IDs to {reinoud_file}:")
        for _, row in missing_rows.iterrows():
            logging.info(f"ID: {row[column]}, Naam: {row['Naam']}, Voornaam: {row['Voornaam']}, Plan: {row['Plan']}, mail: {row['mail']}")
    else:
        logging.info("No missing IDs to append.")
    return reinoud_df

def main(reinoud_file: str, sisa_file: str, column: str, reinoud_sheet: Optional[str] = None, sisa_sheet: Optional[str] = None):
    # Load the Excel files
    reinoud_df = load_excel(reinoud_file, sheet_name=reinoud_sheet)
    sisa_df = load_excel(sisa_file, sheet_name=sisa_sheet)

    # Debug: Print columns of sisa_df
    logging.info(f"Columns in {sisa_file}: {sisa_df.columns.tolist()}")

    # Check for duplicates in reinoud
    duplicates = check_duplicates(reinoud_df, column)
    if duplicates:
        logging.info("Duplicate IDs in reinoud.xlsx:")
        logging.info(duplicates)
    else:
        logging.info("No duplicates found in reinoud.xlsx.")

    # Append missing IDs from sisa to reinoud
    reinoud_df = append_missing_ids(reinoud_df, sisa_df, column, reinoud_file)

    # Save the updated reinoud_df back to the Excel file
    # (note: this rewrites the workbook with only this sheet)
    reinoud_df.to_excel(reinoud_file, sheet_name=reinoud_sheet or "Sheet1", index=False)
    logging.info(f"Updated {reinoud_file} saved.")

if __name__ == "__main__":
    # Example usage
    # change the file names, column name, and sheet names as needed
    main('reinoud.xlsx', 'sisa.xlsx', 'Rolnummer', reinoud_sheet='Actief', sisa_sheet='sheet1')
326 ongeloofelijken tool/script.py (Normal file)
@@ -0,0 +1,326 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Update 'BAGES' sheet in 'ongeloofelijken tool.xlsx' with the latest (2025-2026) bachelor History
study programme from UAntwerpen. It scrapes the official page and writes a normalized table.

Source page (2025-2026 bachelor study programme):
https://www.uantwerpen.be/nl/studeren/aanbod/alle-opleidingen/geschiedenis-studeren/bachelor/studieprogramma/
- In 2025-2026 the 'Geschiedenis per periode en gebied' structure changed to a two-pillar model:
  * Chronologische pijler: 3 OOs (middeleeuwen, nieuwe tijd, nieuwste tijd)
  * Thematische pijler: 2 OOs
  (See faculty helpdesk note with change summary and transition measures.)

IMPORTANT:
- This script only updates the 'BAGES' (Bachelor) sheet, because the provided link covers the bachelor page.
- 'MAGES' and 'SPVP' sheets remain untouched.

Tested with: requests, beautifulsoup4, lxml, pandas, openpyxl
"""

import re
import sys
import time
import urllib.parse
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple

import requests
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.utils.exceptions import InvalidFileException


# ------------------------- Configuration -------------------------
EXCEL_PATH = "ongeloofelijken tool.xlsx"
TARGET_SHEET = "BAGES"
ARCHIVE_PREFIX = "BAGES_OLD_"
UA_BA_URL = "https://www.uantwerpen.be/nl/studeren/aanbod/alle-opleidingen/geschiedenis-studeren/bachelor/studieprogramma/"
TARGET_YEAR_PREFIX = "2025-"  # Anchor/course URLs have '?id=<year>-<code>'; we filter with '2025-'
TIMEOUT = 30
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; uantwerpen-bages-updater/1.0; +https://www.uantwerpen.be/)",
    "Accept-Language": "nl,en;q=0.8"
}


# ------------------------- Helpers -------------------------
def fetch_html(url: str) -> BeautifulSoup:
    resp = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "lxml")


def extract_text(el) -> str:
    return re.sub(r"\s+", " ", " ".join(el.stripped_strings)) if el else ""


def parse_meta_from_block(block_text: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """
    Try to parse Semester, Credits, Lecturers, Notes from a block of text next to a course link.
    Returns (semester, credits, lecturers, notes)
    """
    text = block_text

    # Semester examples: '1E SEM', '2E SEM', '1E/2E SEM'
    sem = None
    m_sem = re.search(r"\b(1E\s*/\s*2E\s*SEM|1E\s*SEM|2E\s*SEM)\b", text, flags=re.I)
    if m_sem:
        sem = m_sem.group(1).upper().replace(" ", "")

    # Credits examples: '6 studiepunten', '3 studiepunten'
    credits = None
    m_sp = re.search(r"(\d+)\s*studiepunten", text, flags=re.I)
    if m_sp:
        credits = m_sp.group(1)

    # Lecturers: after 'Lesgever (s):' or 'Lesgever(s):'
    lecturers = None
    m_lect = re.search(r"Lesgever\s*\(s\)\s*:\s*([^|]+?)(?:\s{2,}|$)", text, flags=re.I)
    if not m_lect:
        m_lect = re.search(r"Lesgever[s]?\s*:\s*([^|]+?)(?:\s{2,}|$)", text, flags=re.I)
    if m_lect:
        lecturers = m_lect.group(1).strip(" .").replace(" ,", ",")

    # Notes: look for two-yearly etc.
    notes = None
    if re.search(r"Tweejaarlijks", text, flags=re.I):
        # Try to capture the "even/oneven" phrasing
        m_ev = re.search(r"tweejaarlijks[^.]*?(even|oneven)[^.]*jaar", text, flags=re.I)
        notes = "Tweejaarlijks" + (f" ({m_ev.group(1).lower()} jaar)" if m_ev else "")

    return sem, credits, lecturers, notes


def nearest_sections(a_tag) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """
    Find nearest preceding headings to classify the row.
    Returns (section, subsection, pillar)
    - section: e.g., 'Modeltraject deel 1/2/3'
    - subsection: e.g., 'Wijsbegeerte en sociale wetenschappen', 'Keuzeopleidingsonderdelen', 'Inleiding tot de geschiedenis', etc.
    - pillar: for deel 3: 'Chronologische pijler', 'Thematische pijler' or None
    """
    # The site uses a variety of headings (h2-h5); we trace back to find labels
    section = subsection = pillar = None

    # Walk up multiple previous headings to capture a hierarchy
    prev_heads = []
    cur = a_tag
    for _ in range(40):  # limit walk to avoid infinite loops
        cur = cur.find_previous(["h2", "h3", "h4", "h5"])
        if not cur:
            break
        txt = extract_text(cur)
        prev_heads.append(txt)

    # Determine labels from the nearest few headings
    for txt in prev_heads:
        t = txt.lower()
        if section is None and "modeltraject deel" in t:
            # Normalize like "Modeltraject deel 1"
            section = txt
        if subsection is None:
            # Typical subsections
            if any(k in t for k in [
                "wijsbegeerte en sociale wetenschappen",
                "methodologie van de geschiedenis",
                "historische oefeningen",
                "inleiding tot de geschiedenis",
                "heuristiek",
                "historisch overzicht",
                "keuzeopleidingsonderdelen",
                "sociale wetenschappen",
            ]):
                subsection = txt
        if pillar is None and ("chronologische pijler" in t or "thematische pijler" in t):
            pillar = txt

        if section and (subsection or pillar):
            # Good enough
            break

    return section, subsection, pillar


def parse_courses_from_page(soup: BeautifulSoup) -> pd.DataFrame:
    """
    Parse all course links for the 2025-xxxx academic year, infer metadata from nearby text,
    and return a normalized DataFrame.
    """
    rows = []

    # Capture all anchors that look like course links containing '?id=2025-<CODE>'
    for a in soup.find_all("a", href=True):
        href = a["href"]
        # Normalize relative links
        full_url = urllib.parse.urljoin(UA_BA_URL, href)
        # Filter by the 'id=2025-' parameter (2025-2026)
        if "id=" in href:
            q = urllib.parse.urlparse(href).query
            params = urllib.parse.parse_qs(q)
            ids = params.get("id", [])
            if not ids:
                continue
            # Some pages use '2025-XXXXX' or '2025-XXXXX&lang=nl'
            if not any(idv.startswith(TARGET_YEAR_PREFIX) for idv in ids):
                continue
            course_id = ids[0]  # e.g., '2025-1002FLWGES'
        else:
            # No id=... parameter; skip
            continue

        # Extract code after '2025-'
        code = None
        m = re.match(r"2025-([A-Za-z0-9]+)", course_id)
        if m:
            code = m.group(1)

        name = extract_text(a).strip()
        if not name or not code:
            continue

        # Use a reasonably large ancestor block for metadata search
        container = a
        for _ in range(4):
            if container.parent:
                container = container.parent
        block_text = extract_text(container)

        semester, credits, lecturers, notes = parse_meta_from_block(block_text)
        section, subsection, pillar = nearest_sections(a)

        rows.append({
            "Section": section,
            "Subsection": subsection,
            "Pillar": pillar,
            "Course Code": code,
            "Course Name": name,
            "URL": full_url,
            "Semester": semester,
            "Credits": credits,
            "Lecturers": lecturers,
            "Notes": notes
        })

    if not rows:
        # No matching links; return an empty frame with the expected columns
        return pd.DataFrame(columns=["Section", "Subsection", "Pillar", "Course Code", "Course Name",
                                     "URL", "Semester", "Credits", "Lecturers", "Notes"])

    df = pd.DataFrame(rows).drop_duplicates(subset=["Course Code", "Course Name"])
    # Keep only rows that clearly belong to the 'Bachelor' page; sometimes cross-links appear
    # Heuristic: we keep rows with a Section that starts with "Modeltraject deel" or that have a Pillar marker
    mask = (
        df["Section"].fillna("").str.contains(r"Modeltraject deel", case=False) |
        df["Pillar"].fillna("").str.contains(r"Pijler", case=False)
    )
    df = df[mask].copy()

    # Clean up text for consistency; fill missing values first so astype(str)
    # does not turn None into the literal string 'None'
    def clean_col(s):
        return s.str.replace(r"\s+", " ", regex=True).str.strip()

    for col in ["Section", "Subsection", "Pillar", "Course Name", "Lecturers", "Notes"]:
        df[col] = clean_col(df[col].fillna("").astype(str))

    # Sort for readability: section → pillar → subsection → name
    df.sort_values(
        by=["Section", "Pillar", "Subsection", "Course Name"],
        inplace=True
    )
    df.reset_index(drop=True, inplace=True)
    return df


def archive_and_write(excel_path: str, df: pd.DataFrame, target_sheet: str):
    """
    - If sheet 'BAGES' exists, rename it to 'BAGES_OLD_YYYYMMDD'
    - Write df to 'BAGES'
    """
    try:
        wb = load_workbook(excel_path)
    except FileNotFoundError:
        print(f"[INFO] File not found, creating new workbook: {excel_path}")
        # Write a new file straight away
        with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_sheet, index=False)
        return
    except InvalidFileException:
        print(f"[ERROR] Not a valid Excel file: {excel_path}")
        sys.exit(1)

    # Rename existing BAGES to archive
    if target_sheet in wb.sheetnames:
        date_suffix = datetime.now().strftime("%Y%m%d")
        archive_name = ARCHIVE_PREFIX + date_suffix
        # Ensure uniqueness (append a counter if necessary)
        counter = 1
        final_archive = archive_name
        while final_archive in wb.sheetnames:
            counter += 1
            final_archive = f"{archive_name}_{counter}"
        ws = wb[target_sheet]
        ws.title = final_archive
        print(f"[INFO] Archived existing '{target_sheet}' as '{final_archive}'")

    # Save intermediate
    wb.save(excel_path)

    # Now write the new sheet
    with pd.ExcelWriter(excel_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
        df.to_excel(writer, sheet_name=target_sheet, index=False)

    print(f"[SUCCESS] Wrote updated '{target_sheet}' sheet to '{excel_path}'")
    # Also save the updated DataFrame to a separate new Excel file for convenience
    try:
        src = Path(excel_path)
        new_name = src.with_name(f"{src.stem}_updated{src.suffix}")
        # Write a fresh workbook containing only the updated sheet
        with pd.ExcelWriter(str(new_name), engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_sheet, index=False)
        print(f"[INFO] Also wrote updated data to new file '{new_name}'")
    except Exception as e:
        print(f"[WARNING] Could not write updated copy to new file: {e}")


def main():
    print("[STEP 1] Fetching the bachelor study programme page (2025-2026)…")
    soup = fetch_html(UA_BA_URL)
    time.sleep(0.3)

    print("[STEP 2] Parsing courses and metadata (this may take a few seconds)…")
    df = parse_courses_from_page(soup)
    if df.empty:
        print("[WARNING] No 2025-xxxx course rows found. The page structure may have changed.")
        print("          Please open the URL in a browser and check if '2025-2026' content is visible.")
    else:
        # Sanity: flag pillar rows (deel 3) visibly
        df["Pillar"] = df["Pillar"].replace({"": None})
        print(f"[INFO] Parsed {len(df)} course rows for 2025-2026.")

        # Optional: give you a quick view in console
        head = df.head(10).to_string(index=False)
        print("[PREVIEW]\n" + head)

    print(f"[STEP 3] Updating Excel: {EXCEL_PATH}")
    archive_and_write(EXCEL_PATH, df, TARGET_SHEET)

    print("\nDone. You can now open the workbook and review the refreshed 'BAGES' sheet.")


# Try to save a copy of the corrected script beside the original.
def save_copy(dest_name: str = "script_fixed.py"):
    try:
        import pathlib
        src = pathlib.Path(__file__)
        dst = src.with_name(dest_name)
        dst.write_text(src.read_text(encoding="utf-8"), encoding="utf-8")
        print(f"[INFO] Wrote a copy of this script to '{dst}'")
    except Exception as e:
        print(f"[WARNING] Could not write copy: {e}")


if __name__ == "__main__":
    main()
    save_copy()
326 ongeloofelijken tool/script_fixed.py (Normal file)
@@ -0,0 +1,326 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Update 'BAGES' sheet in 'ongeloofelijken tool.xlsx' with the latest (2025-2026) bachelor History
study programme from UAntwerpen. It scrapes the official page and writes a normalized table.

Source page (2025-2026 bachelor study programme):
https://www.uantwerpen.be/nl/studeren/aanbod/alle-opleidingen/geschiedenis-studeren/bachelor/studieprogramma/
- In 2025-2026 the 'Geschiedenis per periode en gebied' structure changed to a two-pillar model:
  * Chronologische pijler: 3 OOs (middeleeuwen, nieuwe tijd, nieuwste tijd)
  * Thematische pijler: 2 OOs
  (See faculty helpdesk note with change summary and transition measures.)

IMPORTANT:
- This script only updates the 'BAGES' (Bachelor) sheet, because the provided link covers the bachelor page.
- 'MAGES' and 'SPVP' sheets remain untouched.

Tested with: requests, beautifulsoup4, lxml, pandas, openpyxl
"""

import re
import sys
import time
import urllib.parse
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple

import requests
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.utils.exceptions import InvalidFileException


# ------------------------- Configuration -------------------------
EXCEL_PATH = "ongeloofelijken tool.xlsx"
TARGET_SHEET = "BAGES"
ARCHIVE_PREFIX = "BAGES_OLD_"
UA_BA_URL = "https://www.uantwerpen.be/nl/studeren/aanbod/alle-opleidingen/geschiedenis-studeren/bachelor/studieprogramma/"
TARGET_YEAR_PREFIX = "2025-"  # Anchor/course URLs have '?id=<year>-<code>'; we filter with '2025-'
TIMEOUT = 30
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; uantwerpen-bages-updater/1.0; +https://www.uantwerpen.be/)",
    "Accept-Language": "nl,en;q=0.8"
}


# ------------------------- Helpers -------------------------
def fetch_html(url: str) -> BeautifulSoup:
    resp = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "lxml")


def extract_text(el) -> str:
    return re.sub(r"\s+", " ", " ".join(el.stripped_strings)) if el else ""


def parse_meta_from_block(block_text: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """
    Try to parse Semester, Credits, Lecturers, Notes from a block of text next to a course link.
    Returns (semester, credits, lecturers, notes)
    """
    text = block_text

    # Semester examples: '1E SEM', '2E SEM', '1E/2E SEM'
    sem = None
    m_sem = re.search(r"\b(1E\s*/\s*2E\s*SEM|1E\s*SEM|2E\s*SEM)\b", text, flags=re.I)
    if m_sem:
        sem = m_sem.group(1).upper().replace(" ", "")

    # Credits examples: '6 studiepunten', '3 studiepunten'
    credits = None
    m_sp = re.search(r"(\d+)\s*studiepunten", text, flags=re.I)
    if m_sp:
        credits = m_sp.group(1)

    # Lecturers: after 'Lesgever (s):' or 'Lesgever(s):'
    lecturers = None
    m_lect = re.search(r"Lesgever\s*\(s\)\s*:\s*([^|]+?)(?:\s{2,}|$)", text, flags=re.I)
    if not m_lect:
        m_lect = re.search(r"Lesgever[s]?\s*:\s*([^|]+?)(?:\s{2,}|$)", text, flags=re.I)
    if m_lect:
        lecturers = m_lect.group(1).strip(" .").replace(" ,", ",")

    # Notes: look for two-yearly etc.
    notes = None
    if re.search(r"Tweejaarlijks", text, flags=re.I):
        # Try to capture the "even/oneven" phrasing
        m_ev = re.search(r"tweejaarlijks[^.]*?(even|oneven)[^.]*jaar", text, flags=re.I)
        notes = "Tweejaarlijks" + (f" ({m_ev.group(1).lower()} jaar)" if m_ev else "")

    return sem, credits, lecturers, notes


def nearest_sections(a_tag) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """
    Find nearest preceding headings to classify the row.
    Returns (section, subsection, pillar)
    - section: e.g., 'Modeltraject deel 1/2/3'
    - subsection: e.g., 'Wijsbegeerte en sociale wetenschappen', 'Keuzeopleidingsonderdelen', 'Inleiding tot de geschiedenis', etc.
    - pillar: for deel 3: 'Chronologische pijler', 'Thematische pijler' or None
    """
    # The site uses a variety of headings (h2-h5); we trace back to find labels
    section = subsection = pillar = None

    # Walk up multiple previous headings to capture a hierarchy
    prev_heads = []
    cur = a_tag
    for _ in range(40):  # limit walk to avoid infinite loops
        cur = cur.find_previous(["h2", "h3", "h4", "h5"])
        if not cur:
            break
        txt = extract_text(cur)
        prev_heads.append(txt)

    # Determine labels from the nearest few headings
    for txt in prev_heads:
        t = txt.lower()
        if section is None and "modeltraject deel" in t:
            # Normalize like "Modeltraject deel 1"
            section = txt
        if subsection is None:
            # Typical subsections
            if any(k in t for k in [
                "wijsbegeerte en sociale wetenschappen",
                "methodologie van de geschiedenis",
                "historische oefeningen",
                "inleiding tot de geschiedenis",
                "heuristiek",
                "historisch overzicht",
                "keuzeopleidingsonderdelen",
                "sociale wetenschappen",
            ]):
                subsection = txt
        if pillar is None and ("chronologische pijler" in t or "thematische pijler" in t):
            pillar = txt

        if section and (subsection or pillar):
            # Good enough
            break

    return section, subsection, pillar


def parse_courses_from_page(soup: BeautifulSoup) -> pd.DataFrame:
    """
    Parse all course links for the 2025-xxxx academic year, infer metadata from nearby text,
    and return a normalized DataFrame.
    """
    rows = []

    # Capture all anchors that look like course links containing '?id=2025-<CODE>'
    for a in soup.find_all("a", href=True):
        href = a["href"]
        # Normalize relative links
        full_url = urllib.parse.urljoin(UA_BA_URL, href)
        # Filter by the 'id=2025-' parameter (2025-2026)
        if "id=" in href:
            q = urllib.parse.urlparse(href).query
            params = urllib.parse.parse_qs(q)
            ids = params.get("id", [])
            if not ids:
                continue
            # Some pages use '2025-XXXXX' or '2025-XXXXX&lang=nl'
            if not any(idv.startswith(TARGET_YEAR_PREFIX) for idv in ids):
                continue
            course_id = ids[0]  # e.g., '2025-1002FLWGES'
        else:
            # No id=... parameter; skip
            continue

        # Extract code after '2025-'
        code = None
        m = re.match(r"2025-([A-Za-z0-9]+)", course_id)
        if m:
            code = m.group(1)

        name = extract_text(a).strip()
        if not name or not code:
            continue

        # Use a reasonably large ancestor block for metadata search
        container = a
        for _ in range(4):
            if container.parent:
                container = container.parent
        block_text = extract_text(container)

        semester, credits, lecturers, notes = parse_meta_from_block(block_text)
        section, subsection, pillar = nearest_sections(a)

        rows.append({
            "Section": section,
            "Subsection": subsection,
            "Pillar": pillar,
            "Course Code": code,
            "Course Name": name,
            "URL": full_url,
            "Semester": semester,
            "Credits": credits,
            "Lecturers": lecturers,
            "Notes": notes
        })

    if not rows:
        # No matching links; return an empty frame with the expected columns
        return pd.DataFrame(columns=["Section", "Subsection", "Pillar", "Course Code", "Course Name",
                                     "URL", "Semester", "Credits", "Lecturers", "Notes"])

    df = pd.DataFrame(rows).drop_duplicates(subset=["Course Code", "Course Name"])
    # Keep only rows that clearly belong to the 'Bachelor' page; sometimes cross-links appear
    # Heuristic: we keep rows with a Section that starts with "Modeltraject deel" or that have a Pillar marker
    mask = (
        df["Section"].fillna("").str.contains(r"Modeltraject deel", case=False) |
        df["Pillar"].fillna("").str.contains(r"Pijler", case=False)
    )
    df = df[mask].copy()

    # Clean up text for consistency; fill missing values first so astype(str)
    # does not turn None into the literal string 'None'
    def clean_col(s):
        return s.str.replace(r"\s+", " ", regex=True).str.strip()

    for col in ["Section", "Subsection", "Pillar", "Course Name", "Lecturers", "Notes"]:
        df[col] = clean_col(df[col].fillna("").astype(str))

    # Sort for readability: section → pillar → subsection → name
    df.sort_values(
        by=["Section", "Pillar", "Subsection", "Course Name"],
        inplace=True
    )
    df.reset_index(drop=True, inplace=True)
    return df


def archive_and_write(excel_path: str, df: pd.DataFrame, target_sheet: str):
    """
    - If sheet 'BAGES' exists, rename it to 'BAGES_OLD_YYYYMMDD'
    - Write df to 'BAGES'
    """
    try:
        wb = load_workbook(excel_path)
    except FileNotFoundError:
        print(f"[INFO] File not found, creating new workbook: {excel_path}")
        # Write a new file straight away
        with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_sheet, index=False)
        return
    except InvalidFileException:
        print(f"[ERROR] Not a valid Excel file: {excel_path}")
        sys.exit(1)

    # Rename existing BAGES to archive
    if target_sheet in wb.sheetnames:
        date_suffix = datetime.now().strftime("%Y%m%d")
        archive_name = ARCHIVE_PREFIX + date_suffix
        # Ensure uniqueness (append a counter if necessary)
        counter = 1
        final_archive = archive_name
        while final_archive in wb.sheetnames:
            counter += 1
            final_archive = f"{archive_name}_{counter}"
        ws = wb[target_sheet]
        ws.title = final_archive
        print(f"[INFO] Archived existing '{target_sheet}' as '{final_archive}'")

    # Save intermediate
    wb.save(excel_path)

    # Now write the new sheet
    with pd.ExcelWriter(excel_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
        df.to_excel(writer, sheet_name=target_sheet, index=False)

    print(f"[SUCCESS] Wrote updated '{target_sheet}' sheet to '{excel_path}'")
    # Also save the updated DataFrame to a separate new Excel file for convenience
    try:
        src = Path(excel_path)
        new_name = src.with_name(f"{src.stem}_updated{src.suffix}")
        # Write a fresh workbook containing only the updated sheet
        with pd.ExcelWriter(str(new_name), engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_sheet, index=False)
        print(f"[INFO] Also wrote updated data to new file '{new_name}'")
    except Exception as e:
        print(f"[WARNING] Could not write updated copy to new file: {e}")


def main():
    print("[STEP 1] Fetching the bachelor study programme page (2025-2026)…")
    soup = fetch_html(UA_BA_URL)
    time.sleep(0.3)

    print("[STEP 2] Parsing courses and metadata (this may take a few seconds)…")
    df = parse_courses_from_page(soup)
    if df.empty:
        print("[WARNING] No 2025-xxxx course rows found. The page structure may have changed.")
        print("          Please open the URL in a browser and check if '2025-2026' content is visible.")
    else:
        # Sanity: flag pillar rows (deel 3) visibly
        df["Pillar"] = df["Pillar"].replace({"": None})
        print(f"[INFO] Parsed {len(df)} course rows for 2025-2026.")

        # Optional: give you a quick view in console
        head = df.head(10).to_string(index=False)
        print("[PREVIEW]\n" + head)

    print(f"[STEP 3] Updating Excel: {EXCEL_PATH}")
    archive_and_write(EXCEL_PATH, df, TARGET_SHEET)

    print("\nDone. You can now open the workbook and review the refreshed 'BAGES' sheet.")


# Try to save a copy of the corrected script beside the original.
def save_copy(dest_name: str = "script_fixed.py"):
    try:
        import pathlib
        src = pathlib.Path(__file__)
        dst = src.with_name(dest_name)
        dst.write_text(src.read_text(encoding="utf-8"), encoding="utf-8")
        print(f"[INFO] Wrote a copy of this script to '{dst}'")
    except Exception as e:
        print(f"[WARNING] Could not write copy: {e}")


if __name__ == "__main__":
    main()
    save_copy()
BIN ongeloofelijken tool/updated (Normal file)
Binary file not shown.
231 random/archief/examenrooster pre-syllabus (2).xls (Normal file)
@@ -0,0 +1,231 @@

| Studiegidsnr. | Omschrijving | Docenten | SP | Aant. inschr. | Examenvorm | Tijdslots aanvr.SSS | Examen groep | Aant. stdnt. gr. | Datum S+ | Beginuur S+ | Einduur S+ | Faciliteit S+ | Code examenrooster | Extra info voor studenten |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2002FLWGES | Historisch atelier | Titularis Beyen,Marnix | 6,00 | 2 | Mondeling | N | 1 | 2 | 22/08/2025 | 09:00 | 13:00 |  | 2002FLWGES7852_224003_M1_1 |  |
| 2004FLWGES | Theorie van de hist. kennis | Titularis De Munck,Bert | 6,00 | 8 | Mondeling | N | 1 | 8 | 25/08/2025 | 08:30 | 12:30 |  | 2004FLWGES8023_224003_M1_1 |  |
| 2013FLWGES | Landschapsgeschiedenis | Titularis Soens,Tim - Titularis Jongepier,Iason | 6,00 | 2 | Mondeling | N | 1 | 2 | 21/08/2025 | 14:00 | 17:00 |  | 2013FLWGES1238_224003_M1_1 |  |
| 2064FLWGES | Politieke Geschiedenis: thema | Titularis de Smaele,Henk | 6,00 | 3 | Mondeling | N | 1 | 3 |  |  |  |  | 2064FLWGES16077_224003_M1_1 | niet roosteren, studenten vragen docenten te contacteren |
| 2041FLWGES | Theorie en gesch stedenbouw | Titularis De Block,Greet | 6,00 | 2 | Mondeling | N | 1 | 2 | 01/09/2025 | 09:30 | 12:00 |  | 2041FLWGES15506_224003_M1_1 |  |
| 2066FLWGES | War and Occupation Middle East | Co-Titularis Sayim,Burak; Titularis Shaery-Yazdi,Roschanack; Titularis Beyen,Marnix | 6,00 |  | Mondeling | N | 1 |  |  |  |  |  | 2066FLWGES16110_224003_M1_2 |  |
| 2046FLWGES | Stage Cultureel Erfgoed | Titularis Delsaerdt,Pierre | 6,00 |  | Mondeling | N | 1 |  |  |  |  |  | 2046FLWGES15618_224003_M1_1 |  |
| 2053FLWGES | Urban History and Theory | Titularis Van Damme,Ilja | 6,00 | 1 | Mondeling | N | 1 | 1 | 02/09/2025 | 09:00 | 12:30 |  | 2053FLWGES15754_224003_M1_1 |  |
| 2063FLWGES | Cultureel erfgoed | Titularis Shaery-Yazdi,Roschanack - Titularis De Roo,Bas | 6,00 | 1 | Mondeling | N | 1 | 1 | 04/09/2025 | 10:00 | 12:30 |  | 2063FLWGES16071_224003_M1_1 |  |
| 2066FLWGES | War and Occupation Middle East | Co-Titularis Sayim,Burak; Titularis Shaery-Yazdi,Roschanack; Titularis Beyen,Marnix | 6,00 |  | PC examen | N | 1 |  |  |  |  |  | 2066FLWGES16110_224003_D1_1 |  |
| 2065FLWGES | Cultureel erfgoed | Titularis Delsaerdt,Pierre | 6,00 |  | Schriftelijk | N | 1 |  |  |  |  |  | 2065FLWGES16091_224003_S1_1 |  |
| 2045FLWGES | Masterproef | Co-Titularis Vermoesen,Reinoud - Titularis NNB,- - Medewerker NNB, | 18,00 | 18 | Schriftelijk | N | 1 | 18 | 18/08/2025 | 09:00 | 16:00 |  | 2045FLWGES15597_224003_S1_1 |  |
| 2023FLWGES | Hist Body, Gender, Sexuality | Titularis de Smaele,Henk | 6,00 | 3 | Schriftelijk | N | 1 | 3 | 28/08/2025 | 13:30 | 16:30 |  | 2023FLWGES15226_224003_S1_1 |  |
| 2073FLWGES | Joodse Geschiedenis NT | Titularis Dunkelgrün,Theodor | 6,00 | 1 | Schriftelijk | N | 1 | 1 | 26/08/2025 | 08:00 | 12:00 |  |  |  |
324
random/archief/examenrooster pre-syllabus.xls
Normal file
@@ -0,0 +1,324 @@
<html xmlns:v="urn:schemas-microsoft-com:vml"
xmlns:o="urn:schemas-microsoft-com:office:office"
xmlns:x="urn:schemas-microsoft-com:office:excel"
xmlns="http://www.w3.org/TR/REC-html40">

<head>
<meta name="Excel Workbook Frameset">
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Excel.Sheet>
<meta name=Generator content="Microsoft Excel 15">
<link rel=File-List href="examenrooster%20pre-syllabus_files/filelist.xml">
<![if !supportTabStrip]>
<link id="shLink" href="examenrooster%20pre-syllabus_files/sheet001.htm">
<link id="shLink">
<script language="JavaScript">
<!-- Standard Excel-generated tab-strip script for the single sheet "examenrooster pre-syllabus" (frameset construction, tab scrolling, and tab-highlight handlers for IE 4+) omitted
//-->
</script>
<![endif]><!--[if gte mso 9]><xml>
 <x:ExcelWorkbook>
  <x:ExcelWorksheets>
   <x:ExcelWorksheet>
    <x:Name>examenrooster pre-syllabus</x:Name>
    <x:WorksheetSource HRef="examenrooster%20pre-syllabus_files/sheet001.htm"/>
   </x:ExcelWorksheet>
  </x:ExcelWorksheets>
  <x:Stylesheet HRef="examenrooster%20pre-syllabus_files/stylesheet.css"/>
  <x:WindowHeight>8676</x:WindowHeight>
  <x:WindowWidth>23040</x:WindowWidth>
  <x:WindowTopX>32767</x:WindowTopX>
  <x:WindowTopY>32767</x:WindowTopY>
  <x:ProtectStructure>False</x:ProtectStructure>
  <x:ProtectWindows>False</x:ProtectWindows>
 </x:ExcelWorkbook>
</xml><![endif]-->
</head>

<frameset rows="*,39" border=0 width=0 frameborder=no framespacing=0>
 <frame src="examenrooster%20pre-syllabus_files/sheet001.htm" name="frSheet">
 <frame src="examenrooster%20pre-syllabus_files/tabstrip.htm" name="frTabs" marginwidth=0 marginheight=0>
 <noframes>
  <body>
   <p>This page uses frames, but your browser doesn't support them.</p>
  </body>
 </noframes>
</frameset>
</html>
31
random/bascriptiegroepennaarexamengroepen.py
Normal file
@@ -0,0 +1,31 @@
import pandas as pd


def assign_exam_group(value):
    mapping = {
        "Hoofd versus hand: de waardering van ambacht en ambachtelijkheid herbekeken, 1500-2024": 6,
        "De wereld van een koopman in de briefwisseling van Henri-François Schilders, tweede helft zeventiende eeuw": 1,
        "Hoe overleef ik een revolutie? Huishoudens en hun overlevingsstrategieën in een tijd van polarisatie en verandering (1750-1850)": 5,
        "Komt dat zien! Het theatrale uitgaansleven in België in de negentiende en vroege twintigste eeuw (1830-1930)": 4,
        "Erfenisaangiften als venster op de Antwerpse samenleving, 1835 – 1912": 3,
        "Sporen van gulden en franken. De geldzaken van huishoudens in de twintigste eeuw": 2,
        # Add more mappings as needed
    }
    return mapping.get(value, 0)  # Default to 0 if no match


def process_excel(file_path):
    df = pd.read_excel(file_path, sheet_name="Sheet1")

    if 'Groep' not in df.columns:
        raise ValueError("Column 'Groep' not found in the Excel file.")

    df['Examengroep'] = df['Groep'].apply(assign_exam_group)

    output_file = "processed_" + file_path
    df.to_excel(output_file, index=False)
    print(f"Processed file saved as {output_file}")


if __name__ == "__main__":
    process_excel("bascriptie groepen.xlsx")
@@ -128,4 +128,4 @@ def compare_roosters(base_file, comparison_file, output_file):
# Example usage:
compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx')
compare_roosters('examenrooster post-syllabus.xlsx', 'examenrooster pre-syllabus.xlsx', 'differences_output.xlsx')
@@ -2,65 +2,53 @@ import pandas as pd
from datetime import datetime
import locale

file_path = 'bages rooster voor s.xlsx'
file_path = 'examenrooster pre-syllabus.xlsx'
sheet_name = 'rooster'

df = pd.read_excel(file_path, sheet_name=sheet_name)

date_ranges = {
    (pd.Timestamp('2025-01-06'), pd.Timestamp('2025-01-12')): 16,
    (pd.Timestamp('2025-01-13'), pd.Timestamp('2025-01-19')): 17,
    (pd.Timestamp('2025-01-20'), pd.Timestamp('2025-01-26')): 18,
    (pd.Timestamp('2025-01-27'), pd.Timestamp('2025-02-02')): 19,
    # add more ranges as needed
    (pd.Timestamp('2025-05-26'), pd.Timestamp('2025-06-01')): 36,
    (pd.Timestamp('2025-06-02'), pd.Timestamp('2025-06-08')): 37,
    (pd.Timestamp('2025-06-09'), pd.Timestamp('2025-06-15')): 38,
    (pd.Timestamp('2025-06-16'), pd.Timestamp('2025-06-22')): 39,
    (pd.Timestamp('2025-08-18'), pd.Timestamp('2025-08-24')): 48,
    (pd.Timestamp('2025-08-25'), pd.Timestamp('2025-08-31')): 49,
    (pd.Timestamp('2025-09-01'), pd.Timestamp('2025-09-06')): 50,
}


# Custom date parser function
def parse_custom_date(date_str):
    if pd.isna(date_str):
        return pd.NaT  # Return pandas NaT for missing dates
        return pd.NaT
    if isinstance(date_str, pd.Timestamp):
        return date_str
    if isinstance(date_str, str):
        try:
            # Set locale to Dutch
            locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8')
            return datetime.strptime(date_str, '%A %d %B %Y')
        except ValueError as e:
            raise ValueError(f"Date conversion error: {e} for date string: {date_str}")
        finally:
            # Reset locale to the default setting
            locale.setlocale(locale.LC_TIME, 'C')
    else:
        raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")
    raise TypeError(f"Expected string or Timestamp, got {type(date_str).__name__}: {date_str}")


def update_lesweek(date):
    if pd.isna(date):
        return 0
    for date_range, lesweek_value in date_ranges.items():
        if date_range[0] <= date <= date_range[1]:
            return lesweek_value
    return 0


# Ensure the column 'Datum S+' exists and is processed correctly
if 'Datum S+' in df.columns:
if 'Datum' in df.columns:
    try:
        # Convert 'Datum S+' column to datetime using the custom parser
        df['Datum S+'] = df['Datum S+'].apply(parse_custom_date)
        df['Datum'] = df['Datum'].apply(parse_custom_date)
        print(df['Datum'].apply(type).value_counts())  # Debug: print types after parsing
    except (ValueError, TypeError) as e:
        print(f"Error: {e}")
        # Optionally, re-raise the exception if you want to stop execution
        raise
    df['Lesweek'] = df['Datum'].apply(update_lesweek)


# Function to update Lesweek based on date ranges
def update_lesweek(date):
    if pd.isna(date):  # Handle NaT values
        return 0
    for date_range, lesweek_value in date_ranges.items():
        if date_range[0] <= date <= date_range[1]:
            return lesweek_value
    return 0  # Default value if date doesn't fall in any range


# Apply the function to 'Datum S+' column
df['Lesweek'] = df['Datum S+'].apply(update_lesweek)

# Check the results
print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head())

# If needed, you can save the DataFrame to a new Excel file to verify changes
df.to_excel('updated_rooster.xlsx', index=False)
105
startpakketten/README.md
Normal file
@@ -0,0 +1,105 @@
# Startpakket Processing Tool

A Python tool for processing and comparing student data from predeliberation and dashboard Excel files. The tool identifies students with FAIL status and compares SP (study points) values between the two data sources.

## Project Structure

The codebase is organized into focused modules:

### Core Scripts

- **`script.py`** - Main orchestration script; handles the command-line interface and coordinates all processing
- **`data_processor.py`** - Core data processing functions for Excel file handling
- **`cli_args.py`** - Command-line argument parsing and validation
- **`config.py`** - Configuration management and logging setup
- **`file_utils.py`** - File I/O utilities and output formatting

### Processing Modules

- **`checkheaders.py`** - Excel file header processing and normalization
- **`process_predelib_file.py`** - Predeliberation file analysis and FAIL status detection
- **`compare_sp.py`** - SP value comparison between predeliberation and dashboard files

## Usage

### Basic Usage

```bash
python script.py --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
```

### Advanced Usage

```bash
# Save results to a JSON file
python script.py -p db.xlsx -d dashboard_inschrijvingen.xlsx --output results.json

# Enable verbose logging
python script.py --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx --verbose

# Use a custom log file
python script.py --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx --log-file custom.log
```

### Command-line Options

- `--predelib, -p`: Path to the predeliberation Excel file (required)
- `--dashboard, -d`: Path to the dashboard Excel file (required)
- `--output, -o`: Output file path for JSON results (optional)
- `--verbose, -v`: Enable verbose logging
- `--log-file`: Custom log file path (default: `startpakket_processing.log`)

## Features

1. **File Validation**: Automatically validates that input files exist and are in Excel format
2. **Header Processing**: Detects and normalizes Excel headers, even when preceded by preamble rows
3. **FAIL Detection**: Identifies students with FAIL status in the adviesrapport code
4. **SP Comparison**: Compares study points between predeliberation and dashboard data
5. **Comprehensive Logging**: Detailed logging with configurable verbosity
6. **Flexible Output**: Console summary with optional JSON export
7. **Error Handling**: Robust error handling with appropriate exit codes

## Installation

1. Ensure Python 3.12+ is installed
2. Install the required dependencies:

```bash
pip install pandas openpyxl
```

## Input File Requirements

### Predeliberation File (db.xlsx)

Must contain the following columns:

- ID, Achternaam, Voornaam, E-mail
- Totaal aantal SP, Aantal SP vereist
- Adviesrapport code, Waarschuwing

### Dashboard File (dashboard_inschrijvingen.xlsx)

Must contain the following columns:

- ID, Naam, Voornaam
- Ingeschr. SP (intern)
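These columns are validated again at runtime, but a standalone pre-flight check can catch a bad export before a full run. A minimal sketch, assuming the default file names used above:

```python
import pandas as pd

# Required columns per input file (from the requirements above)
REQUIRED = {
    "db.xlsx": [
        "ID", "Achternaam", "Voornaam", "E-mail",
        "Totaal aantal SP", "Aantal SP vereist",
        "Adviesrapport code", "Waarschuwing",
    ],
    "dashboard_inschrijvingen.xlsx": [
        "ID", "Naam", "Voornaam", "Ingeschr. SP (intern)",
    ],
}

for path, columns in REQUIRED.items():
    df = pd.read_excel(path)
    missing = [c for c in columns if c not in df.columns]
    status = "OK" if not missing else f"missing {missing}"
    print(f"{path}: {status}")
```

Exports often carry preamble rows above the real header; the header normalization in `checkheaders.py` handles that case, so run this check on the normalized frame if it fails on the raw file.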
## Output

The tool provides:

1. **Console Summary**: Overview of processing results
2. **Failed Students Report**: Detailed list of students with FAIL status
3. **SP Mismatch Report**: Any discrepancies between predeliberation and dashboard SP values
4. **Optional JSON Export**: Machine-readable results for further processing
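The JSON export mirrors the console summary, so downstream scripts can consume the results directly. A minimal sketch, assuming a `results.json` produced with `--output results.json` (key names as in the processing results dictionary):

```python
import json

with open("results.json", encoding="utf-8") as f:
    results = json.load(f)

print(f"{results['mismatches_count']} SP mismatch(es) between the two sources")
for m in results["mismatches"]:
    print(f"  ID {m['ID']} ({m['Name']}): "
          f"predelib={m['Predelib_SP']}, dashboard={m['Dashboard_SP']}")
```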
## Exit Codes

- `0`: Success (no mismatches found)
- `1`: Processing completed, but mismatches were found
- `130`: Process interrupted by user
- Other: A fatal error occurred
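Because exit code `1` specifically means "run completed, but mismatches were found", the tool is easy to drive from automation. An illustrative wrapper, assuming the default file names from the usage examples:

```python
import subprocess
import sys

# Run the tool as a subprocess and branch on its exit code
proc = subprocess.run([
    sys.executable, "script.py",
    "--predelib", "db.xlsx",
    "--dashboard", "dashboard_inschrijvingen.xlsx",
])

if proc.returncode == 0:
    print("No mismatches found")
elif proc.returncode == 1:
    print("Mismatches found - check the report")
elif proc.returncode == 130:
    print("Interrupted by user")
else:
    print(f"Fatal error (exit code {proc.returncode})")
```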
## Logging

All processing activities are logged to `startpakket_processing.log` by default. Use `--verbose` for detailed debug information.
109
startpakketten/checkheaders.py
Normal file
@@ -0,0 +1,109 @@
import pandas as pd


def check_headers_predelibfile(df):
    # Check if the headers are already in the column names (first row)
    if 'Achternaam' in df.columns and 'Voornaam' in df.columns:
        print("Headers found in first row - file already processed, returning unchanged")
        return df  # Return the dataframe unchanged
    else:
        # Find the row index where 'Achternaam' and 'Voornaam' appear as headers
        header_row = None
        for i, row in df.iterrows():
            if 'Achternaam' in row.values and 'Voornaam' in row.values:
                header_row = i
                break

        if header_row is not None:
            # Delete all rows before the header row
            df = df.iloc[header_row:].reset_index(drop=True)

            # Set the first row as column headers
            df.columns = df.iloc[0]
            df = df.iloc[1:].reset_index(drop=True)

            # Define the columns to keep
            columns_to_keep = [
                'ID', 'Achternaam', 'Voornaam', 'E-mail', 'Loopbaan',
                'Drempelteller omschrijving', 'Programma status omschrijving',
                'OO Periode', 'OO Studiegidsnummer', 'OO Lange omschrijving',
                'OO Eenheden', 'OO Sessie', 'OO Credit (Y/N)', 'OO Periode credit',
                'OO Programma code', 'OO Programma korte omschr.', 'Totaal aantal SP',
                'Aantal SP vereist', 'Aantal SP zonder VZP', 'Adviesrapport code',
                'Waarschuwing', 'Lijsttype'
            ]

            # Keep only the specified columns (only if they exist in the dataframe)
            existing_columns = [col for col in columns_to_keep if col in df.columns]
            df = df[existing_columns]

            print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
        else:
            print("Headers 'Achternaam' and 'Voornaam' not found in the file")
            return df

    if 'Programma status omschrijving' in df.columns:
        before = len(df)
        mask = df['Programma status omschrijving'].astype(str).str.contains(r'\bBeëindigd\b', case=False, na=False)
        df = df[~mask].reset_index(drop=True)
        removed = before - len(df)
        print(f"Removed {removed} rows where Programma status omschrijving contains 'Beëindigd'")
    else:
        print("Column 'Programma status omschrijving' not found; no rows removed")

    return df


def check_headers_dashboard_inschrijvingenfile(df):
    # Check if the headers are already in the column names (first row)
    if 'Naam' in df.columns and 'Voornaam' in df.columns:
        print("Headers found in first row of dashboard_inschrijvingen - no need to search for header row")
        header_row = -1  # Indicates headers are already set
    else:
        # Find the row index where 'Naam' and 'Voornaam' appear as headers
        header_row = None
        for i, row in df.iterrows():
            if 'Naam' in row.values and 'Voornaam' in row.values:
                header_row = i
                break

    # Apply headers only when a valid header row was found (>= 0)
    if header_row is not None and header_row >= 0:
        # Delete all rows before the header row
        df = df.iloc[header_row:].reset_index(drop=True)

        # Set the first row as column headers
        df.columns = df.iloc[0]
        df = df.iloc[1:].reset_index(drop=True)

        print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
    elif header_row == -1:
        # Headers were already correct; nothing to change
        print("Headers were already correct in dashboard_file.")
    else:
        print("Headers 'Naam' and 'Voornaam' not found in the file")
        return df

    # Remove rows where Status contains 'Beëindigd'
    if 'Status' in df.columns:
        before = len(df)
        mask = df['Status'].astype(str).str.contains(r'\bBeëindigd\b', case=False, na=False)
        df = df[~mask].reset_index(drop=True)
        removed = before - len(df)
        print(f"Removed {removed} rows where Status contains 'Beëindigd'")
    else:
        print("Column 'Status' not found; no rows removed")

    return df


if __name__ == "__main__":
    # Read the Excel files
    df_predelib = pd.read_excel('db.xlsx')
    df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')

    # Process the dataframes
    processed_predelib_df = check_headers_predelibfile(df_predelib)
    processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
65
startpakketten/cli_args.py
Normal file
@@ -0,0 +1,65 @@
"""
Command-line argument parsing for the startpakket processing script.
"""
import argparse
import os


def validate_file_path(file_path: str) -> str:
    """Validate that the file exists and is an Excel file"""
    if not os.path.exists(file_path):
        raise argparse.ArgumentTypeError(f"File '{file_path}' does not exist")

    if not file_path.lower().endswith(('.xlsx', '.xls')):
        raise argparse.ArgumentTypeError(f"File '{file_path}' is not an Excel file (.xlsx or .xls)")

    return file_path


def parse_arguments():
    """Parse command line arguments"""
    parser = argparse.ArgumentParser(
        description='Process and compare student data from predeliberation and dashboard Excel files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s --predelib db.xlsx --dashboard dashboard_inschrijvingen.xlsx
  %(prog)s -p /path/to/predelib.xlsx -d /path/to/dashboard.xlsx --output results.json
  %(prog)s --predelib db.xlsx --dashboard dashboard.xlsx --verbose
        """
    )

    parser.add_argument(
        '--predelib', '-p',
        type=validate_file_path,
        required=True,
        help='Path to the predeliberation Excel file (db.xlsx)'
    )

    parser.add_argument(
        '--dashboard', '-d',
        type=validate_file_path,
        required=True,
        help='Path to the dashboard Excel file (dashboard_inschrijvingen.xlsx)'
    )

    parser.add_argument(
        '--output', '-o',
        type=str,
        help='Output file path for results (optional, prints to console if not specified)'
    )

    parser.add_argument(
        '--verbose', '-v',
        action='store_true',
        help='Enable verbose logging'
    )

    parser.add_argument(
        '--log-file',
        type=str,
        default='startpakket_processing.log',
        help='Path to log file (default: startpakket_processing.log)'
    )

    return parser.parse_args()
239
startpakketten/compare_sp.py
Normal file
@@ -0,0 +1,239 @@
import pandas as pd
import logging
from typing import List, Dict, Any

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('sp_comparison.log'),
        logging.StreamHandler()
    ]
)

logger = logging.getLogger(__name__)


def compare_sp_values(predelib_df: pd.DataFrame, dashboard_df: pd.DataFrame) -> List[Dict[str, Any]]:
    """
    Compare 'Totaal aantal SP' from predelib_df with 'Ingeschr. SP (intern)' from dashboard_df
    for matching IDs between the two dataframes.

    Args:
        predelib_df (pandas.DataFrame): Dataframe from predeliberation file with 'ID' and 'Totaal aantal SP' columns
        dashboard_df (pandas.DataFrame): Dataframe from dashboard file with 'ID' and 'Ingeschr. SP (intern)' columns

    Returns:
        list: List of dictionaries containing mismatches, or an empty list if all match

    Raises:
        ValueError: If input dataframes are invalid
        KeyError: If required columns are missing
    """
    logger.info("Starting SP values comparison")

    try:
        # Validate input dataframes
        if predelib_df is None or predelib_df.empty:
            error_msg = "Predelib dataframe is None or empty"
            logger.error(error_msg)
            raise ValueError(error_msg)

        if dashboard_df is None or dashboard_df.empty:
            error_msg = "Dashboard dataframe is None or empty"
            logger.error(error_msg)
            raise ValueError(error_msg)

        # Check for required columns
        required_predelib_columns = ['ID', 'Totaal aantal SP']
        required_dashboard_columns = ['ID', 'Ingeschr. SP (intern)']

        missing_predelib_cols = [col for col in required_predelib_columns if col not in predelib_df.columns]
        missing_dashboard_cols = [col for col in required_dashboard_columns if col not in dashboard_df.columns]

        if missing_predelib_cols:
            error_msg = f"Missing required columns in predelib dataframe: {missing_predelib_cols}"
            logger.error(error_msg)
            raise KeyError(error_msg)

        if missing_dashboard_cols:
            error_msg = f"Missing required columns in dashboard dataframe: {missing_dashboard_cols}"
            logger.error(error_msg)
            raise KeyError(error_msg)

        logger.info("All required columns found in both dataframes")

        # Debug ID columns
        logger.debug(f"Predelib ID column type: {predelib_df['ID'].dtype}")
        logger.debug(f"Dashboard ID column type: {dashboard_df['ID'].dtype}")
        logger.debug(f"Sample predelib IDs: {list(predelib_df['ID'].head())}")
        logger.debug(f"Sample dashboard IDs: {list(dashboard_df['ID'].head())}")

        # Convert IDs to strings to ensure consistent comparison
        try:
            predelib_ids = set(str(x) for x in predelib_df['ID'] if pd.notna(x))
            dashboard_ids = set(str(x) for x in dashboard_df['ID'] if pd.notna(x))
        except Exception as e:
            error_msg = f"Error converting IDs to strings: {e}"
            logger.error(error_msg)
            raise ValueError(error_msg)

        matching_ids = predelib_ids.intersection(dashboard_ids)
        logger.info(f"Found {len(matching_ids)} matching IDs between the two dataframes")
        logger.info(f"Total predelib IDs: {len(predelib_ids)}")
        logger.info(f"Total dashboard IDs: {len(dashboard_ids)}")

        if len(matching_ids) == 0:
            logger.warning("No matching IDs found between the dataframes")
            return []

        # Compare SP values for matching IDs
        mismatches = []
        processed_count = 0

        for id_val in matching_ids:
            try:
                # Convert back to string for filtering
                predelib_matches = predelib_df[predelib_df['ID'].astype(str) == id_val]
                dashboard_matches = dashboard_df[dashboard_df['ID'].astype(str) == id_val]

                if len(predelib_matches) == 0:
                    logger.warning(f"No predelib records found for ID: {id_val}")
                    continue

                if len(dashboard_matches) == 0:
                    logger.warning(f"No dashboard records found for ID: {id_val}")
                    continue

                predelib_sp = predelib_matches['Totaal aantal SP'].iloc[0]
                dashboard_sp = dashboard_matches['Ingeschr. SP (intern)'].iloc[0]
                name_student = predelib_matches['Voornaam'].iloc[0] + ' ' + predelib_matches['Achternaam'].iloc[0]

                # Handle potential NaN values
                if pd.isna(predelib_sp) or pd.isna(dashboard_sp):
                    logger.warning(f"NaN values found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")
                    continue

                # Convert to comparable types (NaN was already filtered out above)
                try:
                    predelib_sp_num = float(predelib_sp)
                    dashboard_sp_num = float(dashboard_sp)
                except (ValueError, TypeError) as e:
                    logger.warning(f"Error converting SP values to numbers for ID {id_val}: {e}")
                    # Fall back to string comparison
                    predelib_sp_num = str(predelib_sp)
                    dashboard_sp_num = str(dashboard_sp)

                if predelib_sp_num != dashboard_sp_num:
                    mismatch = {
                        'ID': id_val,
                        'Name': name_student,
                        'Predelib_SP': predelib_sp,
                        'Dashboard_SP': dashboard_sp,
                    }
                    mismatches.append(mismatch)
                    logger.debug(f"Mismatch found for ID {id_val}: Predelib={predelib_sp}, Dashboard={dashboard_sp}")

                processed_count += 1

            except Exception as e:
                logger.error(f"Error processing ID {id_val}: {e}")
                continue

        logger.info(f"Successfully processed {processed_count} matching records")

        if len(mismatches) == 0:
            logger.info("All SP values match between the two dataframes!")
        else:
            logger.warning(f"Found {len(mismatches)} mismatches")
            for mismatch in mismatches:
                logger.info(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): Predeliberatierapport SP={mismatch['Predelib_SP']}, Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")

        return mismatches

    except Exception as e:
        logger.error(f"Unexpected error in compare_sp_values: {e}")
        raise


if __name__ == "__main__":
    # Example usage - can be used for testing
    logger.info("Starting SP comparison script")

    try:
        from checkheaders import check_headers_predelibfile, check_headers_dashboard_inschrijvingenfile

        # Read the Excel files
        logger.info("Reading Excel files")
        try:
            df_predelib = pd.read_excel('db.xlsx')
            logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")
        except FileNotFoundError:
            logger.error("db.xlsx file not found")
            raise
        except Exception as e:
            logger.error(f"Error reading db.xlsx: {e}")
            raise

        try:
            df_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')
            logger.info(f"Successfully loaded dashboard file with shape: {df_dashboard.shape}")
        except FileNotFoundError:
            logger.error("dashboard_inschrijvingen.xlsx file not found")
            raise
        except Exception as e:
            logger.error(f"Error reading dashboard_inschrijvingen.xlsx: {e}")
            raise

        # Process the dataframes
        logger.info("Processing dataframes")
        try:
            processed_predelib_df = check_headers_predelibfile(df_predelib)
            logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")
        except Exception as e:
            logger.error(f"Error processing predelib file: {e}")
            raise

        try:
            processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)
            logger.info(f"Processed dashboard dataframe shape: {processed_dashboard_df.shape}")
        except Exception as e:
            logger.error(f"Error processing dashboard file: {e}")
            raise

        # Compare SP values between the two processed dataframes
        logger.info("Starting SP values comparison")
        try:
            mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)
            logger.info(f"SP comparison completed successfully. Found {len(mismatches)} mismatches.")

            # Print summary for console output
            print(f"\n{'='*50}")
            print("SP COMPARISON SUMMARY")
            print(f"{'='*50}")
            print(f"Predelib records processed: {len(processed_predelib_df)}")
            print(f"Dashboard records processed: {len(processed_dashboard_df)}")
            print(f"Mismatches found: {len(mismatches)}")

            if mismatches:
                print("\nDetailed mismatches:")
                for mismatch in mismatches:
                    print(f"  ID {mismatch['ID']}: Predelib={mismatch['Predelib_SP']}, Dashboard={mismatch['Dashboard_SP']}")
            else:
                print("\nAll SP values match perfectly!")
            print(f"{'='*50}")

        except Exception as e:
            logger.error(f"Error during SP comparison: {e}")
            raise

    except ImportError as e:
        logger.error(f"Import error: {e}")
        print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
    except Exception as e:
        logger.error(f"Unexpected error in main execution: {e}")
        print(f"An error occurred: {e}")
        print("Check the log file 'sp_comparison.log' for detailed error information.")
    finally:
        logger.info("SP comparison script completed")
55
startpakketten/config.py
Normal file
@@ -0,0 +1,55 @@
"""
Configuration and logging setup for the startpakket processing application.
"""
import logging
import sys


def setup_logging(log_file: str = 'startpakket_processing.log', verbose: bool = False) -> logging.Logger:
    """
    Configure logging for the application.

    Args:
        log_file: Path to the log file
        verbose: Enable debug logging if True

    Returns:
        Configured logger instance
    """
    # Remove existing handlers to avoid duplicates
    for handler in logging.root.handlers[:]:
        logging.root.removeHandler(handler)

    # Set logging level
    level = logging.DEBUG if verbose else logging.INFO

    # Configure logging
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[
            logging.FileHandler(log_file),
            logging.StreamHandler(sys.stdout)
        ]
    )

    logger = logging.getLogger(__name__)

    if verbose:
        logger.debug("Verbose logging enabled")

    return logger


def get_exit_code(results: dict) -> int:
    """
    Determine the appropriate exit code based on processing results.

    Args:
        results: Processing results dictionary

    Returns:
        Exit code (0 for success, 1 for mismatches found)
    """
    return 0 if results.get('mismatches_count', 0) == 0 else 1
76
startpakketten/data_processor.py
Normal file
@@ -0,0 +1,76 @@
"""
Core data processing functions for the startpakket processing script.
"""
import pandas as pd
import logging
from typing import Dict, Any

from checkheaders import check_headers_dashboard_inschrijvingenfile, check_headers_predelibfile
from process_predelib_file import check_students_with_fail_adviesrapport, check_students_with_mismatching_SP_values
from compare_sp import compare_sp_values

logger = logging.getLogger(__name__)


def process_files(predelib_path: str, dashboard_path: str, verbose: bool = False) -> Dict[str, Any]:
    """
    Process the Excel files and return results.

    Args:
        predelib_path: Path to the predeliberation Excel file
        dashboard_path: Path to the dashboard Excel file
        verbose: Enable verbose logging

    Returns:
        Dictionary containing processing results

    Raises:
        Exception: If file processing fails
    """
    try:
        # Read Excel files
        logger.info(f"Reading predeliberation file: {predelib_path}")
        df_predelib = pd.read_excel(predelib_path)
        logger.info(f"Predelib file loaded successfully. Shape: {df_predelib.shape}")

        logger.info(f"Reading dashboard file: {dashboard_path}")
        df_dashboard = pd.read_excel(dashboard_path)
        logger.info(f"Dashboard file loaded successfully. Shape: {df_dashboard.shape}")

        # Process the dataframes
        logger.info("Processing predeliberation file headers")
        processed_predelib_df = check_headers_predelibfile(df_predelib)

        logger.info("Processing dashboard file headers")
        processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(df_dashboard)

        # Check the predeliberation file for students with a fail in 'Adviesrapport code'
        logger.info("Checking for students with FAIL status in predeliberation file")
        students_with_fail = check_students_with_fail_adviesrapport(processed_predelib_df)
        students_with_mismatching_SP_values_predelib = check_students_with_mismatching_SP_values(processed_predelib_df)

        # Compare SP values
        logger.info("Comparing SP values between files")
        mismatches = compare_sp_values(processed_predelib_df, processed_dashboard_df)

        # Prepare results
        results = {
            'predelib_file': predelib_path,
            'dashboard_file': dashboard_path,
            'predelib_records': len(processed_predelib_df),
            'dashboard_records': len(processed_dashboard_df),
            'students_with_fail_count': len(students_with_fail),
            'students_with_fail': students_with_fail,
            'students_with_mismatching_SP_values_predelib_count': len(students_with_mismatching_SP_values_predelib),
            'students_with_mismatching_SP_values_predelib': students_with_mismatching_SP_values_predelib,
            'mismatches_count': len(mismatches),
            'mismatches': mismatches,
            'status': 'completed'
        }

        logger.info("Processing completed successfully.")
        return results

    except Exception as e:
        logger.error(f"Error processing files: {e}")
        raise
57
startpakketten/file_utils.py
Normal file
@@ -0,0 +1,57 @@
"""
File I/O utilities and output formatting for the startpakket processing script.
"""
import json
import logging
from typing import Dict, Any

from process_predelib_file import print_students_with_fail_ar_summary

logger = logging.getLogger(__name__)


def save_results(results: Dict[str, Any], output_path: str) -> None:
    """Save results to a JSON file"""
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2, ensure_ascii=False)
        logger.info(f"Results saved to: {output_path}")
    except Exception as e:
        logger.error(f"Error saving results to {output_path}: {e}")
        raise


def print_summary(results: Dict[str, Any]) -> None:
    """Print a summary of the results to console"""
    print(f"\n{'='*60}")
    print("STARTPAKKET PROCESSING SUMMARY")
    print(f"{'='*60}")
    print(f"Predelib file: {results['predelib_file']}")
    print(f"Dashboard file: {results['dashboard_file']}")
    print(f"Predelib records processed: {results['predelib_records']}")
    print(f"Dashboard records processed: {results['dashboard_records']}")
    print(f"Students with FAIL adviesrapport found: {results['students_with_fail_count']}")
    print(f"Students with mismatching SP values in the predeliberation file found: {results['students_with_mismatching_SP_values_predelib_count']}")
    print(f"Mismatches found: {results['mismatches_count']}")

    if results['students_with_fail_count'] > 0:
        # Pass the record count, not the file path, so the summary header is correct
        print_students_with_fail_ar_summary(results['students_with_fail'], results['predelib_records'])

    if results['students_with_mismatching_SP_values_predelib_count'] > 0:
        print("\nDetailed mismatches found in predeliberation file:")
        for student in results['students_with_mismatching_SP_values_predelib']:
            name = f"{student['Voornaam']} {student['Achternaam']}"
            print(f"Student - ID {student['ID']} ({name}): "
                  f"Totaal aantal SP={student['Totaal_aantal_SP']}, "
                  f"Aantal SP Vereist={student['Aantal_SP_vereist']}")

    if results['mismatches']:
        print("\nDetailed mismatches between SP predeliberatierapport and Dashboard Inschrijvingen:")
        for mismatch in results['mismatches']:
            print(f"Mismatch - ID {mismatch['ID']} ({mismatch['Name']}): "
                  f"Predeliberatierapport SP={mismatch['Predelib_SP']}, "
                  f"Dashboard Inschrijvingen SP={mismatch['Dashboard_SP']}")
    else:
        print("\n✅ All SP values match perfectly!")

    print(f"Status: {results['status']}")
    print(f"{'='*60}")
309
startpakketten/process_predelib_file.py
Normal file
@@ -0,0 +1,309 @@
|
||||
import pandas as pd
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.FileHandler('predelib_processing.log'),
|
||||
logging.StreamHandler()
|
||||
]
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def check_students_with_fail_adviesrapport(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Check for students with 'FAIL' in 'Adviesrapport code' column and extract their details.
|
||||
|
||||
Args:
|
||||
predelib_df (pandas.DataFrame): Processed predeliberation dataframe
|
||||
|
||||
Returns:
|
||||
list: List of dictionaries containing failed student details
|
||||
|
||||
Raises:
|
||||
ValueError: If input dataframe is invalid
|
||||
KeyError: If required columns are missing
|
||||
"""
|
||||
logger.info("Starting failed students check")
|
||||
|
||||
try:
|
||||
# Validate input dataframe
|
||||
if predelib_df is None or predelib_df.empty:
|
||||
error_msg = "Predelib dataframe is None or empty"
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"Predelib dataframe shape: {predelib_df.shape}")
|
||||
|
||||
# Define required columns
|
||||
required_columns = [
|
||||
'ID', 'Achternaam', 'Voornaam', 'E-mail',
|
||||
'Totaal aantal SP', 'Aantal SP vereist', 'Waarschuwing', 'Adviesrapport code'
|
||||
]
|
||||
|
||||
# Check for required columns
|
||||
missing_columns = [col for col in required_columns if col not in predelib_df.columns]
|
||||
|
||||
if missing_columns:
|
||||
error_msg = f"Missing required columns in predelib dataframe: {missing_columns}"
|
||||
logger.error(error_msg)
|
||||
logger.info(f"Available columns: {list(predelib_df.columns)}")
|
||||
raise KeyError(error_msg)
|
||||
|
||||
logger.info("All required columns found in dataframe")
|
||||
|
||||
# Debug Adviesrapport code column
|
||||
logger.debug(f"Adviesrapport code column type: {predelib_df['Adviesrapport code'].dtype}")
|
||||
unique_codes = predelib_df['Adviesrapport code'].unique()
|
||||
logger.debug(f"Unique Adviesrapport codes: {unique_codes}")
|
||||
|
||||
# Filter for FAIL cases
|
||||
try:
|
||||
# Convert to string and check for FAIL (case-insensitive)
|
||||
fail_mask = predelib_df['Adviesrapport code'].astype(str).str.upper() == 'FAIL'
|
||||
students_with_fail_ar_df = predelib_df[fail_mask].copy()
|
||||
|
||||
logger.info(f"Found {len(students_with_fail_ar_df)} students with FAIL status")
|
||||
|
||||
# Remove duplicate rows (exact same values in all columns)
|
||||
initial_count = len(students_with_fail_ar_df)
|
||||
students_with_fail_ar_df = students_with_fail_ar_df.drop_duplicates()
|
||||
final_count = len(students_with_fail_ar_df)
|
||||
|
||||
duplicates_removed = initial_count - final_count
|
||||
if duplicates_removed > 0:
|
||||
logger.info(f"Removed {duplicates_removed} duplicate rows")
|
||||
else:
|
||||
logger.info("No duplicate rows found")
|
||||
|
||||
logger.info(f"Final count after duplicate removal: {final_count} students with FAIL status")
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error filtering for FAIL status: {e}"
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
if len(students_with_fail_ar_df) == 0:
|
||||
logger.info("No students with FAIL status found")
|
||||
return []
|
||||
|
||||
# Extract details for failed students
|
||||
students_with_fail_ar = []
|
||||
processed_count = 0
|
||||
|
||||
for index, row in students_with_fail_ar_df.iterrows():
|
||||
try:
|
||||
# Extract student details
|
||||
student_details = {
|
||||
'ID': row['ID'],
|
||||
'Achternaam': row['Achternaam'],
|
||||
'Voornaam': row['Voornaam'],
|
||||
'E-mail': row['E-mail'],
|
||||
'Totaal_aantal_SP': row['Totaal aantal SP'],
|
||||
'Aantal_SP_vereist': row['Aantal SP vereist'],
|
||||
'Waarschuwing': row['Waarschuwing'],
|
||||
'Adviesrapport_code': row['Adviesrapport code']
|
||||
}
|
||||
|
||||
# Handle potential NaN values
|
||||
for key, value in student_details.items():
|
||||
if pd.isna(value):
|
||||
student_details[key] = None
|
||||
logger.warning(f"NaN value found for {key} in student ID: {row['ID']}")
|
||||
|
||||
students_with_fail_ar.append(student_details)
|
||||
processed_count += 1
|
||||
|
||||
logger.debug(f"Processed failed student: ID={row['ID']}, "
|
||||
f"Name={row['Achternaam']}, {row['Voornaam']}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing student at index {index}: {e}")
|
||||
continue
|
||||
|
||||
logger.info(f"Successfully processed {processed_count} failed students")
|
||||
|
||||
# Log summary
|
||||
if students_with_fail_ar:
|
||||
logger.warning(f"Found {len(students_with_fail_ar)} students with FAIL status")
|
||||
for student in students_with_fail_ar:
|
||||
logger.info(f"Failed student - ID: {student['ID']}, "
|
||||
f"Name: {student['Achternaam']}, {student['Voornaam']}, "
|
||||
f"SP: {student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}")
|
||||
else:
|
||||
logger.info("No failed students found")
|
||||
|
||||
return students_with_fail_ar
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in check_students_with_fail_ar: {e}")
|
||||
raise
|
||||
|
||||
def check_students_with_mismatching_SP_values(predelib_df: pd.DataFrame) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Check for students with mismatching SP values in the predeliberation dataframe.
|
||||
|
||||
Args:
|
||||
predelib_df (pandas.DataFrame): Processed predeliberation dataframe
|
||||
|
||||
Returns:
|
||||
list: List of dictionaries containing student details with mismatching SP values
|
||||
"""
|
||||
logger.info("Starting check for students with mismatching SP values in the predeliberation file")
|
||||
try:
|
||||
# Validate input dataframe
|
||||
if predelib_df is None or predelib_df.empty:
|
||||
error_msg = "Predelib dataframe is None or empty"
|
||||
logger.error(error_msg)
|
||||
raise ValueError(error_msg)
|
||||
|
||||
logger.info(f"Predelib dataframe shape: {predelib_df.shape}")
|
||||
|
||||
# Define required columns
|
||||
required_columns = [
|
||||
'ID', 'Achternaam', 'Voornaam', 'E-mail',
|
||||
'Totaal aantal SP', 'Aantal SP vereist'
|
||||
]
|
||||
|
||||
# Check for required columns
|
||||
missing_columns = [col for col in required_columns if col not in predelib_df.columns]
|
||||
|
||||
if missing_columns:
|
||||
error_msg = f"Missing required columns in predelib dataframe: {missing_columns}"
|
||||
logger.error(error_msg)
|
||||
logger.info(f"Available columns: {list(predelib_df.columns)}")
|
||||
raise KeyError(error_msg)
|
||||
|
||||
logger.info("All required columns found in dataframe")
|
||||
|
||||
# Use vectorized comparison to find rows where the SP values differ
|
||||
sp_col = predelib_df['Totaal aantal SP']
|
||||
req_col = predelib_df['Aantal SP vereist']
|
||||
|
||||
# Simple inequality works for most cases; NaN != NaN will be True which is acceptable
|
||||
mask = sp_col != req_col
|
||||
mismatches_df = predelib_df[mask].copy()
|
||||
|
||||
logger.info(f"Found {len(mismatches_df)} raw rows with mismatching SP values")
|
||||
|
||||
if mismatches_df.empty:
|
||||
logger.info("No students with mismatching SP values found")
|
||||
return []
|
||||
|
||||
# Keep only unique students by 'ID' (first occurrence).
|
||||
if 'ID' in mismatches_df.columns:
|
||||
before_dedup = len(mismatches_df)
|
||||
mismatches_df = mismatches_df.drop_duplicates(subset=['ID'])
|
||||
after_dedup = len(mismatches_df)
|
||||
logger.info(f"Reduced from {before_dedup} rows to {after_dedup} unique students by ID")
|
||||
else:
|
||||
logger.warning("Column 'ID' not found - cannot deduplicate by student ID")
|
||||
|
||||
# Ensure optional columns exist to avoid KeyError when building dicts
|
||||
for optional_col in ('Waarschuwing', 'Adviesrapport code'):
|
||||
if optional_col not in mismatches_df.columns:
|
||||
mismatches_df[optional_col] = None
|
||||
|
||||
# Build the list of mismatching students
|
||||
mismatching_students = []
|
||||
for _, row in mismatches_df.iterrows():
|
||||
mismatching_students.append({
|
||||
'ID': row.get('ID'),
|
||||
'Achternaam': row.get('Achternaam'),
|
||||
'Voornaam': row.get('Voornaam'),
|
||||
'E-mail': row.get('E-mail'),
|
||||
'Totaal_aantal_SP': row.get('Totaal aantal SP'),
|
||||
'Aantal_SP_vereist': row.get('Aantal SP vereist'),
|
||||
'Waarschuwing': row.get('Waarschuwing'),
|
||||
'Adviesrapport_code': row.get('Adviesrapport code')
|
||||
})
|
||||
|
||||
logger.info(f"Returning {len(mismatching_students)} unique students with mismatching SP values")
|
||||
return mismatching_students
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in check_students_with_mismatching_SP_values: {e}")
|
||||
raise
|
||||
|
||||
|
||||
def print_students_with_fail_ar_summary(students_with_fail_ar: List[Dict[str, Any]], predelib_df: pd.DataFrame):
|
||||
"""Print a formatted summary of students with FAIL status"""
|
||||
print(f"\n{'='*80}")
|
||||
print("Students with FAIL AR status report")
|
||||
print(f"{'='*80}")
|
||||
print(f"Total students processed: {len(predelib_df)}")
|
||||
print(f"Students with FAIL status: {len(students_with_fail_ar)}")
|
||||
|
||||
if students_with_fail_ar:
|
||||
print(f"\nDetailed failed students list:")
|
||||
print(f"{'ID':<10} {'Name':<25} {'Email':<30} {'SP':<15} {'Warning':<20}")
|
||||
print(f"{'-'*10} {'-'*25} {'-'*30} {'-'*15} {'-'*20}")
|
||||
|
||||
for student in students_with_fail_ar:
|
||||
name = f"{student['Achternaam']}, {student['Voornaam']}"
|
||||
sp_info = f"{student['Totaal_aantal_SP']}/{student['Aantal_SP_vereist']}"
|
||||
warning = str(student['Waarschuwing']) if student['Waarschuwing'] else "None"
|
||||
|
||||
print(f"{str(student['ID']):<10} {name[:25]:<25} {str(student['E-mail'])[:30]:<30} "
|
||||
f"{sp_info:<15} {warning[:20]:<20}")
|
||||
else:
|
||||
print("\n✅ No students with FAIL status found!")
|
||||
|
||||
print(f"{'='*80}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Example usage - can be used for testing
|
||||
logger.info("Starting failed students check script")
|
||||
|
||||
try:
|
||||
from checkheaders import check_headers_predelibfile
|
||||
|
||||
# Read the Excel file
|
||||
logger.info("Reading predelib Excel file")
|
||||
try:
|
||||
df_predelib = pd.read_excel('db.xlsx')
|
||||
logger.info(f"Successfully loaded predelib file with shape: {df_predelib.shape}")
|
||||
except FileNotFoundError:
|
||||
logger.error("db.xlsx file not found")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading db.xlsx: {e}")
|
||||
raise
|
||||
|
||||
# Process the dataframe
|
||||
logger.info("Processing predelib dataframe")
|
||||
try:
|
||||
processed_predelib_df = check_headers_predelibfile(df_predelib)
|
||||
logger.info(f"Processed predelib dataframe shape: {processed_predelib_df.shape}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing predelib file: {e}")
|
||||
raise
|
||||
|
||||
# Check for failed students
|
||||
logger.info("Checking for failed students")
|
||||
try:
|
||||
students_with_fail_ar = check_students_with_fail_adviesrapport(processed_predelib_df)
|
||||
logger.info(f"Failed students check completed. Found {len(students_with_fail_ar)} failed students.")
|
||||
|
||||
# Print summary for console output
|
||||
print_students_with_fail_ar_summary(students_with_fail_ar, processed_predelib_df)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error during failed students check: {e}")
|
||||
raise
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"Import error: {e}")
|
||||
print("Error: Could not import required modules. Make sure checkheaders.py is in the same directory.")
|
||||
except Exception as e:
|
||||
logger.error(f"Unexpected error in main execution: {e}")
|
||||
print(f"An error occurred: {e}")
|
||||
print("Check the log file 'predelib_processing.log' for detailed error information.")
|
||||
finally:
|
||||
logger.info("Failed students check script completed")
|
||||
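Note that checkheaders.py is imported above but is not part of this diff. A minimal sketch of what check_headers_predelibfile might look like, inferred only from how it is called here; the required column set is an assumption drawn from the dict keys built in this script, not from the real module:

import pandas as pd

# Assumed column set - taken from the fields accessed in this script,
# not from the actual checkheaders.py (which this diff does not show)
REQUIRED_COLUMNS = ['ID', 'Achternaam', 'Voornaam', 'E-mail',
                    'Totaal aantal SP', 'Aantal SP vereist']


def check_headers_predelibfile(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize header whitespace and verify the expected columns exist."""
    df = df.copy()
    df.columns = [str(c).strip() for c in df.columns]
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"Predelib file is missing columns: {missing}")
    return df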
51
startpakketten/script.py
Normal file
@@ -0,0 +1,51 @@
"""
Main script for processing and comparing student data from predeliberation
and dashboard Excel files.
"""
import sys
import logging

from cli_args import parse_arguments
from config import setup_logging, get_exit_code
from data_processor import process_files
from file_utils import save_results, print_summary


def main():
    """Main function - orchestrates the entire processing pipeline."""
    try:
        # Parse command-line arguments
        args = parse_arguments()

        # Set up logging configuration
        logger = setup_logging(args.log_file, args.verbose)

        logger.info("Starting startpakket processing")
        logger.info(f"Predelib file: {args.predelib}")
        logger.info(f"Dashboard file: {args.dashboard}")

        # Process the Excel files
        results = process_files(args.predelib, args.dashboard, args.verbose)

        # Save results to file if specified
        if args.output:
            save_results(results, args.output)

        # Print summary to console
        print_summary(results)

        # Exit with appropriate code
        exit_code = get_exit_code(results)
        logger.info(f"Processing completed with exit code: {exit_code}")
        sys.exit(exit_code)

    except KeyboardInterrupt:
        print("\nProcessing interrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"Fatal error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
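config.get_exit_code is referenced above but not shown in this diff. The log below records exit code 1 for a run that found one mismatch, so a plausible minimal sketch is the following; the shape of the results dict is an assumption:

def get_exit_code(results: dict) -> int:
    """Exit 0 when every SP value matches, 1 when mismatches were found.

    Hypothetical sketch: the real config.get_exit_code and the exact
    shape of `results` are not in this diff; a 'mismatches' key is assumed.
    """
    return 1 if results.get('mismatches') else 0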
58
startpakketten/sp_comparison.log
Normal file
@@ -0,0 +1,58 @@
2025-07-29 11:56:36,048 - __main__ - INFO - Starting SP comparison script
2025-07-29 11:56:36,086 - __main__ - INFO - Reading Excel files
2025-07-29 11:56:36,757 - __main__ - INFO - Successfully loaded predelib file with shape: (123, 22)
2025-07-29 11:56:36,776 - __main__ - INFO - Successfully loaded dashboard file with shape: (40, 36)
2025-07-29 11:56:36,776 - __main__ - INFO - Processing dataframes
2025-07-29 11:56:36,778 - __main__ - INFO - Processed predelib dataframe shape: (123, 22)
2025-07-29 11:56:36,780 - __main__ - INFO - Processed dashboard dataframe shape: (37, 36)
2025-07-29 11:56:36,781 - __main__ - INFO - Starting SP values comparison
2025-07-29 11:56:36,781 - __main__ - INFO - Starting SP values comparison
2025-07-29 11:56:36,781 - __main__ - INFO - Predelib dataframe shape: (123, 22)
2025-07-29 11:56:36,782 - __main__ - INFO - Dashboard dataframe shape: (37, 36)
2025-07-29 11:56:36,782 - __main__ - INFO - All required columns found in both dataframes
2025-07-29 11:56:36,782 - __main__ - INFO - Found 37 matching IDs between the two dataframes
2025-07-29 11:56:36,783 - __main__ - INFO - Total predelib IDs: 37
2025-07-29 11:56:36,783 - __main__ - INFO - Total dashboard IDs: 37
2025-07-29 11:56:36,798 - __main__ - INFO - Successfully processed 37 matching records
2025-07-29 11:56:36,798 - __main__ - WARNING - Found 1 mismatches
2025-07-29 11:56:36,798 - __main__ - INFO - Mismatch - ID 20250706: Predelib=39, Dashboard=45
2025-07-29 11:56:36,798 - __main__ - INFO - SP comparison completed successfully. Found 1 mismatches.
2025-07-29 11:56:36,801 - __main__ - INFO - SP comparison script completed
2025-07-29 13:29:44,971 - __main__ - INFO - Starting SP comparison script
2025-07-29 13:29:45,011 - __main__ - INFO - Reading Excel files
2025-07-29 13:29:48,429 - __main__ - INFO - Successfully loaded predelib file with shape: (123, 22)
2025-07-29 13:29:48,456 - __main__ - INFO - Successfully loaded dashboard file with shape: (40, 36)
2025-07-29 13:29:48,456 - __main__ - INFO - Processing dataframes
2025-07-29 13:29:48,459 - __main__ - INFO - Processed predelib dataframe shape: (123, 22)
2025-07-29 13:29:48,460 - __main__ - INFO - Processed dashboard dataframe shape: (37, 36)
2025-07-29 13:29:48,460 - __main__ - INFO - Starting SP values comparison
2025-07-29 13:29:48,460 - __main__ - INFO - Starting SP values comparison
2025-07-29 13:29:48,460 - __main__ - INFO - All required columns found in both dataframes
2025-07-29 13:29:48,460 - __main__ - INFO - Found 37 matching IDs between the two dataframes
2025-07-29 13:29:48,460 - __main__ - INFO - Total predelib IDs: 37
2025-07-29 13:29:48,461 - __main__ - INFO - Total dashboard IDs: 37
2025-07-29 13:29:48,486 - __main__ - INFO - Successfully processed 37 matching records
2025-07-29 13:29:48,487 - __main__ - WARNING - Found 1 mismatches
2025-07-29 13:29:48,487 - __main__ - INFO - Mismatch - ID 20250706: Predeliberatierapport SP=39, Dashboard Inschrijvingen SP=45
2025-07-29 13:29:48,487 - __main__ - INFO - SP comparison completed successfully. Found 1 mismatches.
2025-07-29 13:29:48,488 - __main__ - INFO - SP comparison script completed
2025-07-29 14:06:13,452 - __main__ - INFO - Starting startpakket processing
2025-07-29 14:06:13,453 - __main__ - INFO - Predelib file: db.xlsx
2025-07-29 14:06:13,453 - __main__ - INFO - Dashboard file: dashboard_inschrijvingen.xlsx
2025-07-29 14:06:13,453 - __main__ - INFO - Reading predeliberation file: db.xlsx
2025-07-29 14:06:14,888 - __main__ - INFO - Predelib file loaded successfully. Shape: (123, 22)
2025-07-29 14:06:14,888 - __main__ - INFO - Reading dashboard file: dashboard_inschrijvingen.xlsx
2025-07-29 14:06:14,948 - __main__ - INFO - Dashboard file loaded successfully. Shape: (40, 36)
2025-07-29 14:06:14,948 - __main__ - INFO - Processing predeliberation file headers
2025-07-29 14:06:14,952 - __main__ - INFO - Processing dashboard file headers
2025-07-29 14:06:14,953 - __main__ - INFO - Comparing SP values between files
2025-07-29 14:06:14,953 - compare_sp - INFO - Starting SP values comparison
2025-07-29 14:06:14,953 - compare_sp - INFO - All required columns found in both dataframes
2025-07-29 14:06:14,954 - compare_sp - INFO - Found 37 matching IDs between the two dataframes
2025-07-29 14:06:14,955 - compare_sp - INFO - Total predelib IDs: 37
2025-07-29 14:06:14,955 - compare_sp - INFO - Total dashboard IDs: 37
2025-07-29 14:06:14,967 - compare_sp - INFO - Successfully processed 37 matching records
2025-07-29 14:06:14,967 - compare_sp - WARNING - Found 1 mismatches
2025-07-29 14:06:14,968 - compare_sp - INFO - Mismatch - ID 20250706: Predeliberatierapport SP=39, Dashboard Inschrijvingen SP=45
2025-07-29 14:06:14,968 - __main__ - INFO - Processing completed successfully. Found 1 mismatches.
2025-07-29 14:06:14,970 - __main__ - INFO - Processing completed with exit code: 1
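The compare_sp module that produced the log lines above is not included in this view. A minimal sketch of a merge-based comparison that would emit messages like these, assuming both frames share an 'ID' column; the SP column names are assumptions, since the real ones are not shown:

import logging

import pandas as pd

logger = logging.getLogger("compare_sp")


def compare_sp_values(predelib_df: pd.DataFrame, dash_df: pd.DataFrame,
                      predelib_sp_col: str = "SP_predelib",   # assumed name
                      dash_sp_col: str = "SP_dash") -> list:  # assumed name
    """Inner-join both frames on 'ID' and report rows whose SP values differ."""
    merged = predelib_df.merge(dash_df, on="ID", how="inner",
                               suffixes=("_predelib", "_dash"))
    logger.info(f"Found {len(merged)} matching IDs between the two dataframes")

    mismatches = merged[merged[predelib_sp_col] != merged[dash_sp_col]]
    if len(mismatches):
        logger.warning(f"Found {len(mismatches)} mismatches")
    for _, row in mismatches.iterrows():
        logger.info(f"Mismatch - ID {row['ID']}: "
                    f"Predeliberatierapport SP={row[predelib_sp_col]}, "
                    f"Dashboard Inschrijvingen SP={row[dash_sp_col]}")
    return mismatches.to_dict("records")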
3293
startpakketten/startpakket_processing.log
Normal file
File diff suppressed because it is too large
0
startpakketten/todo.md
Normal file