ows-master/bascriptie studiepunten controle/script.py
bdaneels a2e9c6376e Implement script for checking study points compliance
- Added a Python script to read data from 'scriptie.xlsx' and 'dashboard_inschrijvingen.xlsx'.
- Implemented functions to find common ID columns and relevant columns based on keywords.
- Merged dataframes on the common ID column and filtered for entries with 'Target OO' containing '1070FLWGES'.
- Calculated thresholds for study points and identified violations based on specified criteria.
- Generated a report of violations saved as both CSV and Excel formats.
- Added sample violation data to 'violations_report.csv'.
2025-10-24 10:14:07 +02:00

104 lines
3.9 KiB
Python

import os
import sys
import pandas as pd
import numpy as np
SCRIPT_DIR = os.path.dirname(__file__)
SCRIPTIE_XLSX = os.path.join(SCRIPT_DIR, "scriptie.xlsx")
DASH_XLSX = os.path.join(SCRIPT_DIR, "dashboard_inschrijvingen.xlsx")
def find_common_id_col(df1, df2):
common = set(df1.columns).intersection(df2.columns)
# prefer obvious id-like names
for kw in ("ID"):
for c in common:
if kw in c.lower():
return c
if len(common) == 1:
return next(iter(common))
if common:
return sorted(common)[0]
raise ValueError(f"No common column found between files.\nFile1 cols: {df1.columns.tolist()}\nFile2 cols: {df2.columns.tolist()}")
def find_col_by_keywords(df, keywords):
for k in keywords:
for c in df.columns:
if k in c.lower():
return c
return None
def main():
if not os.path.exists(SCRIPTIE_XLSX) or not os.path.exists(DASH_XLSX):
print("Missing files. Make sure scriptie.xlsx and dashboard_inschrijvingen.xlsx are in the script folder.")
sys.exit(1)
df_scriptie = pd.read_excel(SCRIPTIE_XLSX)
df_dash = pd.read_excel(DASH_XLSX)
try:
id_col = find_common_id_col(df_scriptie, df_dash)
except ValueError as e:
print(e)
sys.exit(1)
# find relevant columns (best-effort)
col_target_oo = find_col_by_keywords(df_scriptie, ["target oo", "target_oo", "targetoo", "target oo"])
col_target_plan = find_col_by_keywords(df_scriptie, ["target plan", "target_plan", "targetplan", "target"])
col_sp = find_col_by_keywords(df_dash, ["sp", "punten", "ects", "study points"])
if col_target_oo is None or col_target_plan is None or col_sp is None:
print("Could not locate required columns. Detected:")
print("scriptie columns:", df_scriptie.columns.tolist())
print("dashboard columns:", df_dash.columns.tolist())
print(f"Found -> target_oo: {col_target_oo}, target_plan: {col_target_plan}, SP: {col_sp}")
sys.exit(1)
merged = df_scriptie.merge(df_dash, on=id_col, how="inner", suffixes=("_scriptie", "_dash"))
# filter rows where Target OO contains 1070FLWGES
mask_oo = merged[col_target_oo].astype(str).str.contains("1070FLWGES", na=False)
subset = merged[mask_oo].copy()
if subset.empty:
print("No rows with Target OO containing '1070FLWGES'. No violations.")
return
# determine thresholds per row: default 180-9 = 171, if Target plan contains 'Ba geschiedenis (major)' then 180-12 = 168
plan_contains = subset[col_target_plan].astype(str).str.contains("ba geschiedenis (major)", case=False, na=False)
subset["threshold"] = np.where(plan_contains, 180 - 12, 180 - 9)
# coerce SP to numeric (non-numeric become NaN)
subset["SP_value"] = pd.to_numeric(subset[col_sp], errors="coerce")
# violation: SP is NaN or SP < threshold
violations = subset[subset["SP_value"] < subset["threshold"]]
# also consider NaN as violation
nan_viol = subset[subset["SP_value"].isna()]
violations = pd.concat([violations, nan_viol]).drop_duplicates()
if violations.empty:
print("No violations found for entries with Target OO == 1070FLWGES.")
return
report_cols = [id_col, col_sp, "SP_value", "threshold", col_target_plan, col_target_oo]
report = violations.loc[:, [id_col, col_sp, "SP_value", "threshold", col_target_plan, col_target_oo]]
report = report.rename(columns={col_sp: "SP_raw", col_target_plan: "Target_plan", col_target_oo: "Target_OO"})
out_csv = os.path.join(SCRIPT_DIR, "violations_report.csv")
out_xlsx = os.path.join(SCRIPT_DIR, "violations_report.xlsx")
report.to_csv(out_csv, index=False)
try:
report.to_excel(out_xlsx, index=False)
except Exception:
pass
print(f"Found {len(report)} violation(s). Saved to: {out_csv} (and {out_xlsx} if Excel write succeeded).")
print(report.to_string(index=False))
if __name__ == "__main__":
main()