Implement script for checking study points compliance
- Added a Python script to read data from 'scriptie.xlsx' and 'dashboard_inschrijvingen.xlsx'.
- Implemented functions to find the common ID column and the relevant columns based on keywords.
- Merged the dataframes on the common ID column and filtered for entries whose 'Target OO' contains '1070FLWGES'.
- Calculated the study-point thresholds and identified violations based on the specified criteria.
- Generated a report of violations, saved in both CSV and Excel formats.
- Added sample violation data to 'violations_report.csv'.
This commit is contained in:
parent
468df81386
commit
a2e9c6376e
104
bascriptie studiepunten controle/script.py
Normal file
104
bascriptie studiepunten controle/script.py
Normal file
|
|
@ -0,0 +1,104 @@
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
# Folder that contains this script. abspath() anchors it even when __file__ is
# a relative path, so the input and report paths below do not depend on the
# current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# Input workbooks, expected to sit next to the script.
SCRIPTIE_XLSX = os.path.join(SCRIPT_DIR, "scriptie.xlsx")
DASH_XLSX = os.path.join(SCRIPT_DIR, "dashboard_inschrijvingen.xlsx")
|
||||||
|
|
||||||
|
|
||||||
|
def _label_tokens(label):
    """Split a column label into lowercase alphanumeric words."""
    lowered = str(label).lower()
    return "".join(ch if ch.isalnum() else " " for ch in lowered).split()


def find_common_id_col(df1, df2):
    """Pick the column on which the two dataframes should be joined.

    Only columns present in both frames are considered. Preference order:

    1. a column literally called "id" (case-insensitive),
    2. a column that has "id" as a separate word ("Student ID", "student_id"),
    3. the first shared column in sorted order.

    Args:
        df1: First dataframe.
        df2: Second dataframe.

    Returns:
        The label of the chosen shared column.

    Raises:
        ValueError: If the frames have no column in common.
    """
    # Sort by string form so the choice is deterministic and non-string labels
    # (e.g. numeric Excel headers) cannot break the ordering.
    shared = sorted(set(df1.columns).intersection(df2.columns), key=str)
    if not shared:
        raise ValueError(
            f"No common column found between files.\nFile1 cols: {df1.columns.tolist()}\nFile2 cols: {df2.columns.tolist()}"
        )

    # Prefer obvious id-like names. Whole-word matching is used on purpose:
    # a plain substring test would also hit words such as "Opleiding" or
    # "Geldig", which merely contain the letters "id".
    id_keywords = ("id",)
    for col in shared:
        if str(col).strip().lower() in id_keywords:
            return col
    for col in shared:
        if any(kw in _label_tokens(col) for kw in id_keywords):
            return col

    return shared[0]
|
||||||
|
|
||||||
|
|
||||||
|
def find_col_by_keywords(df, keywords):
    """Return the first column whose name contains one of the keywords.

    Matching is a case-insensitive substring test. Keywords are tried in the
    order given, so earlier keywords take priority over later ones; within one
    keyword the dataframe's column order decides.

    Args:
        df: Dataframe whose column labels are searched.
        keywords: Iterable of substrings to look for, most specific first.

    Returns:
        The original column label of the first match, or None if nothing
        matches.
    """
    # Lowercase every label once. str() keeps non-string labels (numeric or
    # date headers) from raising AttributeError on .lower().
    lowered_cols = [(col, str(col).lower()) for col in df.columns]
    for keyword in keywords:
        needle = str(keyword).lower()
        for col, lowered in lowered_cols:
            if needle in lowered:
                return col
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _merged_col(merged, col, suffix):
    """Return the label that `col` carries in `merged`, suffixed if the merge renamed it."""
    if col in merged.columns:
        return col
    return f"{col}{suffix}"


def main():
    """Check study-point compliance and write a violations report.

    Reads scriptie.xlsx and dashboard_inschrijvingen.xlsx from the script
    folder, joins them on their shared ID column, and keeps the rows whose
    Target OO contains '1070FLWGES'. A row is a violation when its study
    points are missing/non-numeric or below the threshold: 168 when the
    target plan contains 'Ba geschiedenis (major)', otherwise 171.

    Violations are written to violations_report.csv and, on a best-effort
    basis, to violations_report.xlsx, and are printed to stdout.

    Exits with status 1 when an input file is missing, when the two files
    share no column, or when a required column cannot be located.
    """
    oo_code = "1070FLWGES"
    major_plan = "ba geschiedenis (major)"
    total_sp = 180
    default_threshold = total_sp - 9
    major_threshold = total_sp - 12

    if not (os.path.exists(SCRIPTIE_XLSX) and os.path.exists(DASH_XLSX)):
        print("Missing files. Make sure scriptie.xlsx and dashboard_inschrijvingen.xlsx are in the script folder.")
        sys.exit(1)

    df_scriptie = pd.read_excel(SCRIPTIE_XLSX)
    df_dash = pd.read_excel(DASH_XLSX)

    try:
        id_col = find_common_id_col(df_scriptie, df_dash)
    except ValueError as e:
        print(e)
        sys.exit(1)

    # Find relevant columns (best-effort).
    col_target_oo = find_col_by_keywords(df_scriptie, ["target oo", "target_oo", "targetoo"])
    # The generic "target" fallback must not resolve to the Target OO column,
    # so that column is left out of the plan search.
    plan_candidates = df_scriptie
    if col_target_oo is not None:
        plan_candidates = df_scriptie.drop(columns=[col_target_oo])
    col_target_plan = find_col_by_keywords(
        plan_candidates, ["target plan", "target_plan", "targetplan", "target"]
    )
    col_sp = find_col_by_keywords(df_dash, ["sp", "punten", "ects", "study points"])

    if col_target_oo is None or col_target_plan is None or col_sp is None:
        print("Could not locate required columns. Detected:")
        print("scriptie columns:", df_scriptie.columns.tolist())
        print("dashboard columns:", df_dash.columns.tolist())
        print(f"Found -> target_oo: {col_target_oo}, target_plan: {col_target_plan}, SP: {col_sp}")
        sys.exit(1)

    merged = df_scriptie.merge(df_dash, on=id_col, how="inner", suffixes=("_scriptie", "_dash"))

    # Columns that exist in both files are renamed with a suffix by the merge;
    # translate the pre-merge names so the lookups below cannot raise KeyError.
    col_target_oo = _merged_col(merged, col_target_oo, "_scriptie")
    col_target_plan = _merged_col(merged, col_target_plan, "_scriptie")
    col_sp = _merged_col(merged, col_sp, "_dash")

    # Filter rows where Target OO contains the programme code.
    mask_oo = merged[col_target_oo].astype(str).str.contains(oo_code, na=False, regex=False)
    subset = merged[mask_oo].copy()
    if subset.empty:
        print("No rows with Target OO containing '1070FLWGES'. No violations.")
        return

    # Threshold per row. regex=False is essential here: as a regular expression
    # the parentheses in "(major)" would form a group and the literal text
    # "Ba geschiedenis (major)" would never match.
    plan_contains = subset[col_target_plan].astype(str).str.contains(
        major_plan, case=False, na=False, regex=False
    )
    subset["threshold"] = np.where(plan_contains, major_threshold, default_threshold)

    # Coerce SP to numeric (non-numeric values become NaN).
    subset["SP_value"] = pd.to_numeric(subset[col_sp], errors="coerce")

    # Violation: SP is missing/non-numeric or below the row's threshold.
    sp_values = subset["SP_value"]
    is_violation = sp_values.isna() | (sp_values < subset["threshold"])
    violations = subset[is_violation].drop_duplicates()

    if violations.empty:
        print("No violations found for entries with Target OO == 1070FLWGES.")
        return

    report_cols = [id_col, col_sp, "SP_value", "threshold", col_target_plan, col_target_oo]
    report = violations.loc[:, report_cols].rename(
        columns={col_sp: "SP_raw", col_target_plan: "Target_plan", col_target_oo: "Target_OO"}
    )

    out_csv = os.path.join(SCRIPT_DIR, "violations_report.csv")
    out_xlsx = os.path.join(SCRIPT_DIR, "violations_report.xlsx")
    report.to_csv(out_csv, index=False)

    # The Excel copy is best-effort (the writer engine may be missing or the
    # file may be locked), but a failure is reported instead of being hidden.
    saved_to = out_csv
    try:
        report.to_excel(out_xlsx, index=False)
    except Exception as exc:
        print(f"Warning: could not write {out_xlsx}: {exc}")
    else:
        saved_to = f"{out_csv} and {out_xlsx}"

    print(f"Found {len(report)} violation(s). Saved to: {saved_to}.")
    print(report.to_string(index=False))
|
||||||
|
|
||||||
|
|
||||||
|
# Run the compliance check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||||
3
bascriptie studiepunten controle/violations_report.csv
Normal file
3
bascriptie studiepunten controle/violations_report.csv
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
ID,SP_raw,SP_value,threshold,Target_plan,Target_OO
|
||||||
|
20224729,168,168,171,Ba geschiedenis,1070FLWGES
|
||||||
|
20224915,162,162,171,Ba geschiedenis,1070FLWGES
|
||||||
|
|
|
@ -37,13 +37,26 @@ def check_headers_predelibfile(df):
|
||||||
df = df[existing_columns]
|
df = df[existing_columns]
|
||||||
|
|
||||||
print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
|
print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
|
||||||
return df
|
|
||||||
else:
|
else:
|
||||||
print("Headers 'Achternaam' and 'Voornaam' not found in the file")
|
print("Headers 'Achternaam' and 'Voornaam' not found in the file")
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
if 'Programma status omschrijving' in df.columns:
|
||||||
|
before = len(df)
|
||||||
|
mask = df['Programma status omschrijving'].astype(str).str.contains(r'\bBeëindigd\b', case=False, na=False)
|
||||||
|
df = df[~mask].reset_index(drop=True)
|
||||||
|
removed = before - len(df)
|
||||||
|
print(f"Removed {removed} rows where Programma status omschrijving contains 'Beëindigd'")
|
||||||
|
else:
|
||||||
|
print("Column 'Programma status omschrijving' not found; no rows removed")
|
||||||
|
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def check_headers_dashboard_inschrijvingenfile(df):
|
def check_headers_dashboard_inschrijvingenfile(df):
|
||||||
# Check if the headers are already in the column names (first row)
|
# Check if the headers are already in the column names (first row)
|
||||||
if 'Naam' in df.columns and 'Voornaam' in df.columns:
|
if 'Naam' in df.columns and 'Voornaam' in df.columns:
|
||||||
print("Headers found in first row of dashboard_inschrijvingen - no need to search for header row")
|
print("Headers found in first row of dashboard_inschrijvingen - no need to search for header row")
|
||||||
header_row = -1 # Indicates headers are already set
|
header_row = -1 # Indicates headers are already set
|
||||||
|
|
@ -55,7 +68,8 @@ def check_headers_dashboard_inschrijvingenfile(df):
|
||||||
header_row = i
|
header_row = i
|
||||||
break
|
break
|
||||||
|
|
||||||
if header_row is not None:
|
# Apply headers only when a valid header row was found (>= 0)
|
||||||
|
if header_row is not None and header_row >= 0:
|
||||||
# Delete all rows before the header row
|
# Delete all rows before the header row
|
||||||
df = df.iloc[header_row:].reset_index(drop=True)
|
df = df.iloc[header_row:].reset_index(drop=True)
|
||||||
|
|
||||||
|
|
@ -63,16 +77,26 @@ def check_headers_dashboard_inschrijvingenfile(df):
|
||||||
df.columns = df.iloc[0]
|
df.columns = df.iloc[0]
|
||||||
df = df.iloc[1:].reset_index(drop=True)
|
df = df.iloc[1:].reset_index(drop=True)
|
||||||
|
|
||||||
if header_row is not None and header_row >= 0:
|
print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
|
||||||
print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
|
elif header_row == -1:
|
||||||
elif header_row == -1:
|
# Headers were already correct; nothing to change
|
||||||
print(f"Headers were already correct in dashboard_file.")
|
print("Headers were already correct in dashboard_file.")
|
||||||
|
|
||||||
return df
|
|
||||||
else:
|
else:
|
||||||
print("Headers 'Achternaam' and 'Voornaam' not found in the file")
|
print("Headers 'Naam' and 'Voornaam' not found in the file")
|
||||||
return df
|
return df
|
||||||
|
|
||||||
|
# Remove rows where Status contains 'Beëindigd'
|
||||||
|
if 'Status' in df.columns:
|
||||||
|
before = len(df)
|
||||||
|
mask = df['Status'].astype(str).str.contains(r'\bBeëindigd\b', case=False, na=False)
|
||||||
|
df = df[~mask].reset_index(drop=True)
|
||||||
|
removed = before - len(df)
|
||||||
|
print(f"Removed {removed} rows where Status contains 'Beëindigd'")
|
||||||
|
else:
|
||||||
|
print("Column 'Status' not found; no rows removed")
|
||||||
|
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Read the Excel files
|
# Read the Excel files
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user