Implement script for checking study points compliance
- Added a Python script to read data from 'scriptie.xlsx' and 'dashboard_inschrijvingen.xlsx'.
- Implemented functions to find common ID columns and relevant columns based on keywords.
- Merged dataframes on the common ID column and filtered for entries with 'Target OO' containing '1070FLWGES'.
- Calculated thresholds for study points and identified violations based on specified criteria.
- Generated a report of violations saved as both CSV and Excel formats.
- Added sample violation data to 'violations_report.csv'.
This commit is contained in:
@@ -37,13 +37,26 @@ def check_headers_predelibfile(df):
|
||||
df = df[existing_columns]
|
||||
|
||||
print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
|
||||
return df
|
||||
else:
|
||||
print("Headers 'Achternaam' and 'Voornaam' not found in the file")
|
||||
return df
|
||||
|
||||
if 'Programma status omschrijving' in df.columns:
|
||||
before = len(df)
|
||||
mask = df['Programma status omschrijving'].astype(str).str.contains(r'\bBeëindigd\b', case=False, na=False)
|
||||
df = df[~mask].reset_index(drop=True)
|
||||
removed = before - len(df)
|
||||
print(f"Removed {removed} rows where Programma status omschrijving contains 'Beëindigd'")
|
||||
else:
|
||||
print("Column 'Programma status omschrijving' not found; no rows removed")
|
||||
|
||||
return df
|
||||
|
||||
|
||||
|
||||
|
||||
def check_headers_dashboard_inschrijvingenfile(df):
|
||||
# Check if the headers are already in the column names (first row)
|
||||
# Check if the headers are already in the column names (first row)
|
||||
if 'Naam' in df.columns and 'Voornaam' in df.columns:
|
||||
print("Headers found in first row of dashboard_inschrijvingen - no need to search for header row")
|
||||
header_row = -1 # Indicates headers are already set
|
||||
@@ -55,7 +68,8 @@ def check_headers_dashboard_inschrijvingenfile(df):
|
||||
header_row = i
|
||||
break
|
||||
|
||||
if header_row is not None:
|
||||
# Apply headers only when a valid header row was found (>= 0)
|
||||
if header_row is not None and header_row >= 0:
|
||||
# Delete all rows before the header row
|
||||
df = df.iloc[header_row:].reset_index(drop=True)
|
||||
|
||||
@@ -63,16 +77,26 @@ def check_headers_dashboard_inschrijvingenfile(df):
|
||||
df.columns = df.iloc[0]
|
||||
df = df.iloc[1:].reset_index(drop=True)
|
||||
|
||||
if header_row is not None and header_row >= 0:
|
||||
print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
|
||||
elif header_row == -1:
|
||||
print(f"Headers were already correct in dashboard_file.")
|
||||
|
||||
return df
|
||||
print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
|
||||
elif header_row == -1:
|
||||
# Headers were already correct; nothing to change
|
||||
print("Headers were already correct in dashboard_file.")
|
||||
else:
|
||||
print("Headers 'Achternaam' and 'Voornaam' not found in the file")
|
||||
print("Headers 'Naam' and 'Voornaam' not found in the file")
|
||||
return df
|
||||
|
||||
# Remove rows where Status contains 'Beëindigd'
|
||||
if 'Status' in df.columns:
|
||||
before = len(df)
|
||||
mask = df['Status'].astype(str).str.contains(r'\bBeëindigd\b', case=False, na=False)
|
||||
df = df[~mask].reset_index(drop=True)
|
||||
removed = before - len(df)
|
||||
print(f"Removed {removed} rows where Status contains 'Beëindigd'")
|
||||
else:
|
||||
print("Column 'Status' not found; no rows removed")
|
||||
|
||||
return df
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Read the Excel files
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user