2024-11-18 10:59:21 +00:00
|
|
|
import pandas as pd
|
|
|
|
|
2024-11-18 13:03:25 +00:00
|
|
|
# Constants
|
|
|
|
FILE_PATH = 'file.xlsx'
|
|
|
|
SHEET_NAME = 'ps (32)'
|
|
|
|
OUTPUT_FILE_PATH = 'filtered_grote_lokalen.xlsx'
|
|
|
|
EXAM_FORM_COLUMN = 'Examenvorm'
|
|
|
|
REGISTRATION_COLUMN = 'Aant. inschr.'
|
|
|
|
BEGIN_TIME_COLUMN = 'Beginuur S+'
|
|
|
|
END_TIME_COLUMN = 'Einduur S+'
|
|
|
|
TEACHERS_COLUMN = 'Docenten'
|
|
|
|
LOCATION_COLUMNS = ['Datum S+', BEGIN_TIME_COLUMN, END_TIME_COLUMN, 'Studiegidsnr.', 'Omschrijving', TEACHERS_COLUMN, REGISTRATION_COLUMN]
|
2024-11-18 10:59:21 +00:00
|
|
|
|
2024-11-18 13:03:25 +00:00
|
|
|
# Read the Excel file
|
|
|
|
def read_excel(file_path, sheet_name):
|
|
|
|
return pd.read_excel(file_path, sheet_name=sheet_name)
|
2024-11-18 10:59:21 +00:00
|
|
|
|
2024-11-18 13:03:25 +00:00
|
|
|
# Filter DataFrame
|
|
|
|
def filter_dataframe(df):
|
|
|
|
df = df[df[EXAM_FORM_COLUMN] == 'Schriftelijk']
|
|
|
|
df = df[df[REGISTRATION_COLUMN] > 65]
|
|
|
|
return df[LOCATION_COLUMNS]
|
2024-11-18 10:59:21 +00:00
|
|
|
|
2024-11-18 13:03:25 +00:00
|
|
|
# Format time strings
|
|
|
|
def format_time_strings(df):
|
|
|
|
df[BEGIN_TIME_COLUMN] = df[BEGIN_TIME_COLUMN].apply(lambda x: x.strftime('%H:%M') if pd.notnull(x) else '')
|
|
|
|
df[END_TIME_COLUMN] = df[END_TIME_COLUMN].apply(lambda x: x.strftime('%H:%M') if pd.notnull(x) else '')
|
|
|
|
return df
|
2024-11-18 10:59:21 +00:00
|
|
|
|
2024-11-18 13:03:25 +00:00
|
|
|
# Clean up teacher titles
|
|
|
|
def clean_teacher_titles(df):
|
|
|
|
df[TEACHERS_COLUMN] = df[TEACHERS_COLUMN].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '', regex=True).str.strip()
|
|
|
|
return df
|
|
|
|
|
|
|
|
# Save DataFrame to Excel
|
|
|
|
def save_to_excel(df, file_path):
|
|
|
|
df.to_excel(file_path, index=False)
|
|
|
|
|
|
|
|
# Main process
|
|
|
|
def main():
|
|
|
|
df = read_excel(FILE_PATH, SHEET_NAME)
|
|
|
|
filtered_df = filter_dataframe(df)
|
|
|
|
filtered_df = format_time_strings(filtered_df)
|
|
|
|
filtered_df = clean_teacher_titles(filtered_df)
|
|
|
|
save_to_excel(filtered_df, OUTPUT_FILE_PATH)
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
main()
|