import pandas as pd # Constants FILE_PATH = 'file.xlsx' SHEET_NAME = 'ps (32)' OUTPUT_FILE_PATH = 'filtered_grote_lokalen.xlsx' EXAM_FORM_COLUMN = 'Examenvorm' REGISTRATION_COLUMN = 'Aant. inschr.' BEGIN_TIME_COLUMN = 'Beginuur S+' END_TIME_COLUMN = 'Einduur S+' TEACHERS_COLUMN = 'Docenten' LOCATION_COLUMNS = ['Datum S+', BEGIN_TIME_COLUMN, END_TIME_COLUMN, 'Studiegidsnr.', 'Omschrijving', TEACHERS_COLUMN, REGISTRATION_COLUMN] # Read the Excel file def read_excel(file_path, sheet_name): return pd.read_excel(file_path, sheet_name=sheet_name) # Filter DataFrame def filter_dataframe(df): df = df[df[EXAM_FORM_COLUMN] == 'Schriftelijk'] df = df[df[REGISTRATION_COLUMN] > 65] return df[LOCATION_COLUMNS] # Format time strings def format_time_strings(df): df[BEGIN_TIME_COLUMN] = df[BEGIN_TIME_COLUMN].apply(lambda x: x.strftime('%H:%M') if pd.notnull(x) else '') df[END_TIME_COLUMN] = df[END_TIME_COLUMN].apply(lambda x: x.strftime('%H:%M') if pd.notnull(x) else '') return df # Clean up teacher titles def clean_teacher_titles(df): df[TEACHERS_COLUMN] = df[TEACHERS_COLUMN].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '', regex=True).str.strip() return df # Save DataFrame to Excel def save_to_excel(df, file_path): df.to_excel(file_path, index=False) # Main process def main(): df = read_excel(FILE_PATH, SHEET_NAME) filtered_df = filter_dataframe(df) filtered_df = format_time_strings(filtered_df) filtered_df = clean_teacher_titles(filtered_df) save_to_excel(filtered_df, OUTPUT_FILE_PATH) if __name__ == "__main__": main()