b021eabdab
Refactored `script.py` by adding detailed docstrings and organizing functions. Created `.idea` configuration files and `gotodashboard.js` for `sisa_crawl` project. Added `readme.md` files with usage instructions and context for multiple scripts, and set up `package.json` for `sisa_crawl` dependencies.
113 lines
4.9 KiB
Python
113 lines
4.9 KiB
Python
import pandas as pd
|
|
|
|
def read_excel_file(file_path):
|
|
"""
|
|
:param file_path: The path to the Excel file to be read.
|
|
:return: The contents of the Excel file as a DataFrame if successful, otherwise None.
|
|
"""
|
|
try:
|
|
return pd.read_excel(file_path)
|
|
except Exception as e:
|
|
print(f"Error reading the Excel file: {e}")
|
|
return None
|
|
|
|
def filter_studiegidsnummer(df):
|
|
"""
|
|
:param df: Input DataFrame that contains various columns including 'Studiegidsnummer'.
|
|
:return: DataFrame filtered to include only rows where the 'Studiegidsnummer' column contains 'GES'. Returns an empty DataFrame if 'Studiegidsnummer' column is not found.
|
|
"""
|
|
if 'Studiegidsnummer' not in df.columns:
|
|
print("Column 'studiegidsnummer' not found in the DataFrame.")
|
|
print("Available columns:", df.columns)
|
|
return pd.DataFrame() # Return an empty DataFrame
|
|
return df[df['Studiegidsnummer'].str.contains('GES', na=False)].copy()
|
|
|
|
def filter_opmerkingen(df):
|
|
"""
|
|
:param df: The input DataFrame containing various columns including 'Opmerkingen'
|
|
:return: A filtered DataFrame excluding rows where the 'Opmerkingen' column contains the string '24-25'. If the 'Opmerkingen' column is not found, returns an empty DataFrame and prints available columns.
|
|
"""
|
|
if 'Opmerkingen' not in df.columns:
|
|
print("Column 'Opmerkingen' not found in the DataFrame.")
|
|
print("Available columns:", df.columns)
|
|
return pd.DataFrame() # Return an empty DataFrame
|
|
return df[~df['Opmerkingen'].str.contains('24-25', na=False)].copy()
|
|
|
|
def create_message_column(df):
|
|
"""
|
|
:param df: A pandas DataFrame containing examination details.
|
|
:return: A pandas DataFrame with additional 'Message' and 'subject' columns
|
|
for communication with teaching staff regarding examination details.
|
|
"""
|
|
df.loc[:, 'Message'] = df.apply(lambda row: (
|
|
f"Beste docent,\n\n"
|
|
f"Ik ben de examengegevens aan het controleren van {row['Omschrijving']} {row['Studiegidsnummer']}. De huidige gegevens zijn als volgt:\n\n"
|
|
f"{row['Examenvorm']} examen voor zowel eerste als tweede zit, {row['Examenduur']} minuten, tussen {row['Beginuur voormiddag']} en {row['Einduur voormiddag']} of {row['Beginuur namiddag']} en {row['Einduur namiddag']}.\n\n"
|
|
f"Gelden dezelfde gegevens voor dit academiejaar of moeten er nog wijzigingen doorgevoerd worden? Alvast dank voor je reactie!"
|
|
), axis=1)
|
|
df.loc[:, 'subject'] = df.apply(lambda row: (
|
|
f"Examengegevens {row['Omschrijving']} {row['Studiegidsnummer']}"
|
|
), axis=1)
|
|
return df
|
|
|
|
def save_to_excel(df, output_file_path):
|
|
"""
|
|
:param df: The DataFrame to be saved to an Excel file.
|
|
:type df: pandas.DataFrame
|
|
:param output_file_path: The path where the Excel file will be saved.
|
|
:type output_file_path: str
|
|
:return: None
|
|
"""
|
|
try:
|
|
df.to_excel(output_file_path, index=False)
|
|
except Exception as e:
|
|
print(f"Error saving the Excel file: {e}")
|
|
|
|
def convert_time_format(time_str):
|
|
"""
|
|
:param time_str: A string representing the time to be converted.
|
|
:return: A string representing the time in 'HH:MM' format, or the original string if conversion fails.
|
|
"""
|
|
try:
|
|
return pd.to_datetime(time_str).strftime('%H:%M')
|
|
except Exception as e:
|
|
print(f"Error converting time format: {e}")
|
|
return time_str
|
|
|
|
def apply_time_format_conversion(df, columns):
|
|
"""
|
|
:param df: The DataFrame containing the columns to be formatted.
|
|
:type df: pandas.DataFrame
|
|
:param columns: A list of column names in the DataFrame to apply the time format conversion.
|
|
:type columns: list of str
|
|
:return: A DataFrame with the specified columns converted to the '%H:%M' format.
|
|
:rtype: pandas.DataFrame
|
|
"""
|
|
for column in columns:
|
|
df[column] = pd.to_datetime(df[column], format='%H:%M:%S', errors='coerce').dt.strftime('%H:%M')
|
|
return df
|
|
|
|
# Example usage within the main function
|
|
def main():
|
|
"""
|
|
Reads an Excel file, filters data based on specific criteria, converts time formats for specified columns,
|
|
creates a message column, and saves the filtered data to a new Excel file.
|
|
|
|
:return: None
|
|
"""
|
|
file_path = 'examengegevens2425.xlsx'
|
|
output_file_path = 'filtered_examengegevens2425.xlsx'
|
|
|
|
df = read_excel_file(file_path)
|
|
if df is not None:
|
|
filtered_df = filter_studiegidsnummer(df)
|
|
if not filtered_df.empty:
|
|
final_filtered_df = filter_opmerkingen(filtered_df)
|
|
# Convert time format for specified columns
|
|
time_columns = ['Beginuur voormiddag', 'Einduur voormiddag', 'Beginuur namiddag', 'Einduur namiddag']
|
|
final_filtered_df = apply_time_format_conversion(final_filtered_df, time_columns)
|
|
final_filtered_df = create_message_column(final_filtered_df)
|
|
save_to_excel(final_filtered_df, output_file_path)
|
|
|
|
if __name__ == "__main__":
|
|
main() |