Refactor and document code; add new files
Refactored `script.py` by adding detailed docstrings and organizing functions. Created `.idea` configuration files and `gotodashboard.js` for `sisa_crawl` project. Added `readme.md` files with usage instructions and context for multiple scripts, and set up `package.json` for `sisa_crawl` dependencies.
This commit is contained in:
@@ -1,18 +1,48 @@
|
||||
import pandas as pd
|
||||
|
||||
file_path = 'file.xlsx'
|
||||
sheet_name = 'ps (32)'
|
||||
# Constants
|
||||
# Input workbook, the worksheet to read from it, and the output workbook.
FILE_PATH = 'file.xlsx'
SHEET_NAME = 'ps (32)'
OUTPUT_FILE_PATH = 'filtered_grote_lokalen.xlsx'

# Column headers of the input sheet that the processing steps refer to.
EXAM_FORM_COLUMN = 'Examenvorm'
REGISTRATION_COLUMN = 'Aant. inschr.'
BEGIN_TIME_COLUMN = 'Beginuur S+'
END_TIME_COLUMN = 'Einduur S+'
TEACHERS_COLUMN = 'Docenten'

# Columns kept in the output, in output order. This stays a list (not a
# tuple) because pandas treats a tuple inside ``df[...]`` as a single key.
LOCATION_COLUMNS = [
    'Datum S+',
    BEGIN_TIME_COLUMN,
    END_TIME_COLUMN,
    'Studiegidsnr.',
    'Omschrijving',
    TEACHERS_COLUMN,
    REGISTRATION_COLUMN,
]
|
||||
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
filtered_df = df[df['Examenvorm'] == 'Schriftelijk' ]
|
||||
filtered_df = filtered_df[filtered_df['Aant. inschr.'] > 65]
|
||||
filtered_df = filtered_df[['Datum S+','Beginuur S+','Einduur S+', 'Studiegidsnr.', 'Omschrijving', 'Docenten', 'Aant. inschr.']]
|
||||
# Read the Excel file
|
||||
def read_excel(file_path, sheet_name):
    """Load one worksheet of an Excel workbook via ``pandas.read_excel``.

    Args:
        file_path: Location of the workbook; forwarded unchanged to pandas.
        sheet_name: Worksheet selector; forwarded as ``sheet_name``.

    Returns:
        The object produced by ``pandas.read_excel`` for that selector.
    """
    return pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
|
||||
# Filter DataFrame
|
||||
def filter_dataframe(df, exam_form='Schriftelijk', registration_threshold=65):
    """Select exams of one form whose registration count exceeds a threshold.

    Args:
        df: DataFrame holding at least ``EXAM_FORM_COLUMN``,
            ``REGISTRATION_COLUMN`` and every column in ``LOCATION_COLUMNS``.
        exam_form: Value of ``EXAM_FORM_COLUMN`` a row must have to be kept.
            Defaults to ``'Schriftelijk'``.
        registration_threshold: A row is kept only when its
            ``REGISTRATION_COLUMN`` value is strictly greater than this.
            Defaults to 65.

    Returns:
        A new DataFrame with only ``LOCATION_COLUMNS``, independent of ``df``.
    """
    # Filter on the exam form first so the numeric comparison below only
    # ever sees rows of the requested form.
    matching_form = df[df[EXAM_FORM_COLUMN] == exam_form]
    exceeds_threshold = matching_form[REGISTRATION_COLUMN] > registration_threshold
    # Explicit copy: later steps assign columns on the result, and assigning
    # into a slice of another frame triggers SettingWithCopyWarning.
    return matching_form[exceeds_threshold][LOCATION_COLUMNS].copy()
|
||||
|
||||
#formatting the timestrings
|
||||
filtered_df['Beginuur S+'] = filtered_df['Beginuur S+'].apply(lambda x: x.strftime('%H:%M'))
|
||||
filtered_df['Einduur S+'] = filtered_df['Einduur S+'].apply(lambda x: x.strftime('%H:%M'))
|
||||
filtered_df['Docenten'] = filtered_df['Docenten'].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '',
|
||||
regex=True).str.strip()
|
||||
# Format time strings
|
||||
def format_time_strings(df):
    """Rewrite the begin and end time columns as ``'HH:MM'`` strings.

    Cells that ``pd.notnull`` reports as missing become empty strings; every
    other cell is formatted with its own ``strftime('%H:%M')``.

    Args:
        df: DataFrame containing ``BEGIN_TIME_COLUMN`` and ``END_TIME_COLUMN``.

    Returns:
        The same ``df`` object, with both columns reassigned in place.
    """
    def _as_hhmm(value):
        # Missing cells have no usable strftime(); render them as ''.
        return value.strftime('%H:%M') if pd.notnull(value) else ''

    for column in (BEGIN_TIME_COLUMN, END_TIME_COLUMN):
        df[column] = df[column].apply(_as_hhmm)
    return df
|
||||
|
||||
filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False)
|
||||
# Clean up teacher titles
|
||||
def clean_teacher_titles(df):
    """Remove role labels from the teachers column.

    Every whole-word occurrence of ``Titularis``, ``Co-Titularis`` or
    ``Medewerker`` is deleted, then leading and trailing whitespace is
    stripped. Whitespace left in the middle of a value is not touched.

    Args:
        df: DataFrame containing ``TEACHERS_COLUMN``.

    Returns:
        The same ``df`` object, with ``TEACHERS_COLUMN`` reassigned in place.
    """
    role_labels = r'\b(Titularis|Co-Titularis|Medewerker)\b'
    without_labels = df[TEACHERS_COLUMN].str.replace(role_labels, '', regex=True)
    df[TEACHERS_COLUMN] = without_labels.str.strip()
    return df
|
||||
|
||||
# Save DataFrame to Excel
|
||||
def save_to_excel(df, file_path):
    """Write ``df`` to an Excel workbook without the index column.

    Args:
        df: DataFrame to write.
        file_path: Destination passed to ``DataFrame.to_excel``.
    """
    df.to_excel(file_path, index=False)
|
||||
|
||||
# Main process
|
||||
def main():
    """Run the pipeline: read, filter, format times, clean titles, save.

    Reads ``SHEET_NAME`` from ``FILE_PATH`` and writes the processed rows to
    ``OUTPUT_FILE_PATH``.
    """
    df = read_excel(FILE_PATH, SHEET_NAME)
    filtered_df = filter_dataframe(df)
    filtered_df = format_time_strings(filtered_df)
    filtered_df = clean_teacher_titles(filtered_df)
    save_to_excel(filtered_df, OUTPUT_FILE_PATH)
|
||||
|
||||
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
79
examen grote lokalen/readme.md
Normal file
79
examen grote lokalen/readme.md
Normal file
@@ -0,0 +1,79 @@
|
||||
# Excel Filtering and Formatting Script
|
||||
|
||||
The script in this repository filters all written exams that require a 'large room'
|
||||
(more than 65 registrations, "inschrijvingen") and therefore need to be brought to the meeting that assigns large rooms to written exams on campus. The output
|
||||
follows the layout of the master file provided by E-campus, but may need changes if the master file changes.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Make sure you have the following software installed:
|
||||
- Python 3.x
|
||||
- Pip (Python package installer)
|
||||
|
||||
## Required Packages
|
||||
|
||||
The script depends on the following Python packages:
|
||||
- `pandas`
|
||||
- `openpyxl` (the engine pandas uses to read and write `.xlsx` files)
|
||||
|
||||
You can install the required packages using pip:
|
||||
```bash
|
||||
pip install pandas openpyxl
|
||||
```
|
||||
|
||||
## Description
|
||||
|
||||
The script performs the following operations:
|
||||
|
||||
1. Reads data from the specified Excel file and sheet.
|
||||
2. Keeps only the rows whose 'Examenvorm' column equals 'Schriftelijk' and whose 'Aant. inschr.' value is greater than 65.
|
||||
3. Selects specific columns from the filtered DataFrame.
|
||||
4. Formats time strings in the columns 'Beginuur S+' and 'Einduur S+'.
|
||||
5. Cleans the 'Docenten' column by removing specific keywords and trimming whitespace.
|
||||
6. Writes the processed DataFrame to a new Excel file.
|
||||
|
||||
## Usage
|
||||
|
||||
1. Place your Excel file in the same directory as the script.
|
||||
2. Update the `FILE_PATH` and `SHEET_NAME` constants at the top of `script.py` with your specific file path and sheet name (the listing under "Code" below shows them as `file_path` and `sheet_name`).
|
||||
3. Run the script:
|
||||
|
||||
```bash
|
||||
python script.py
|
||||
```
|
||||
|
||||
## Code
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
file_path = 'file.xlsx'
|
||||
sheet_name = 'ps (32)'
|
||||
|
||||
# Read the data from the Excel file
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
|
||||
# Filter the data based on certain criteria
|
||||
filtered_df = df[df['Examenvorm'] == 'Schriftelijk']
|
||||
filtered_df = filtered_df[filtered_df['Aant. inschr.'] > 65]
|
||||
filtered_df = filtered_df[['Datum S+', 'Beginuur S+', 'Einduur S+', 'Studiegidsnr.', 'Omschrijving', 'Docenten', 'Aant. inschr.']]
|
||||
|
||||
# Format the time strings
|
||||
filtered_df['Beginuur S+'] = filtered_df['Beginuur S+'].apply(lambda x: x.strftime('%H:%M'))
|
||||
filtered_df['Einduur S+'] = filtered_df['Einduur S+'].apply(lambda x: x.strftime('%H:%M'))
|
||||
filtered_df['Docenten'] = filtered_df['Docenten'].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '', regex=True).str.strip()
|
||||
|
||||
# Save the filtered and formatted data to a new Excel file
|
||||
filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False)
|
||||
```
|
||||
|
||||
## Additional Notes
|
||||
|
||||
- This script assumes that the input Excel file has specific columns like 'Examenvorm', 'Aant. inschr.', 'Datum S+', 'Beginuur S+', 'Einduur S+', 'Studiegidsnr.', 'Omschrijving', and 'Docenten'.
|
||||
- Make sure that the time columns ('Beginuur S+' and 'Einduur S+') are in datetime format in the original Excel file for the `.strftime('%H:%M')` method to work correctly.
|
||||
- The `Docenten` column will be cleaned by removing occurrences of the keywords 'Titularis', 'Co-Titularis', and 'Medewerker'.
|
||||
|
||||
Feel free to adjust the script according to your specific needs.
|
||||
|
||||
## License
|
||||
|
||||
This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
|
||||
Reference in New Issue
Block a user