first commit

parent d51c60d76d
commit e3e65a9c51
11 .gitignore vendored Normal file
@@ -0,0 +1,11 @@
# Ignore .idea directories
*.idea/

# Ignore compiled Python files
*.pyc
__pycache__/

# Ignore Excel files
*.xlsx

sisa_crawl/
20 examen dubbels/script.py Normal file
@@ -0,0 +1,20 @@
import pandas as pd

# Variables
file_path = 'ps (30).xlsx'
sheet_name = 'ps (30)'
column_name = 'Student-ID'

df = pd.read_excel(file_path, sheet_name=sheet_name)

# Rows whose Student-ID occurs more than once, reduced to the ID column
duplicate_ids = df[df.duplicated(subset=[column_name], keep=False)][column_name]

# Each duplicated ID listed once
unique_duplicate_ids = duplicate_ids.drop_duplicates()

num_duplicates = len(unique_duplicate_ids)

if not unique_duplicate_ids.empty:
    print(f"Duplicated Student-ID values (count: {num_duplicates}):")
    print(unique_duplicate_ids)
else:
    print("No duplicates found.")
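As a possible follow-up (not part of this commit), the full duplicate rows could also be written out for review rather than only printed. A minimal sketch, assuming the same input file and column as above; the output file name is hypothetical:

import pandas as pd

df = pd.read_excel('ps (30).xlsx', sheet_name='ps (30)')
column_name = 'Student-ID'

# All rows that share a Student-ID, sorted so matching entries sit next to each other.
duplicate_rows = df[df.duplicated(subset=[column_name], keep=False)].sort_values(column_name)
duplicate_rows.to_excel('duplicate_student_ids.xlsx', index=False)  # hypothetical output name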
18 examen grote lokalen/main.py Normal file
@@ -0,0 +1,18 @@
import pandas as pd

file_path = 'file.xlsx'
sheet_name = 'ps (32)'

df = pd.read_excel(file_path, sheet_name=sheet_name)
filtered_df = df[df['Examenvorm'] == 'Schriftelijk']
filtered_df = filtered_df[filtered_df['Aant. inschr.'] > 65]
filtered_df = filtered_df[['Datum S+', 'Beginuur S+', 'Einduur S+', 'Studiegidsnr.', 'Omschrijving', 'Docenten', 'Aant. inschr.']]

# Format the time strings
filtered_df['Beginuur S+'] = filtered_df['Beginuur S+'].apply(lambda x: x.strftime('%H:%M'))
filtered_df['Einduur S+'] = filtered_df['Einduur S+'].apply(lambda x: x.strftime('%H:%M'))
# Strip the role labels from the lecturer names
filtered_df['Docenten'] = filtered_df['Docenten'].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '',
                                                              regex=True).str.strip()

filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False)
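Note: the two strftime calls above assume every cell in 'Beginuur S+' and 'Einduur S+' already holds a datetime/time object; a missing or text cell would raise an AttributeError. A minimal, hedged alternative (the '%H:%M:%S' input format is an assumption about how the export renders the times):

import pandas as pd

def format_time_column(series: pd.Series) -> pd.Series:
    """Render a column of times as 'HH:MM', leaving unparseable cells empty."""
    # Cast to string first so datetime.time objects and plain text are handled alike.
    parsed = pd.to_datetime(series.astype(str), format='%H:%M:%S', errors='coerce')
    return parsed.dt.strftime('%H:%M')

# e.g. filtered_df['Beginuur S+'] = format_time_column(filtered_df['Beginuur S+'])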
7 examen grote lokalen/ps (32)_files/filelist.xml Normal file
@@ -0,0 +1,7 @@
<xml xmlns:o="urn:schemas-microsoft-com:office:office">
<o:MainFile HRef="../ps%20(32).xls"/>
<o:File HRef="stylesheet.css"/>
<o:File HRef="tabstrip.htm"/>
<o:File HRef="sheet001.htm"/>
<o:File HRef="filelist.xml"/>
</xml>
1396 examen grote lokalen/ps (32)_files/sheet001.htm Normal file
File diff suppressed because it is too large
175 examen grote lokalen/ps (32)_files/stylesheet.css Normal file
@@ -0,0 +1,175 @@
tr
  {mso-height-source:auto;}
col
  {mso-width-source:auto;}
br
  {mso-data-placement:same-cell;}
.style0
  {mso-number-format:General;
  text-align:general;
  vertical-align:bottom;
  white-space:nowrap;
  mso-rotate:0;
  mso-background-source:auto;
  mso-pattern:auto;
  color:black;
  font-size:11.0pt;
  font-weight:400;
  font-style:normal;
  text-decoration:none;
  font-family:Calibri, sans-serif;
  mso-font-charset:0;
  border:none;
  mso-protection:locked visible;
  mso-style-name:Normal;
  mso-style-id:0;}
td
  {mso-style-parent:style0;
  padding-top:1px;
  padding-right:1px;
  padding-left:1px;
  mso-ignore:padding;
  color:black;
  font-size:11.0pt;
  font-weight:400;
  font-style:normal;
  text-decoration:none;
  font-family:Calibri, sans-serif;
  mso-font-charset:0;
  mso-number-format:General;
  text-align:general;
  vertical-align:bottom;
  border:none;
  mso-background-source:auto;
  mso-pattern:auto;
  mso-protection:locked visible;
  white-space:nowrap;
  mso-rotate:0;}
.xl65
  {mso-style-parent:style0;
  font-size:10.0pt;
  font-weight:700;
  text-align:center;
  vertical-align:middle;
  border:.5pt solid black;
  white-space:normal;}
.xl66
  {mso-style-parent:style0;
  border:.5pt solid black;
  white-space:normal;}
.xl67
  {mso-style-parent:style0;
  font-size:10.0pt;
  border:.5pt solid black;
  white-space:normal;}
.xl68
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border:.5pt solid black;
  white-space:normal;}
.xl69
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border:.5pt solid black;
  white-space:normal;}
.xl70
  {mso-style-parent:style0;
  font-size:10.0pt;
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl71
  {mso-style-parent:style0;
  font-size:10.0pt;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl72
  {mso-style-parent:style0;
  font-size:10.0pt;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
.xl73
  {mso-style-parent:style0;
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl74
  {mso-style-parent:style0;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl75
  {mso-style-parent:style0;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
.xl76
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl77
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl78
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
.xl79
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl80
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl81
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
32 examen grote lokalen/ps (32)_files/tabstrip.htm Normal file
@@ -0,0 +1,32 @@
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Excel.Sheet>
<meta name=Generator content="Microsoft Excel 15">
<link id=Main-File rel=Main-File href="../ps%20(32).xls">

<script language="JavaScript">
<!--
if (window.name!="frTabs")
  window.location.replace(document.all.item("Main-File").href);
//-->
</script>
<style>
<!--
A {
  text-decoration:none;
  color:#000000;
  font-size:9pt;
}
-->
</style>
</head>
<body topmargin=0 leftmargin=0 bgcolor="#808080">
<table border=0 cellspacing=1>
<tr>
  <td bgcolor="#FFFFFF" nowrap><b><small><small> <a href="sheet001.htm" target="frSheet"><font face="Arial" color="#000000">ps (32)</font></a> </small></small></b></td>

</tr>
</table>
</body>
</html>
78 examengegevens template generator/script.py Normal file
@@ -0,0 +1,78 @@
import pandas as pd

def read_excel_file(file_path):
    """Read the Excel file and return a DataFrame."""
    try:
        return pd.read_excel(file_path)
    except Exception as e:
        print(f"Error reading the Excel file: {e}")
        return None

def filter_studiegidsnummer(df):
    """Filter rows where 'Studiegidsnummer' contains 'GES'."""
    if 'Studiegidsnummer' not in df.columns:
        print("Column 'Studiegidsnummer' not found in the DataFrame.")
        print("Available columns:", df.columns)
        return pd.DataFrame()  # Return an empty DataFrame
    return df[df['Studiegidsnummer'].str.contains('GES', na=False)].copy()

def filter_opmerkingen(df):
    """Filter rows where 'Opmerkingen' does NOT contain '24-25'."""
    if 'Opmerkingen' not in df.columns:
        print("Column 'Opmerkingen' not found in the DataFrame.")
        print("Available columns:", df.columns)
        return pd.DataFrame()  # Return an empty DataFrame
    return df[~df['Opmerkingen'].str.contains('24-25', na=False)].copy()

def create_message_column(df):
    """Create 'Message' and 'subject' columns with the specified format."""
    df.loc[:, 'Message'] = df.apply(lambda row: (
        f"Beste docent,\n\n"
        f"Ik ben de examengegevens aan het controleren van {row['Omschrijving']} {row['Studiegidsnummer']}. De huidige gegevens zijn als volgt:\n\n"
        f"{row['Examenvorm']} examen voor zowel eerste als tweede zit, {row['Examenduur']} minuten, tussen {row['Beginuur voormiddag']} en {row['Einduur voormiddag']} of {row['Beginuur namiddag']} en {row['Einduur namiddag']}.\n\n"
        f"Gelden dezelfde gegevens voor dit academiejaar of moeten er nog wijzigingen doorgevoerd worden? Alvast dank voor je reactie!"
    ), axis=1)
    df.loc[:, 'subject'] = df.apply(lambda row: (
        f"Examengegevens {row['Omschrijving']} {row['Studiegidsnummer']}"
    ), axis=1)
    return df

def save_to_excel(df, output_file_path):
    """Save the DataFrame to a new Excel file."""
    try:
        df.to_excel(output_file_path, index=False)
    except Exception as e:
        print(f"Error saving the Excel file: {e}")

def convert_time_format(time_str):
    """Convert time from 'HH:MM:SS' to 'HH:MM'."""
    try:
        return pd.to_datetime(time_str).strftime('%H:%M')
    except Exception as e:
        print(f"Error converting time format: {e}")
        return time_str

def apply_time_format_conversion(df, columns):
    """Apply time format conversion to the specified columns in the DataFrame."""
    for column in columns:
        df[column] = pd.to_datetime(df[column], format='%H:%M:%S', errors='coerce').dt.strftime('%H:%M')
    return df

# Example usage within the main function
def main():
    file_path = 'examengegevens2425.xlsx'
    output_file_path = 'filtered_examengegevens2425.xlsx'

    df = read_excel_file(file_path)
    if df is not None:
        filtered_df = filter_studiegidsnummer(df)
        if not filtered_df.empty:
            final_filtered_df = filter_opmerkingen(filtered_df)
            # Convert the time format for the specified columns
            time_columns = ['Beginuur voormiddag', 'Einduur voormiddag', 'Beginuur namiddag', 'Einduur namiddag']
            final_filtered_df = apply_time_format_conversion(final_filtered_df, time_columns)
            final_filtered_df = create_message_column(final_filtered_df)
            save_to_excel(final_filtered_df, output_file_path)

if __name__ == "__main__":
    main()
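For reference, a minimal, hypothetical sketch of how the generated 'subject' and 'Message' columns could be consumed downstream; the output directory and the plain-text draft format are assumptions, not part of this commit:

from pathlib import Path
import pandas as pd

def export_drafts(df: pd.DataFrame, out_dir: str = 'drafts') -> None:
    """Write one plain-text draft per row, using the generated subject and message columns."""
    target = Path(out_dir)
    target.mkdir(exist_ok=True)
    for i, row in df.iterrows():
        draft = f"Subject: {row['subject']}\n\n{row['Message']}\n"
        (target / f"draft_{i}.txt").write_text(draft, encoding='utf-8')

# e.g. export_drafts(pd.read_excel('filtered_examengegevens2425.xlsx'))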
131 random/script 2.py Normal file
@@ -0,0 +1,131 @@
import pandas as pd
from openpyxl import load_workbook
from dateutil import parser
import re


def list_sheets(file):
    try:
        workbook = load_workbook(filename=file, read_only=True)
        sheets = workbook.sheetnames
        return sheets
    except Exception as e:
        print(e)
        raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.")


def dutch_date_parser(date_str):
    # Remove Dutch day names
    day_name_pattern = r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b'
    date_str = re.sub(day_name_pattern, '', date_str, flags=re.IGNORECASE).strip()

    # Translate Dutch month names to English
    month_translation = {
        'januari': 'January', 'februari': 'February', 'maart': 'March',
        'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July',
        'augustus': 'August', 'september': 'September', 'oktober': 'October',
        'november': 'November', 'december': 'December'
    }

    for dutch_month, english_month in month_translation.items():
        date_str = re.sub(r'\b' + dutch_month + r'\b', english_month, date_str, flags=re.IGNORECASE)

    # Try parsing the modified date string
    try:
        return parser.parse(date_str, dayfirst=True)
    except ValueError:
        return pd.NaT


def compare_roosters(base_file, comparison_file, output_file):
    # Print the sheets available in both Excel files
    base_sheets = list_sheets(base_file)
    comparison_sheets = list_sheets(comparison_file)
    print(f"Sheets in '{base_file}': {base_sheets}")
    print(f"Sheets in '{comparison_file}': {comparison_sheets}")

    # Function to load an Excel file with error handling
    def load_excel(file):
        try:
            df = pd.read_excel(file, engine='openpyxl')
            if df.empty:
                raise ValueError(f"The file '{file}' has no sheets or is empty.")
            return df
        except Exception as e:
            print(e)
            raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.")

    # Load the Excel files
    base_df = load_excel(base_file)
    comparison_df = load_excel(comparison_file)

    # Ensure the columns we need are present in both files
    required_columns = ['Code examenrooster', 'Beginuur S+', 'Datum S+', 'Einduur S+']
    for column in required_columns:
        if column not in base_df.columns or column not in comparison_df.columns:
            raise ValueError(f"Column '{column}' is missing from one of the files")

    # Convert 'Datum S+' in comparison_df to the universal format
    comparison_df['Datum S+'] = comparison_df['Datum S+'].apply(
        lambda x: dutch_date_parser(x) if isinstance(x, str) else x
    )

    # Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes
    merged_df = base_df.merge(
        comparison_df,
        on='Code examenrooster',
        suffixes=('_base', '_comp'),
        how='outer',  # Outer join to capture all differences
        indicator=True  # Indicator to show if the row was in one or both files
    )

    # Create an empty list to store rows with differences
    differences = []

    # Iterate over each row to find discrepancies
    for _, row in merged_df.iterrows():
        row_data = {}
        # Only compare rows that exist in both files
        if row['_merge'] == 'both':
            differences_in_row = []

            # Compare the columns
            for column in ['Beginuur S+', 'Datum S+', 'Einduur S+']:
                base_value = row.get(f"{column}_base", pd.NA)
                comp_value = row.get(f"{column}_comp", pd.NA)

                if pd.isna(base_value) and pd.isna(comp_value):
                    continue  # Skip comparison if both are NaN
                elif base_value != comp_value:
                    differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})")

            # If there are any differences in this row, add them to the differences list
            if differences_in_row:
                for col in required_columns:
                    # 'Code examenrooster' keeps its name after the merge; the other columns carry suffixes
                    row_data[col] = row.get(f"{col}_base", row.get(col, pd.NA))
                    row_data[f"{col}_comp"] = row.get(f"{col}_comp", pd.NA)
                row_data['Difference'] = "; ".join(differences_in_row)
                differences.append(row_data)

        elif row['_merge'] == 'left_only':
            differences.append({
                'Code examenrooster': row['Code examenrooster'],
                'Difference': "Row missing in comparison file"
            })
        elif row['_merge'] == 'right_only':
            differences.append({
                'Code examenrooster': row['Code examenrooster'],
                'Difference': "Row missing in base file"
            })

    # Create a DataFrame from the differences list
    differences_df = pd.DataFrame(differences)

    # Save the differences to an Excel file
    differences_df.to_excel(output_file, index=False)

    print(f"Differences saved to {output_file}")


# Example usage:
compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx')
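A quick, hypothetical sanity check for dutch_date_parser, run in the same module; the sample strings are invented and only illustrate the day-name stripping, month translation, and NaT fallback:

from datetime import datetime

assert dutch_date_parser('maandag 6 januari 2025') == datetime(2025, 1, 6)
assert dutch_date_parser('17 maart 2025') == datetime(2025, 3, 17)
assert pd.isna(dutch_date_parser('geen geldige datum'))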
66 random/script.py Normal file
@@ -0,0 +1,66 @@
import pandas as pd
from datetime import datetime
import locale

file_path = 'bages rooster voor s.xlsx'
sheet_name = 'rooster'

df = pd.read_excel(file_path, sheet_name=sheet_name)

# Lesson-week numbers keyed by the date range they cover
date_ranges = {
    (pd.Timestamp('2025-01-06'), pd.Timestamp('2025-01-12')): 16,
    (pd.Timestamp('2025-01-13'), pd.Timestamp('2025-01-19')): 17,
    (pd.Timestamp('2025-01-20'), pd.Timestamp('2025-01-26')): 18,
    (pd.Timestamp('2025-01-27'), pd.Timestamp('2025-02-02')): 19,
    # add more ranges as needed
}


# Custom date parser function
def parse_custom_date(date_str):
    if pd.isna(date_str):
        return pd.NaT  # Return pandas NaT for missing dates
    if isinstance(date_str, str):
        try:
            # Set the locale to Dutch
            locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8')
            return datetime.strptime(date_str, '%A %d %B %Y')
        except ValueError as e:
            raise ValueError(f"Date conversion error: {e} for date string: {date_str}")
        finally:
            # Reset the locale to the default setting
            locale.setlocale(locale.LC_TIME, 'C')
    else:
        raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")


# Ensure the column 'Datum S+' exists and is processed correctly
if 'Datum S+' in df.columns:
    try:
        # Convert the 'Datum S+' column to datetime using the custom parser
        df['Datum S+'] = df['Datum S+'].apply(parse_custom_date)
    except (ValueError, TypeError) as e:
        print(f"Error: {e}")
        # Optionally, re-raise the exception to stop execution
        raise


# Function to update Lesweek based on the date ranges
def update_lesweek(date):
    if pd.isna(date):  # Handle NaT values
        return 0
    for date_range, lesweek_value in date_ranges.items():
        if date_range[0] <= date <= date_range[1]:
            return lesweek_value
    return 0  # Default value if the date doesn't fall in any range


# Apply the function to the 'Datum S+' column
df['Lesweek'] = df['Datum S+'].apply(update_lesweek)

# Check the results
print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head())

# If needed, save the DataFrame to a new Excel file to verify the changes
df.to_excel('updated_rooster.xlsx', index=False)
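Note: locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8') raises locale.Error on machines where that locale is not generated, so parse_custom_date is environment-dependent. A minimal, hedged locale-free variant; it assumes the same 'weekday day month year' strings and maps the Dutch month names itself:

import re
from datetime import datetime

_MONTHS_NL = {'januari': 1, 'februari': 2, 'maart': 3, 'april': 4, 'mei': 5, 'juni': 6,
              'juli': 7, 'augustus': 8, 'september': 9, 'oktober': 10, 'november': 11, 'december': 12}

def parse_dutch_date(date_str: str) -> datetime:
    """Parse e.g. 'maandag 6 januari 2025' without touching the process locale."""
    match = re.search(r'(\d{1,2})\s+([a-z]+)\s+(\d{4})', date_str.lower())
    if not match:
        raise ValueError(f"Unrecognised date string: {date_str!r}")
    day, month_name, year = match.groups()
    return datetime(int(year), _MONTHS_NL[month_name], int(day))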
63 webcrawler studieprogramma's/login.py Normal file
@@ -0,0 +1,63 @@
import asyncio
from pyppeteer import launch
import logging

logging.basicConfig(level=logging.INFO)


async def crawl(url):
    try:
        # Launch a new Chromium browser with a visible window
        print('browser launching')
        browser = await launch(headless=False)
        # Open a new page
        page = await browser.newPage()
        print('browser opened')

        try:
            # Navigate to the specified URL
            await page.goto(url)
            logging.info(f"Accessed {url}")
        except Exception as e:
            logging.error(f"Failed to navigate to {url}: {e}")
            await browser.close()
            return

        try:
            # Wait for the page to fully load
            await page.waitForSelector('body')
        except Exception as e:
            logging.error(f"Failed to load the page properly: {e}")
            await browser.close()
            return

        try:
            # Extract the content of the page
            content = await page.content()
            # (Optional) Extract and print all links as an example
            links = await page.evaluate('''() => {
                return Array.from(document.querySelectorAll('a')).map(link => ({
                    text: link.innerText,
                    url: link.href
                }));
            }''')

            for link in links:
                print(f"Link text: {link['text']}, URL: {link['url']}")

        except Exception as e:
            logging.error(f"Error extracting or processing the content: {e}")

        finally:
            # Ensure the browser closes after execution
            await browser.close()

    except Exception as e:
        logging.critical(f"Critical error occurred: {e}")


# Specify the URL of the web page you want to crawl
url = 'https://www.google.com/'

# Run the crawl function
asyncio.get_event_loop().run_until_complete(crawl(url))
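A small note on the last line: on newer Python versions, asyncio.get_event_loop() emits a DeprecationWarning when no event loop is running, so the call may be better written as below. This is a suggested variant, not part of the commit; behaviour is otherwise the same:

if __name__ == '__main__':
    asyncio.run(crawl(url))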
0 webcrawler studieprogramma's/main.py Normal file