first commit
This commit is contained in:
parent
d51c60d76d
commit
e3e65a9c51
11
.gitignore
vendored
Normal file
11
.gitignore
vendored
Normal file
|
@ -0,0 +1,11 @@
|
|||
# Ignore .idea directories
|
||||
*.idea/
|
||||
|
||||
# Ignore compiled Python files
|
||||
*.pyc
|
||||
__pycache__/
|
||||
|
||||
# Ignore Excel files
|
||||
*.xlsx
|
||||
|
||||
sisa_crawl/
|
20
examen dubbels/script.py
Normal file
20
examen dubbels/script.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
"""Report the values of an ID column that occur more than once in an Excel sheet."""
import pandas as pd

# variables
file_path = 'ps (30).xlsx'
sheet_name = 'ps (30)'
column_name = 'Student-ID'


def find_duplicate_ids(df, column=column_name):
    """Return every value of *column* that occurs more than once in *df*.

    Each duplicated value is listed once, in order of first appearance,
    as a Series that keeps the index label of that first appearance.
    """
    # keep=False flags every member of a duplicate group, not just the repeats.
    is_duplicated = df.duplicated(subset=[column], keep=False)
    return df.loc[is_duplicated, column].drop_duplicates()


def main():
    """Load the configured sheet and print its duplicated IDs."""
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    unique_duplicate_ids = find_duplicate_ids(df, column_name)
    num_duplicates = len(unique_duplicate_ids)

    if not unique_duplicate_ids.empty:
        # The label follows column_name so the message stays correct when the
        # configured column changes.
        print(f"Duplicated {column_name} values (count: {num_duplicates}) :")
        print(unique_duplicate_ids)
    else:
        print("No duplicates found.")


if __name__ == "__main__":
    main()
|
18
examen grote lokalen/main.py
Normal file
18
examen grote lokalen/main.py
Normal file
|
@ -0,0 +1,18 @@
|
|||
"""Export the written exams with many enrolments to 'filtered_grote_lokalen.xlsx'."""
import pandas as pd

file_path = 'file.xlsx'
sheet_name = 'ps (32)'

# Columns kept in the exported overview, in output order.
OUTPUT_COLUMNS = ['Datum S+', 'Beginuur S+', 'Einduur S+', 'Studiegidsnr.',
                  'Omschrijving', 'Docenten', 'Aant. inschr.']
TIME_COLUMNS = ('Beginuur S+', 'Einduur S+')
# Role labels stripped from the 'Docenten' text.
ROLE_PATTERN = r'\b(Titularis|Co-Titularis|Medewerker)\b'


def _format_time(value):
    """Return *value* formatted as 'HH:MM'; missing values pass through unchanged."""
    if pd.isna(value):
        return value
    return value.strftime('%H:%M')


def select_large_written_exams(df, min_enrolments=65):
    """Return the written exams with more than *min_enrolments* enrolments.

    Keeps the rows whose 'Examenvorm' equals 'Schriftelijk' and whose
    'Aant. inschr.' exceeds *min_enrolments*, restricted to OUTPUT_COLUMNS.
    The two time columns are rendered as 'HH:MM' strings and the role labels
    are removed from 'Docenten'. The input frame is not modified.
    """
    is_written = df['Examenvorm'] == 'Schriftelijk'
    is_large = df['Aant. inschr.'] > min_enrolments
    # .copy() so the column assignments below never write into a view of df.
    result = df.loc[is_written & is_large, OUTPUT_COLUMNS].copy()

    # formatting the timestrings
    for column in TIME_COLUMNS:
        result[column] = result[column].apply(_format_time)

    result['Docenten'] = (
        result['Docenten'].str.replace(ROLE_PATTERN, '', regex=True).str.strip()
    )
    return result


def main():
    """Read the configured sheet, filter it and write the Excel overview."""
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    filtered_df = select_large_written_exams(df)
    filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False)


if __name__ == "__main__":
    main()
|
7
examen grote lokalen/ps (32)_files/filelist.xml
Normal file
7
examen grote lokalen/ps (32)_files/filelist.xml
Normal file
|
@ -0,0 +1,7 @@
|
|||
<xml xmlns:o="urn:schemas-microsoft-com:office:office">
|
||||
<o:MainFile HRef="../ps%20(32).xls"/>
|
||||
<o:File HRef="stylesheet.css"/>
|
||||
<o:File HRef="tabstrip.htm"/>
|
||||
<o:File HRef="sheet001.htm"/>
|
||||
<o:File HRef="filelist.xml"/>
|
||||
</xml>
|
1396
examen grote lokalen/ps (32)_files/sheet001.htm
Normal file
1396
examen grote lokalen/ps (32)_files/sheet001.htm
Normal file
File diff suppressed because it is too large
Load Diff
175
examen grote lokalen/ps (32)_files/stylesheet.css
Normal file
175
examen grote lokalen/ps (32)_files/stylesheet.css
Normal file
|
@ -0,0 +1,175 @@
|
|||
tr
|
||||
{mso-height-source:auto;}
|
||||
col
|
||||
{mso-width-source:auto;}
|
||||
br
|
||||
{mso-data-placement:same-cell;}
|
||||
.style0
|
||||
{mso-number-format:General;
|
||||
text-align:general;
|
||||
vertical-align:bottom;
|
||||
white-space:nowrap;
|
||||
mso-rotate:0;
|
||||
mso-background-source:auto;
|
||||
mso-pattern:auto;
|
||||
color:black;
|
||||
font-size:11.0pt;
|
||||
font-weight:400;
|
||||
font-style:normal;
|
||||
text-decoration:none;
|
||||
font-family:Calibri, sans-serif;
|
||||
mso-font-charset:0;
|
||||
border:none;
|
||||
mso-protection:locked visible;
|
||||
mso-style-name:Normal;
|
||||
mso-style-id:0;}
|
||||
td
|
||||
{mso-style-parent:style0;
|
||||
padding-top:1px;
|
||||
padding-right:1px;
|
||||
padding-left:1px;
|
||||
mso-ignore:padding;
|
||||
color:black;
|
||||
font-size:11.0pt;
|
||||
font-weight:400;
|
||||
font-style:normal;
|
||||
text-decoration:none;
|
||||
font-family:Calibri, sans-serif;
|
||||
mso-font-charset:0;
|
||||
mso-number-format:General;
|
||||
text-align:general;
|
||||
vertical-align:bottom;
|
||||
border:none;
|
||||
mso-background-source:auto;
|
||||
mso-pattern:auto;
|
||||
mso-protection:locked visible;
|
||||
white-space:nowrap;
|
||||
mso-rotate:0;}
|
||||
.xl65
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
font-weight:700;
|
||||
text-align:center;
|
||||
vertical-align:middle;
|
||||
border:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl66
|
||||
{mso-style-parent:style0;
|
||||
border:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl67
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
border:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl68
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Date";
|
||||
border:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl69
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Time";
|
||||
border:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl70
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
border-top:.5pt solid black;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl71
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl72
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:.5pt solid black;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl73
|
||||
{mso-style-parent:style0;
|
||||
border-top:.5pt solid black;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl74
|
||||
{mso-style-parent:style0;
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl75
|
||||
{mso-style-parent:style0;
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:.5pt solid black;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl76
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Date";
|
||||
border-top:.5pt solid black;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl77
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Date";
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl78
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Date";
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:.5pt solid black;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl79
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Time";
|
||||
border-top:.5pt solid black;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl80
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Time";
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:none;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
||||
.xl81
|
||||
{mso-style-parent:style0;
|
||||
font-size:10.0pt;
|
||||
mso-number-format:"Short Time";
|
||||
border-top:none;
|
||||
border-right:.5pt solid black;
|
||||
border-bottom:.5pt solid black;
|
||||
border-left:.5pt solid black;
|
||||
white-space:normal;}
|
32
examen grote lokalen/ps (32)_files/tabstrip.htm
Normal file
32
examen grote lokalen/ps (32)_files/tabstrip.htm
Normal file
|
@ -0,0 +1,32 @@
|
|||
<html>
|
||||
<head>
|
||||
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
|
||||
<meta name=ProgId content=Excel.Sheet>
|
||||
<meta name=Generator content="Microsoft Excel 15">
|
||||
<link id=Main-File rel=Main-File href="../ps%20(32).xls">
|
||||
|
||||
<script language="JavaScript">
|
||||
<!--
|
||||
if (window.name!="frTabs")
|
||||
window.location.replace(document.all.item("Main-File").href);
|
||||
//-->
|
||||
</script>
|
||||
<style>
|
||||
<!--
|
||||
A {
|
||||
text-decoration:none;
|
||||
color:#000000;
|
||||
font-size:9pt;
|
||||
}
|
||||
-->
|
||||
</style>
|
||||
</head>
|
||||
<body topmargin=0 leftmargin=0 bgcolor="#808080">
|
||||
<table border=0 cellspacing=1>
|
||||
<tr>
|
||||
<td bgcolor="#FFFFFF" nowrap><b><small><small> <a href="sheet001.htm" target="frSheet"><font face="Arial" color="#000000">ps (32)</font></a> </small></small></b></td>
|
||||
|
||||
</tr>
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
78
examengegevens template generator/script.py
Normal file
78
examengegevens template generator/script.py
Normal file
|
@ -0,0 +1,78 @@
|
|||
import pandas as pd
|
||||
|
||||
def read_excel_file(file_path):
    """Load *file_path* into a DataFrame.

    Any failure is reported on stdout and signalled by returning None.
    """
    frame = None
    try:
        frame = pd.read_excel(file_path)
    except Exception as error:
        print(f"Error reading the Excel file: {error}")
    return frame
|
||||
|
||||
def filter_studiegidsnummer(df):
    """Return a copy of the rows whose 'Studiegidsnummer' contains 'GES'.

    Rows with a missing 'Studiegidsnummer' never match. When the column is
    absent the problem is reported on stdout and an empty DataFrame is
    returned.
    """
    column = 'Studiegidsnummer'
    if column not in df.columns:
        # Report the exact (case-sensitive) name that was looked up.
        print(f"Column '{column}' not found in the DataFrame.")
        print("Available columns:", list(df.columns))
        return pd.DataFrame()  # Return an empty DataFrame
    contains_ges = df[column].str.contains('GES', na=False)
    return df[contains_ges].copy()
|
||||
|
||||
def filter_opmerkingen(df, exclude_text='24-25'):
    """Return a copy of the rows whose 'Opmerkingen' does NOT contain *exclude_text*.

    *exclude_text* is matched literally (not as a regular expression) and
    defaults to '24-25'. Rows with a missing 'Opmerkingen' are kept. When the
    column is absent the problem is reported on stdout and an empty DataFrame
    is returned.
    """
    if 'Opmerkingen' not in df.columns:
        print("Column 'Opmerkingen' not found in the DataFrame.")
        print("Available columns:", df.columns)
        return pd.DataFrame()  # Return an empty DataFrame
    # na=False: a missing remark cannot contain the text, so the row is kept.
    has_text = df['Opmerkingen'].str.contains(exclude_text, na=False, regex=False)
    return df[~has_text].copy()
|
||||
|
||||
def create_message_column(df):
    """Add the 'Message' and 'subject' columns to *df* and return it.

    Both columns are built per row from 'Omschrijving', 'Studiegidsnummer',
    'Examenvorm', 'Examenduur' and the four begin/end time columns.
    *df* is modified in place; the same object is returned. An empty frame
    simply gains two empty columns.
    """
    # Row dicts instead of DataFrame.apply(axis=1): apply returns a DataFrame
    # (not a Series) for an empty frame, which cannot be assigned to a column.
    rows = df.to_dict('records')

    df['Message'] = [
        (
            f"Beste docent,\n\n"
            f"Ik ben de examengegevens aan het controleren van {row['Omschrijving']} {row['Studiegidsnummer']}. De huidige gegevens zijn als volgt:\n\n"
            f"{row['Examenvorm']} examen voor zowel eerste als tweede zit, {row['Examenduur']} minuten, tussen {row['Beginuur voormiddag']} en {row['Einduur voormiddag']} of {row['Beginuur namiddag']} en {row['Einduur namiddag']}.\n\n"
            f"Gelden dezelfde gegevens voor dit academiejaar of moeten er nog wijzigingen doorgevoerd worden? Alvast dank voor je reactie!"
        )
        for row in rows
    ]
    df['subject'] = [
        f"Examengegevens {row['Omschrijving']} {row['Studiegidsnummer']}"
        for row in rows
    ]
    return df
|
||||
|
||||
def save_to_excel(df, output_file_path):
    """Write *df* (without its index) to *output_file_path*.

    A failure is printed instead of raised, so the function always returns None.
    """
    try:
        df.to_excel(output_file_path, index=False)
    except Exception as error:
        print(f"Error saving the Excel file: {error}")
    return None
|
||||
|
||||
def convert_time_format(time_str):
    """Return *time_str* re-formatted from 'HH:MM:SS' to 'HH:MM'.

    When the value cannot be parsed or formatted, the error is printed and
    the input is handed back unchanged.
    """
    try:
        parsed = pd.to_datetime(time_str)
        short_form = parsed.strftime('%H:%M')
    except Exception as error:
        print(f"Error converting time format: {error}")
        return time_str
    return short_form
|
||||
|
||||
def apply_time_format_conversion(df, columns):
    """Rewrite each of *columns* in *df* from 'HH:MM:SS' to 'HH:MM' text.

    Values that do not match 'HH:MM:SS' are coerced to missing. *df* is
    modified in place and also returned.
    """
    def _to_hours_minutes(series):
        parsed = pd.to_datetime(series, format='%H:%M:%S', errors='coerce')
        return parsed.dt.strftime('%H:%M')

    for name in columns:
        df[name] = _to_hours_minutes(df[name])
    return df
|
||||
|
||||
# Example usage within the main function
|
||||
def main():
    """Run the pipeline: read, filter, format times, add messages, save.

    Stops silently when the workbook cannot be read or when no row passes
    the 'Studiegidsnummer' filter.
    """
    file_path = 'examengegevens2425.xlsx'
    output_file_path = 'filtered_examengegevens2425.xlsx'
    time_columns = ['Beginuur voormiddag', 'Einduur voormiddag', 'Beginuur namiddag', 'Einduur namiddag']

    df = read_excel_file(file_path)
    if df is None:
        return

    filtered_df = filter_studiegidsnummer(df)
    if filtered_df.empty:
        return

    result = filter_opmerkingen(filtered_df)
    # Convert time format for specified columns
    result = apply_time_format_conversion(result, time_columns)
    result = create_message_column(result)
    save_to_excel(result, output_file_path)


if __name__ == "__main__":
    main()
|
131
random/script 2.py
Normal file
131
random/script 2.py
Normal file
|
@ -0,0 +1,131 @@
|
|||
import pandas as pd
|
||||
from openpyxl import load_workbook
|
||||
from dateutil import parser
|
||||
import re
|
||||
|
||||
|
||||
|
||||
def list_sheets(file):
    """Return the sheet names of the workbook at *file*.

    Raises:
        ValueError: if the workbook cannot be opened or read; the underlying
            error is printed and attached as the exception's cause.
    """
    workbook = None
    try:
        workbook = load_workbook(filename=file, read_only=True)
        return workbook.sheetnames
    except Exception as e:
        print(e)
        raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.") from e
    finally:
        # A read-only workbook keeps its file handle open until closed explicitly.
        if workbook is not None:
            workbook.close()
|
||||
|
||||
|
||||
# Dutch weekday names, matched as whole words regardless of case.
_DUTCH_DAY_NAME_RE = re.compile(
    r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b',
    re.IGNORECASE,
)

# Dutch month name -> English month name understood by dateutil.
_DUTCH_TO_ENGLISH_MONTH = {
    'januari': 'January', 'februari': 'February', 'maart': 'March',
    'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July',
    'augustus': 'August', 'september': 'September', 'oktober': 'October',
    'november': 'November', 'december': 'December',
}
_DUTCH_MONTH_RE = re.compile(
    r'\b(' + '|'.join(_DUTCH_TO_ENGLISH_MONTH) + r')\b',
    re.IGNORECASE,
)


def _translate_month(match):
    """Return the English month name for a matched Dutch month name."""
    word = match.group(0)
    return _DUTCH_TO_ENGLISH_MONTH.get(word.lower(), word)


def dutch_date_parser(date_str):
    """Parse a date string that may contain Dutch day and month names.

    Dutch weekday names are removed and Dutch month names are translated to
    English before the text is handed to dateutil with ``dayfirst=True``.

    Returns:
        The parsed ``datetime``, or ``pd.NaT`` when the text cannot be parsed.
    """
    # Remove Dutch day names
    cleaned = _DUTCH_DAY_NAME_RE.sub('', date_str).strip()

    # Translate Dutch month names to English
    cleaned = _DUTCH_MONTH_RE.sub(_translate_month, cleaned)

    # Try parsing the modified date string
    try:
        return parser.parse(cleaned, dayfirst=True)
    except (ValueError, OverflowError):
        # dateutil documents OverflowError for dates beyond the platform's
        # largest C integer; treat it like any other unparseable value.
        return pd.NaT
|
||||
|
||||
def compare_roosters(base_file, comparison_file, output_file):
    """Compare two exam-roster workbooks and write the differences to Excel.

    Rows are matched on 'Code examenrooster'. For codes present in both
    files the 'Beginuur S+', 'Datum S+' and 'Einduur S+' values are compared;
    codes present in only one file are reported as missing from the other.
    String values of 'Datum S+' in the comparison file are parsed with
    ``dutch_date_parser`` before comparing.

    Args:
        base_file: path of the reference workbook.
        comparison_file: path of the workbook to check against the reference.
        output_file: path of the Excel report to write.

    Raises:
        ValueError: if a workbook cannot be opened or loaded, is empty, or
            lacks one of the required columns.
    """
    key_column = 'Code examenrooster'
    compared_columns = ['Beginuur S+', 'Datum S+', 'Einduur S+']
    required_columns = [key_column] + compared_columns

    # Print the sheets available in both Excel files
    base_sheets = list_sheets(base_file)
    comparison_sheets = list_sheets(comparison_file)
    print(f"Sheets in '{base_file}': {base_sheets}")
    print(f"Sheets in '{comparison_file}': {comparison_sheets}")

    # Function to load an Excel file with error handling
    def load_excel(file):
        try:
            df = pd.read_excel(file, engine='openpyxl')
        except Exception as e:
            print(e)
            raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.") from e
        # Checked outside the try so this specific message is not replaced by
        # the generic "Could not load" error above.
        if df.empty:
            raise ValueError(f"The file '{file}' has no sheets or is empty.")
        return df

    # Load the Excel files
    base_df = load_excel(base_file)
    comparison_df = load_excel(comparison_file)

    # Ensure the columns we need are present in both files
    for column in required_columns:
        if column not in base_df.columns or column not in comparison_df.columns:
            raise ValueError(f"Column '{column}' is missing from one of the files")

    # Convert 'Datum S+' in comparison_df to the universal format
    comparison_df['Datum S+'] = comparison_df['Datum S+'].apply(
        lambda x: dutch_date_parser(x) if isinstance(x, str) else x
    )

    # Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes
    merged_df = base_df.merge(
        comparison_df,
        on=key_column,
        suffixes=('_base', '_comp'),
        how='outer',  # Outer join to capture all differences
        indicator=True  # Indicator to show if the row was in one or both files
    )

    # Create an empty list to store rows with differences
    differences = []

    # Iterate over each row to find discrepancies
    for _, row in merged_df.iterrows():
        origin = row['_merge']

        if origin == 'left_only':
            differences.append({
                key_column: row[key_column],
                'Difference': "Row missing in comparison file"
            })
            continue
        if origin == 'right_only':
            differences.append({
                key_column: row[key_column],
                'Difference': "Row missing in base file"
            })
            continue

        # Only rows that exist in both files are compared column by column.
        differences_in_row = []
        for column in compared_columns:
            base_value = row.get(f"{column}_base", pd.NA)
            comp_value = row.get(f"{column}_comp", pd.NA)

            if pd.isna(base_value) and pd.isna(comp_value):
                continue  # Skip comparison if both are NaN
            if base_value != comp_value:
                differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})")

        if not differences_in_row:
            continue

        # The join key is the only shared column that merge leaves unsuffixed;
        # it is identical on both sides of a matched row.
        row_data = {
            key_column: row[key_column],
            f"{key_column}_comp": row[key_column],
        }
        for column in compared_columns:
            # Base values live under '<column>_base' after the merge; the plain
            # column name no longer exists in the merged frame.
            row_data[column] = row.get(f"{column}_base", pd.NA)
            row_data[f"{column}_comp"] = row.get(f"{column}_comp", pd.NA)
        row_data['Difference'] = "; ".join(differences_in_row)
        differences.append(row_data)

    # Create a DataFrame from the differences list
    differences_df = pd.DataFrame(differences)

    # Save the differences to an Excel file
    differences_df.to_excel(output_file, index=False)

    print(f"Differences saved to {output_file}")
|
||||
|
||||
|
||||
# Example usage: run the comparison only when executed as a script, so that
# importing this module does not read or write any workbook.
if __name__ == "__main__":
    compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx')
|
66
random/script.py
Normal file
66
random/script.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import locale
|
||||
|
||||
# Workbook and worksheet that hold the roster.
file_path = 'bages rooster voor s.xlsx'
sheet_name = 'rooster'

df = pd.read_excel(file_path, sheet_name=sheet_name)

# (first day, last day) -> Lesweek number.
date_ranges = {
    (pd.Timestamp(first_day), pd.Timestamp(last_day)): lesweek
    for first_day, last_day, lesweek in [
        ('2025-01-06', '2025-01-12', 16),
        ('2025-01-13', '2025-01-19', 17),
        ('2025-01-20', '2025-01-26', 18),
        ('2025-01-27', '2025-02-02', 19),
        # add more ranges as needed
    ]
}
|
||||
|
||||
|
||||
# Custom date parser function
# Dutch names are looked up in these tables instead of switching the
# process-wide locale, which is global state and fails when the nl_NL locale
# is not installed on the machine.
_DUTCH_DAY_NAMES = frozenset({
    'maandag', 'dinsdag', 'woensdag', 'donderdag', 'vrijdag', 'zaterdag', 'zondag',
})
_DUTCH_MONTH_NUMBERS = {
    'januari': 1, 'februari': 2, 'maart': 3, 'april': 4, 'mei': 5, 'juni': 6,
    'juli': 7, 'augustus': 8, 'september': 9, 'oktober': 10, 'november': 11,
    'december': 12,
}


def parse_custom_date(date_str):
    """Parse a Dutch long-form date such as 'maandag 6 januari 2025'.

    The expected layout is '<day name> <day> <month name> <4-digit year>';
    names are matched case-insensitively.

    Returns:
        ``pd.NaT`` for a missing value, otherwise a ``datetime``.

    Raises:
        ValueError: if a string does not match the expected layout or does
            not describe a valid calendar date.
        TypeError: if the value is neither missing nor a string.
    """
    if pd.isna(date_str):
        return pd.NaT  # Return pandas NaT for missing dates
    if not isinstance(date_str, str):
        raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")

    try:
        # Unpacking raises ValueError when there are not exactly four parts.
        day_name, day, month_name, year = date_str.split()
        if day_name.lower() not in _DUTCH_DAY_NAMES:
            raise ValueError(f"unknown day name '{day_name}'")
        month = _DUTCH_MONTH_NUMBERS.get(month_name.lower())
        if month is None:
            raise ValueError(f"unknown month name '{month_name}'")
        if not (day.isdigit() and year.isdigit() and len(year) == 4):
            raise ValueError("day and 4-digit year must be numeric")
        return datetime(int(year), month, int(day))
    except ValueError as e:
        raise ValueError(f"Date conversion error: {e} for date string: {date_str}") from e
|
||||
|
||||
|
||||
# Parse the 'Datum S+' column into datetimes, but only when the sheet has it.
if 'Datum S+' in df:
    try:
        # Every cell goes through the custom parser.
        df['Datum S+'] = df['Datum S+'].apply(parse_custom_date)
    except (ValueError, TypeError) as error:
        # Report the offending value, then stop: the error is re-raised.
        print(f"Error: {error}")
        raise
|
||||
|
||||
|
||||
# Function to update Lesweek based on date ranges
def update_lesweek(date):
    """Return the Lesweek number of the first range in ``date_ranges`` containing *date*.

    Range bounds are inclusive. Missing dates and dates outside every range
    yield 0.
    """
    if pd.isna(date):  # Handle NaT values
        return 0
    matching_weeks = (
        lesweek
        for (first_day, last_day), lesweek in date_ranges.items()
        if first_day <= date <= last_day
    )
    # Default value if date doesn't fall in any range
    return next(matching_weeks, 0)
|
||||
|
||||
|
||||
# Derive the Lesweek number of every row from its 'Datum S+' value.
df['Lesweek'] = df['Datum S+'].map(update_lesweek)

# Show the head of the frame so the parsed dates can be checked by eye.
print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head())

# Write the updated roster to a new workbook.
df.to_excel('updated_rooster.xlsx', index=False)
|
63
webcrawler studieprogramma's/login.py
Normal file
63
webcrawler studieprogramma's/login.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
import asyncio
|
||||
from pyppeteer import launch
|
||||
import logging
|
||||
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
async def crawl(url):
    """Open *url* in a visible Chromium window and print every link on the page.

    Navigation, page-load and extraction failures are logged as errors;
    any other failure (including a failed browser launch) is logged as
    critical. Nothing is raised to the caller and the function returns None.
    The browser, once launched, is closed on every path.
    """
    browser = None
    try:
        # Launch a new Chromium browser with a visible window
        print('browser launching')
        browser = await launch(headless=False)
        # Open a new page
        page = await browser.newPage()
        print('browser opened')

        try:
            # Navigate to the specified URL
            await page.goto(url)
            logging.info(f"Accessed {url}")
        except Exception as e:
            logging.error(f"Failed to navigate to {url}: {e}")
            return

        try:
            # Wait for the page to fully load
            await page.waitForSelector('body')
        except Exception as e:
            logging.error(f"Failed to load the page properly: {e}")
            return

        try:
            # (Optional) Extract and print all links as an example
            links = await page.evaluate('''() => {
                return Array.from(document.querySelectorAll('a')).map(link => ({
                    text: link.innerText,
                    url: link.href
                }));
            }''')

            for link in links:
                print(f"Link text: {link['text']}, URL: {link['url']}")
        except Exception as e:
            logging.error(f"Error extracting or processing the content: {e}")

    except Exception as e:
        logging.critical(f"Critical error occurred: {e}")

    finally:
        # Single clean-up point: runs after the early returns above and also
        # when newPage() fails, so a launched browser is never left running.
        if browser is not None:
            try:
                await browser.close()
            except Exception as e:
                logging.critical(f"Critical error occurred: {e}")
|
||||
|
||||
|
||||
# Specify the URL of the web page you want to crawl
url = 'https://www.google.com/'

# Run the crawl function only when executed as a script.
if __name__ == "__main__":
    # asyncio.get_event_loop() is deprecated when no loop is current, so the
    # loop is created and installed explicitly. It is left open afterwards,
    # exactly as the original code left it.
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    loop.run_until_complete(crawl(url))
|
0
webcrawler studieprogramma's/main.py
Normal file
0
webcrawler studieprogramma's/main.py
Normal file
Loading…
Reference in New Issue
Block a user