From e3e65a9c51880bf473832f30a47fc3318ddada4f Mon Sep 17 00:00:00 2001 From: bdaneels Date: Mon, 18 Nov 2024 11:59:21 +0100 Subject: [PATCH] first commit --- .gitignore | 11 + examen dubbels/script.py | 20 + examen grote lokalen/main.py | 18 + .../ps (32)_files/filelist.xml | 7 + .../ps (32)_files/sheet001.htm | 1396 +++++++++++++++++ .../ps (32)_files/stylesheet.css | 175 +++ .../ps (32)_files/tabstrip.htm | 32 + examengegevens template generator/script.py | 78 + random/script 2.py | 131 ++ random/script.py | 66 + webcrawler studieprogramma's/login.py | 63 + webcrawler studieprogramma's/main.py | 0 12 files changed, 1997 insertions(+) create mode 100644 .gitignore create mode 100644 examen dubbels/script.py create mode 100644 examen grote lokalen/main.py create mode 100644 examen grote lokalen/ps (32)_files/filelist.xml create mode 100644 examen grote lokalen/ps (32)_files/sheet001.htm create mode 100644 examen grote lokalen/ps (32)_files/stylesheet.css create mode 100644 examen grote lokalen/ps (32)_files/tabstrip.htm create mode 100644 examengegevens template generator/script.py create mode 100644 random/script 2.py create mode 100644 random/script.py create mode 100644 webcrawler studieprogramma's/login.py create mode 100644 webcrawler studieprogramma's/main.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b83c864 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +# Ignore .idea directories +*.idea/ + +# Ignore compiled Python files +*.pyc +__pycache__/ + +# Ignore Excel files +*.xlsx + +sisa_crawl/ \ No newline at end of file diff --git a/examen dubbels/script.py b/examen dubbels/script.py new file mode 100644 index 0000000..2973e41 --- /dev/null +++ b/examen dubbels/script.py @@ -0,0 +1,20 @@ +import pandas as pd + +#variables +file_path = 'ps (30).xlsx' +sheet_name = 'ps (30)' +column_name = 'Student-ID' + +df = pd.read_excel(file_path, sheet_name=sheet_name) + +duplicate_ids= df[df.duplicated(subset=[column_name], keep=False)][column_name] + +unique_duplicate_ids = duplicate_ids.drop_duplicates() + +num_duplicates = len(unique_duplicate_ids) + +if not unique_duplicate_ids.empty: + print(f"Duplicated Student-ID values (count: {num_duplicates}) :") + print(unique_duplicate_ids) +else: + print("No duplicates found.") \ No newline at end of file diff --git a/examen grote lokalen/main.py b/examen grote lokalen/main.py new file mode 100644 index 0000000..81bc7c3 --- /dev/null +++ b/examen grote lokalen/main.py @@ -0,0 +1,18 @@ +import pandas as pd + +file_path = 'file.xlsx' +sheet_name = 'ps (32)' + +df = pd.read_excel(file_path, sheet_name=sheet_name) +filtered_df = df[df['Examenvorm'] == 'Schriftelijk' ] +filtered_df = filtered_df[filtered_df['Aant. inschr.'] > 65] +filtered_df = filtered_df[['Datum S+','Beginuur S+','Einduur S+', 'Studiegidsnr.', 'Omschrijving', 'Docenten', 'Aant. inschr.']] + + +#formatting the timestrings +filtered_df['Beginuur S+'] = filtered_df['Beginuur S+'].apply(lambda x: x.strftime('%H:%M')) +filtered_df['Einduur S+'] = filtered_df['Einduur S+'].apply(lambda x: x.strftime('%H:%M')) +filtered_df['Docenten'] = filtered_df['Docenten'].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '', + regex=True).str.strip() + +filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False) \ No newline at end of file diff --git a/examen grote lokalen/ps (32)_files/filelist.xml b/examen grote lokalen/ps (32)_files/filelist.xml new file mode 100644 index 0000000..7f0a3c8 --- /dev/null +++ b/examen grote lokalen/ps (32)_files/filelist.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/examen grote lokalen/ps (32)_files/sheet001.htm b/examen grote lokalen/ps (32)_files/sheet001.htm new file mode 100644 index 0000000..1bdc372 --- /dev/null +++ b/examen grote lokalen/ps (32)_files/sheet001.htm @@ -0,0 +1,1396 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Studiegidsnr.OmschrijvingDocentenSPAant. inschr.ExamenvormTijdslots + aanvr.SSSExamen groepAant. stdnt. gr.Datum S+Beginuur S+Einduur S+Facilitieit S+Code + examenroosterExtra info voor + studenten
1003FLWGESHistorische + methodeMedewerker + Vermoesen,Reinoud - Titularis Stabel,Peter - Medewerker Bauwelinck,Egon - + Medewerker Dupont,Milan - Medewerker Schepers,Ingrid6253SchriftelijkN125310/01/20259:0012:00 1003FLWGES2491_224001_S1_1 
1010FLWGESGesch. + van de middeleeuwenTitularis + Soens,Tim6187SchriftelijkN11876/01/202510:0012:30 1010FLWGES2761_224001_S1_1 
1066FLWGESHistory + islamic worldTitularis + Dekkiche,Malika6186SchriftelijkN118613/01/202510:0013:00 1066FLWGES3875_224001_S1_1 
1054FLWGESInleiding + wijsbegeerteTitularis + Moormann,Emma6142PC + examenN114220/01/202510:0012:00 1054FLWGES3647_224001_D1_1 
1012FLWGESGesch. + van de nieuwste tijdTitularis + Greefs,Hilde6116SchriftelijkN11167/01/20259:0013:00 1012FLWGES2820_224001_S1_1 
1058FLWGESheuristiek + NSTTitularis + de Smaele,Henk - Titularis Shaery-Yazdi,Roschanack3105SchriftelijkN110522/01/202510:0012:00 1058FLWGES3712_224001_S1_1 
1082FLWGESGeschiedenis + van BelgiëTitularis + Saelens,Wout399SchriftelijkN19924/01/202510:0012:00 1082FLWGES4204_224001_S1_1 
1055FLWGESGeschiedenis + en taalTitularis + Scheltiens,Vincent398PC + examenN19815/01/20259:0014:00 1055FLWGES3658_224001_D1_1 
1086FLWGESGeschiedenis + en getalTitularis + Vermoesen,Reinoud396SchriftelijkN19630/01/20259:0011:00 1086FLWGES4284_224001_S1_1 
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud988MondelingN5     1070FLWGES3962_224001_M5_5NIET TE ROOSTEREN
Titularis De Groot,Julie
Titularis Wynants,Nele
Titularis Puttevils,Jeroen
Titularis Greefs,Hilde
Titularis Gelderblom,Oscar
Titularis De Munck,Bert
Titularis Blondé,Bruno
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud988MondelingN6     1070FLWGES3962_224001_M6_6NIET TE ROOSTEREN
Titularis De Groot,Julie
Titularis Wynants,Nele
Titularis Puttevils,Jeroen
Titularis Greefs,Hilde
Titularis Gelderblom,Oscar
Titularis De Munck,Bert
Titularis Blondé,Bruno
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud988MondelingN4     1070FLWGES3962_224001_M4_4NIET TE ROOSTEREN
Titularis De Groot,Julie
Titularis Wynants,Nele
Titularis Puttevils,Jeroen
Titularis Greefs,Hilde
Titularis Gelderblom,Oscar
Titularis De Munck,Bert
Titularis Blondé,Bruno
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud988MondelingN3     1070FLWGES3962_224001_M3_3NIET TE ROOSTEREN
Titularis De Groot,Julie
Titularis Wynants,Nele
Titularis Puttevils,Jeroen
Titularis Greefs,Hilde
Titularis Gelderblom,Oscar
Titularis De Munck,Bert
Titularis Blondé,Bruno
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud - Titularis De Groot,Julie - Titularis Wynants,Nele - + Titularis Puttevils,Jeroen - Titularis Greefs,Hilde - Titularis + Gelderblom,Oscar - Titularis De Munck,Bert - Titularis Blondé,Bruno - + Medewerker Kole,Jeroen - Medewerker Tanis,Nelleke - Medewerker Van + Laer,Matthias - Medewerker Samoy,Gitte - Medewerker Welslau,Hannah - + Medewerker Heijmans,Elisabeth - Medewerker Schepers,Ingrid988MondelingN1 29/01/20259:0012:00 1070FLWGES3962_224001_M1_1Presentaties
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud988MondelingN2     1070FLWGES3962_224001_M2_2NIET TE ROOSTEREN
Titularis De Groot,Julie
Titularis Wynants,Nele
Titularis Puttevils,Jeroen
Titularis Greefs,Hilde
Titularis Gelderblom,Oscar
Titularis De Munck,Bert
Titularis Blondé,Bruno
1070FLWGESBachelorscriptieCo-Titularis + Vermoesen,Reinoud988SchriftelijkN1 ########10:0016:00 1070FLWGES3962_224001_S1_7Deadline inleveren scriptie afstuderen in + februari
Titularis De Groot,Julie
Titularis Wynants,Nele
Titularis Puttevils,Jeroen
Titularis Greefs,Hilde
Titularis Gelderblom,Oscar
Titularis De Munck,Bert
Titularis Blondé,Bruno
1047FLWGESNST: + cultuurgeschiedenisTitularis + de Smaele,Henk657MondelingN41410/01/202513:3017:00 1047FLWGES3508_224001_M4_4studenten + bij beginuur aanwezig
1047FLWGESNST: + cultuurgeschiedenisTitularis + de Smaele,Henk657MondelingN31410/01/20259:3012:30 1047FLWGES3508_224001_M3_3studenten + bij beginuur aanwezig
1047FLWGESNST: + cultuurgeschiedenisTitularis + de Smaele,Henk657MondelingN2159/01/202513:3017:00 1047FLWGES3508_224001_M2_2studenten + bij beginuur aanwezig
1047FLWGESNST: + cultuurgeschiedenisTitularis + de Smaele,Henk657MondelingN1149/01/20259:3012:30 1047FLWGES3508_224001_M1_1studenten + bij beginuur aanwezig
1043FLWGESEME: + Politics and InstitutionsTitularis + Heinemann,Julia655MondelingN11216/01/20259:0012:00 1043FLWGES3423_224001_M1_1 
1043FLWGESEME: + Politics and InstitutionsTitularis + Heinemann,Julia655MondelingN21216/01/202513:3016:30 1043FLWGES3423_224001_M2_2 
1043FLWGESEME: + Politics and InstitutionsTitularis + Heinemann,Julia655MondelingN31217/01/20259:0012:00 1043FLWGES3423_224001_M3_3 
1043FLWGESEME: + Politics and InstitutionsTitularis + Heinemann,Julia655MondelingN4717/01/202513:3015:30 1043FLWGES3423_224001_M4_4 
1043FLWGESEME: + Politics and InstitutionsTitularis + Heinemann,Julia655MondelingN51215/01/20259:0012:00 1043FLWGES3423_224001_M5_5 
1046FLWGESNST: + politiek en instellingenTitularis + Beyen,Marnix653MondelingN2827/01/202513:0017:00 1046FLWGES3489_224001_M2_2 
1046FLWGESNST: + politiek en instellingenTitularis + Beyen,Marnix653MondelingN31228/01/20259:0014:20 1046FLWGES3489_224001_M3_3 
1046FLWGESNST: + politiek en instellingenTitularis + Beyen,Marnix653MondelingN4828/01/202513:0017:00 1046FLWGES3489_224001_M4_4 
1046FLWGESNST: + politiek en instellingenTitularis + Beyen,Marnix653MondelingN51314/01/20259:0014:40 1046FLWGES3489_224001_M5_5 
1046FLWGESNST: + politiek en instellingenTitularis + Beyen,Marnix653MondelingN11227/01/20259:0014:20 1046FLWGES3489_224001_M1_1 
1083FLWGESGesch. + van de NederlandenTitularis + Delsaerdt,Pierre352SchriftelijkN1529/01/20259:0012:00 1083FLWGES4225_224001_S1_1 
1041FLWGESME: + cultuurgeschiedenisTitularis + Stabel,Peter638MondelingN31030/01/202510:0012:00 1041FLWGES3387_224001_M3_5 
1041FLWGESME: + cultuurgeschiedenisTitularis + Stabel,Peter638MondelingN21029/01/202514:0016:00 1041FLWGES3387_224001_M2_3 
1041FLWGESME: + cultuurgeschiedenisTitularis + Stabel,Peter638MondelingN11029/01/202510:0012:00 1041FLWGES3387_224001_M1_2 
1041FLWGESME: + cultuurgeschiedenisTitularis + Stabel,Peter638MondelingN4830/01/202514:0016:00 1041FLWGES3387_224001_M4_4 
1041FLWGESME: + cultuurgeschiedenisTitularis + Stabel,Peter638SchriftelijkN1388/01/202510:0012:00 1041FLWGES3387_224001_S1_1 
1034FLWGESInl. + tot de joodse cultuurCo-Titularis + Dunkelgrün,Theodor - Titularis Hofmeester,Karin317MondelingN11124/01/20259:0013:00 1034FLWGES3240_224001_M1_1 
1034FLWGESInl. + tot de joodse cultuurCo-Titularis + Dunkelgrün,Theodor - Titularis Hofmeester,Karin317MondelingN2324/01/202514:0018:00 1034FLWGES3240_224001_M2_2 
1056FLWGESheuristiek + MECo-Titularis + Dekkiche,Malika - Titularis Zennaro,Nicolò315MondelingN11514/01/202514:0018:00 1056FLWGES3676_224001_M1_1 
1048FLWGESNST: + soc.-econ. geschiedenisTitularis + Gelderblom,Oscar - Titularis Greefs,Hilde613SchriftelijkN1126/01/20259:3012:30 1048FLWGES3525_224001_S1_1 
+ + + + diff --git a/examen grote lokalen/ps (32)_files/stylesheet.css b/examen grote lokalen/ps (32)_files/stylesheet.css new file mode 100644 index 0000000..f6c8308 --- /dev/null +++ b/examen grote lokalen/ps (32)_files/stylesheet.css @@ -0,0 +1,175 @@ +tr + {mso-height-source:auto;} +col + {mso-width-source:auto;} +br + {mso-data-placement:same-cell;} +.style0 + {mso-number-format:General; + text-align:general; + vertical-align:bottom; + white-space:nowrap; + mso-rotate:0; + mso-background-source:auto; + mso-pattern:auto; + color:black; + font-size:11.0pt; + font-weight:400; + font-style:normal; + text-decoration:none; + font-family:Calibri, sans-serif; + mso-font-charset:0; + border:none; + mso-protection:locked visible; + mso-style-name:Normal; + mso-style-id:0;} +td + {mso-style-parent:style0; + padding-top:1px; + padding-right:1px; + padding-left:1px; + mso-ignore:padding; + color:black; + font-size:11.0pt; + font-weight:400; + font-style:normal; + text-decoration:none; + font-family:Calibri, sans-serif; + mso-font-charset:0; + mso-number-format:General; + text-align:general; + vertical-align:bottom; + border:none; + mso-background-source:auto; + mso-pattern:auto; + mso-protection:locked visible; + white-space:nowrap; + mso-rotate:0;} +.xl65 + {mso-style-parent:style0; + font-size:10.0pt; + font-weight:700; + text-align:center; + vertical-align:middle; + border:.5pt solid black; + white-space:normal;} +.xl66 + {mso-style-parent:style0; + border:.5pt solid black; + white-space:normal;} +.xl67 + {mso-style-parent:style0; + font-size:10.0pt; + border:.5pt solid black; + white-space:normal;} +.xl68 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Date"; + border:.5pt solid black; + white-space:normal;} +.xl69 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Time"; + border:.5pt solid black; + white-space:normal;} +.xl70 + {mso-style-parent:style0; + font-size:10.0pt; + border-top:.5pt solid black; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl71 + {mso-style-parent:style0; + font-size:10.0pt; + border-top:none; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl72 + {mso-style-parent:style0; + font-size:10.0pt; + border-top:none; + border-right:.5pt solid black; + border-bottom:.5pt solid black; + border-left:.5pt solid black; + white-space:normal;} +.xl73 + {mso-style-parent:style0; + border-top:.5pt solid black; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl74 + {mso-style-parent:style0; + border-top:none; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl75 + {mso-style-parent:style0; + border-top:none; + border-right:.5pt solid black; + border-bottom:.5pt solid black; + border-left:.5pt solid black; + white-space:normal;} +.xl76 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Date"; + border-top:.5pt solid black; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl77 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Date"; + border-top:none; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl78 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Date"; + border-top:none; + border-right:.5pt solid black; + border-bottom:.5pt solid black; + border-left:.5pt solid black; + white-space:normal;} +.xl79 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Time"; + border-top:.5pt solid black; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl80 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Time"; + border-top:none; + border-right:.5pt solid black; + border-bottom:none; + border-left:.5pt solid black; + white-space:normal;} +.xl81 + {mso-style-parent:style0; + font-size:10.0pt; + mso-number-format:"Short Time"; + border-top:none; + border-right:.5pt solid black; + border-bottom:.5pt solid black; + border-left:.5pt solid black; + white-space:normal;} diff --git a/examen grote lokalen/ps (32)_files/tabstrip.htm b/examen grote lokalen/ps (32)_files/tabstrip.htm new file mode 100644 index 0000000..b44055e --- /dev/null +++ b/examen grote lokalen/ps (32)_files/tabstrip.htm @@ -0,0 +1,32 @@ + + + + + + + + + + + + + + + + +
 ps (32) 
+ + diff --git a/examengegevens template generator/script.py b/examengegevens template generator/script.py new file mode 100644 index 0000000..4f6fa4f --- /dev/null +++ b/examengegevens template generator/script.py @@ -0,0 +1,78 @@ +import pandas as pd + +def read_excel_file(file_path): + """Read the Excel file and return a DataFrame.""" + try: + return pd.read_excel(file_path) + except Exception as e: + print(f"Error reading the Excel file: {e}") + return None + +def filter_studiegidsnummer(df): + """Filter rows where 'studiegidsnummer' contains 'GES'.""" + if 'Studiegidsnummer' not in df.columns: + print("Column 'studiegidsnummer' not found in the DataFrame.") + print("Available columns:", df.columns) + return pd.DataFrame() # Return an empty DataFrame + return df[df['Studiegidsnummer'].str.contains('GES', na=False)].copy() + +def filter_opmerkingen(df): + """Filter rows where 'Opmerkingen' does NOT contain '24-25'.""" + if 'Opmerkingen' not in df.columns: + print("Column 'Opmerkingen' not found in the DataFrame.") + print("Available columns:", df.columns) + return pd.DataFrame() # Return an empty DataFrame + return df[~df['Opmerkingen'].str.contains('24-25', na=False)].copy() + +def create_message_column(df): + """Create 'Message' and 'subject' columns with the specified format.""" + df.loc[:, 'Message'] = df.apply(lambda row: ( + f"Beste docent,\n\n" + f"Ik ben de examengegevens aan het controleren van {row['Omschrijving']} {row['Studiegidsnummer']}. De huidige gegevens zijn als volgt:\n\n" + f"{row['Examenvorm']} examen voor zowel eerste als tweede zit, {row['Examenduur']} minuten, tussen {row['Beginuur voormiddag']} en {row['Einduur voormiddag']} of {row['Beginuur namiddag']} en {row['Einduur namiddag']}.\n\n" + f"Gelden dezelfde gegevens voor dit academiejaar of moeten er nog wijzigingen doorgevoerd worden? Alvast dank voor je reactie!" + ), axis=1) + df.loc[:, 'subject'] = df.apply(lambda row: ( + f"Examengegevens {row['Omschrijving']} {row['Studiegidsnummer']}" + ), axis=1) + return df + +def save_to_excel(df, output_file_path): + """Save the DataFrame to a new Excel file.""" + try: + df.to_excel(output_file_path, index=False) + except Exception as e: + print(f"Error saving the Excel file: {e}") + +def convert_time_format(time_str): + """Convert time from 'HH:MM:SS' to 'HH:MM'.""" + try: + return pd.to_datetime(time_str).strftime('%H:%M') + except Exception as e: + print(f"Error converting time format: {e}") + return time_str + +def apply_time_format_conversion(df, columns): + """Apply time format conversion to specified columns in the DataFrame.""" + for column in columns: + df[column] = pd.to_datetime(df[column], format='%H:%M:%S', errors='coerce').dt.strftime('%H:%M') + return df + +# Example usage within the main function +def main(): + file_path = 'examengegevens2425.xlsx' + output_file_path = 'filtered_examengegevens2425.xlsx' + + df = read_excel_file(file_path) + if df is not None: + filtered_df = filter_studiegidsnummer(df) + if not filtered_df.empty: + final_filtered_df = filter_opmerkingen(filtered_df) + # Convert time format for specified columns + time_columns = ['Beginuur voormiddag', 'Einduur voormiddag', 'Beginuur namiddag', 'Einduur namiddag'] + final_filtered_df = apply_time_format_conversion(final_filtered_df, time_columns) + final_filtered_df = create_message_column(final_filtered_df) + save_to_excel(final_filtered_df, output_file_path) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/random/script 2.py b/random/script 2.py new file mode 100644 index 0000000..ea1718b --- /dev/null +++ b/random/script 2.py @@ -0,0 +1,131 @@ +import pandas as pd +from openpyxl import load_workbook +from dateutil import parser +import re + + + +def list_sheets(file): + try: + workbook = load_workbook(filename=file, read_only=True) + sheets = workbook.sheetnames + return sheets + except Exception as e: + print(e) + raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.") + + +def dutch_date_parser(date_str): + # Remove Dutch day names + day_name_pattern = r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b' + date_str = re.sub(day_name_pattern, '', date_str, flags=re.IGNORECASE).strip() + + # Translate Dutch month names to English + month_translation = { + 'januari': 'January', 'februari': 'February', 'maart': 'March', + 'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July', + 'augustus': 'August', 'september': 'September', 'oktober': 'October', + 'november': 'November', 'december': 'December' + } + + for dutch_month, english_month in month_translation.items(): + date_str = re.sub(r'\b' + dutch_month + r'\b', english_month, date_str, flags=re.IGNORECASE) + + # Try parsing the modified date string + try: + return parser.parse(date_str, dayfirst=True) + except ValueError: + return pd.NaT + +def compare_roosters(base_file, comparison_file, output_file): + # Print the sheets available in both Excel files + base_sheets = list_sheets(base_file) + comparison_sheets = list_sheets(comparison_file) + print(f"Sheets in '{base_file}': {base_sheets}") + print(f"Sheets in '{comparison_file}': {comparison_sheets}") + + # Function to load an Excel file with error handling + def load_excel(file): + try: + df = pd.read_excel(file, engine='openpyxl') + if df.empty: + raise ValueError(f"The file '{file}' has no sheets or is empty.") + return df + except Exception as e: + print(e) + raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.") + + # Load the Excel files + base_df = load_excel(base_file) + comparison_df = load_excel(comparison_file) + + # Ensure the columns we need are present in both files + required_columns = ['Code examenrooster', 'Beginuur S+', 'Datum S+', 'Einduur S+'] + for column in required_columns: + if column not in base_df.columns or column not in comparison_df.columns: + raise ValueError(f"Column '{column}' is missing from one of the files") + + # Convert 'Datum S+' in comparison_df to the universal format + comparison_df['Datum S+'] = comparison_df['Datum S+'].apply( + lambda x: dutch_date_parser(x) if isinstance(x, str) else x + ) + + # Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes + merged_df = base_df.merge( + comparison_df, + on='Code examenrooster', + suffixes=('_base', '_comp'), + how='outer', # Outer join to capture all differences + indicator=True # Indicator to show if the row was in one or both files + ) + + # Create an empty list to store rows with differences + differences = [] + + # Iterate over each row to find discrepancies + for _, row in merged_df.iterrows(): + row_data = {} + # Only compare rows that exist in both files + if row['_merge'] == 'both': + differences_in_row = [] + + # Compare the columns + for column in ['Beginuur S+', 'Datum S+', 'Einduur S+']: + base_value = row.get(f"{column}_base", pd.NA) + comp_value = row.get(f"{column}_comp", pd.NA) + + if pd.isna(base_value) and pd.isna(comp_value): + continue # Skip comparison if both are NaN + elif base_value != comp_value: + differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})") + + # If there are any differences in this row, add them to the differences list + if differences_in_row: + for col in required_columns: + row_data[col] = row.get(col, pd.NA) + row_data[f"{col}_comp"] = row.get(f"{col}_comp", pd.NA) + row_data['Difference'] = "; ".join(differences_in_row) + differences.append(row_data) + + elif row['_merge'] == 'left_only': + differences.append({ + 'Code examenrooster': row['Code examenrooster'], + 'Difference': "Row missing in comparison file" + }) + elif row['_merge'] == 'right_only': + differences.append({ + 'Code examenrooster': row['Code examenrooster'], + 'Difference': "Row missing in base file" + }) + + # Create a DataFrame from the differences list + differences_df = pd.DataFrame(differences) + + # Save the differences to an Excel file + differences_df.to_excel(output_file, index=False) + + print(f"Differences saved to {output_file}") + + +# Example usage: +compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx') diff --git a/random/script.py b/random/script.py new file mode 100644 index 0000000..f728f73 --- /dev/null +++ b/random/script.py @@ -0,0 +1,66 @@ +import pandas as pd +from datetime import datetime +import locale + +file_path = 'bages rooster voor s.xlsx' +sheet_name = 'rooster' + +df = pd.read_excel(file_path, sheet_name=sheet_name) + +date_ranges = { + (pd.Timestamp('2025-01-06'), pd.Timestamp('2025-01-12')): 16, + (pd.Timestamp('2025-01-13'), pd.Timestamp('2025-01-19')): 17, + (pd.Timestamp('2025-01-20'), pd.Timestamp('2025-01-26')): 18, + (pd.Timestamp('2025-01-27'), pd.Timestamp('2025-02-02')): 19, + + # add more ranges as needed +} + + +# Custom date parser function +def parse_custom_date(date_str): + if pd.isna(date_str): + return pd.NaT # Return pandas NaT for missing dates + if isinstance(date_str, str): + try: + # Set locale to Dutch + locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8') + return datetime.strptime(date_str, '%A %d %B %Y') + except ValueError as e: + raise ValueError(f"Date conversion error: {e} for date string: {date_str}") + finally: + # Reset locale to the default setting + locale.setlocale(locale.LC_TIME, 'C') + else: + raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}") + + +# Ensure the column 'Datum S+' exists and is processed correctly +if 'Datum S+' in df.columns: + try: + # Convert 'Datum S+' column to datetime using the custom parser + df['Datum S+'] = df['Datum S+'].apply(parse_custom_date) + except (ValueError, TypeError) as e: + print(f"Error: {e}") + # Optionally, re-raise the exception if you want to stop execution + raise + + + # Function to update Lesweek based on date ranges + def update_lesweek(date): + if pd.isna(date): # Handle NaT values + return 0 + for date_range, lesweek_value in date_ranges.items(): + if date_range[0] <= date <= date_range[1]: + return lesweek_value + return 0 # Default value if date doesn't fall in any range + + + # Apply the function to 'Datum S+' column + df['Lesweek'] = df['Datum S+'].apply(update_lesweek) + +# Check the results +print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head()) + +# If needed, you can save the DataFrame to a new Excel file to verify changes +df.to_excel('updated_rooster.xlsx', index=False) \ No newline at end of file diff --git a/webcrawler studieprogramma's/login.py b/webcrawler studieprogramma's/login.py new file mode 100644 index 0000000..4a574a5 --- /dev/null +++ b/webcrawler studieprogramma's/login.py @@ -0,0 +1,63 @@ +import asyncio +from pyppeteer import launch +import logging + +logging.basicConfig(level=logging.INFO) + + +async def crawl(url): + try: + # Launch a new Chromium browser with a visible window + print('browser launching') + browser = await launch(headless=False) + # Open a new page + page = await browser.newPage() + print('browser opened') + + try: + # Navigate to the specified URL + await page.goto(url) + logging.info(f"Accessed {url}") + except Exception as e: + logging.error(f"Failed to navigate to {url}: {e}") + await browser.close() + return + + try: + # Wait for the page to fully load + await page.waitForSelector('body') + except Exception as e: + logging.error(f"Failed to load the page properly: {e}") + await browser.close() + return + + try: + # Extract the content of the page + content = await page.content() + # (Optional) Extract and print all links as an example + links = await page.evaluate('''() => { + return Array.from(document.querySelectorAll('a')).map(link => ({ + text: link.innerText, + url: link.href + })); + }''') + + for link in links: + print(f"Link text: {link['text']}, URL: {link['url']}") + + except Exception as e: + logging.error(f"Error extracting or processing the content: {e}") + + finally: + # Ensure the browser closes after execution + await browser.close() + + except Exception as e: + logging.critical(f"Critical error occurred: {e}") + + +# Specify the URL of the web page you want to crawl +url = 'https://www.google.com/' + +# Run the crawl function +asyncio.get_event_loop().run_until_complete(crawl(url)) \ No newline at end of file diff --git a/webcrawler studieprogramma's/main.py b/webcrawler studieprogramma's/main.py new file mode 100644 index 0000000..e69de29