first commit

2024-11-18 11:59:21 +01:00
parent d51c60d76d
commit e3e65a9c51
12 changed files with 1997 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,11 @@
+# Ignore .idea directories
+*.idea/
+
+# Ignore compiled Python files
+*.pyc
+__pycache__/
+
+# Ignore Excel files
+*.xlsx
+
+sisa_crawl/
--- a/dubbels/script.py
+++ b/dubbels/script.py
@@ -0,0 +1,20 @@
+import pandas as pd
+
+# Input workbook, worksheet and the column that is checked for repeated values.
+file_path = 'ps (30).xlsx'
+sheet_name = 'ps (30)'
+column_name = 'Student-ID'
+
+df = pd.read_excel(file_path, sheet_name=sheet_name)
+
+# Mark every row whose ID occurs more than once, then keep each such ID once.
+is_repeated = df.duplicated(subset=[column_name], keep=False)
+duplicate_ids = df.loc[is_repeated, column_name]
+unique_duplicate_ids = duplicate_ids.drop_duplicates()
+num_duplicates = len(unique_duplicate_ids)
+
+if unique_duplicate_ids.empty:
+    print("No duplicates found.")
+else:
+    print(f"Duplicated Student-ID values (count: {num_duplicates})  :")
+    print(unique_duplicate_ids)
--- a/lokalen/main.py
+++ b/lokalen/main.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+# Source workbook and worksheet holding the exam schedule export.
+file_path = 'file.xlsx'
+sheet_name = 'ps (32)'
+
+# Columns written to the output workbook, in output order.
+output_columns = ['Datum S+', 'Beginuur S+', 'Einduur S+', 'Studiegidsnr.', 'Omschrijving', 'Docenten', 'Aant. inschr.']
+
+df = pd.read_excel(file_path, sheet_name=sheet_name)
+
+# Keep written exams with more than 65 enrolments. The explicit copy makes the
+# column assignments below act on an independent frame instead of a slice of
+# df, which would trigger pandas' SettingWithCopyWarning.
+is_large_written = (df['Examenvorm'] == 'Schriftelijk') & (df['Aant. inschr.'] > 65)
+filtered_df = df.loc[is_large_written, output_columns].copy()
+
+
+def _format_time(value):
+    """Return value formatted as 'HH:MM'; missing values are passed through unchanged."""
+    return value if pd.isna(value) else value.strftime('%H:%M')
+
+
+#formatting the timestrings
+filtered_df['Beginuur S+'] = filtered_df['Beginuur S+'].apply(_format_time)
+filtered_df['Einduur S+'] = filtered_df['Einduur S+'].apply(_format_time)
+# Strip the role labels from the lecturer column, leaving the remaining text.
+filtered_df['Docenten'] = filtered_df['Docenten'].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '',
+                                                              regex=True).str.strip()
+
+filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False)
--- a/(32)_files/filelist.xml
+++ b/(32)_files/filelist.xml
@@ -0,0 +1,7 @@
+<xml xmlns:o="urn:schemas-microsoft-com:office:office">
+ <o:MainFile HRef="../ps%20(32).xls"/>
+ <o:File HRef="stylesheet.css"/>
+ <o:File HRef="tabstrip.htm"/>
+ <o:File HRef="sheet001.htm"/>
+ <o:File HRef="filelist.xml"/>
+</xml>
--- a/(32)_files/sheet001.htm
+++ b/(32)_files/sheet001.htm
--- a/(32)_files/stylesheet.css
+++ b/(32)_files/stylesheet.css
@@ -0,0 +1,175 @@
+tr
+	{mso-height-source:auto;}
+col
+	{mso-width-source:auto;}
+br
+	{mso-data-placement:same-cell;}
+.style0
+	{mso-number-format:General;
+	text-align:general;
+	vertical-align:bottom;
+	white-space:nowrap;
+	mso-rotate:0;
+	mso-background-source:auto;
+	mso-pattern:auto;
+	color:black;
+	font-size:11.0pt;
+	font-weight:400;
+	font-style:normal;
+	text-decoration:none;
+	font-family:Calibri, sans-serif;
+	mso-font-charset:0;
+	border:none;
+	mso-protection:locked visible;
+	mso-style-name:Normal;
+	mso-style-id:0;}
+td
+	{mso-style-parent:style0;
+	padding-top:1px;
+	padding-right:1px;
+	padding-left:1px;
+	mso-ignore:padding;
+	color:black;
+	font-size:11.0pt;
+	font-weight:400;
+	font-style:normal;
+	text-decoration:none;
+	font-family:Calibri, sans-serif;
+	mso-font-charset:0;
+	mso-number-format:General;
+	text-align:general;
+	vertical-align:bottom;
+	border:none;
+	mso-background-source:auto;
+	mso-pattern:auto;
+	mso-protection:locked visible;
+	white-space:nowrap;
+	mso-rotate:0;}
+.xl65
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	font-weight:700;
+	text-align:center;
+	vertical-align:middle;
+	border:.5pt solid black;
+	white-space:normal;}
+.xl66
+	{mso-style-parent:style0;
+	border:.5pt solid black;
+	white-space:normal;}
+.xl67
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	border:.5pt solid black;
+	white-space:normal;}
+.xl68
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Date";
+	border:.5pt solid black;
+	white-space:normal;}
+.xl69
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Time";
+	border:.5pt solid black;
+	white-space:normal;}
+.xl70
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	border-top:.5pt solid black;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl71
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl72
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:.5pt solid black;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl73
+	{mso-style-parent:style0;
+	border-top:.5pt solid black;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl74
+	{mso-style-parent:style0;
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl75
+	{mso-style-parent:style0;
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:.5pt solid black;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl76
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Date";
+	border-top:.5pt solid black;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl77
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Date";
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl78
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Date";
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:.5pt solid black;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl79
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Time";
+	border-top:.5pt solid black;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl80
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Time";
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:none;
+	border-left:.5pt solid black;
+	white-space:normal;}
+.xl81
+	{mso-style-parent:style0;
+	font-size:10.0pt;
+	mso-number-format:"Short Time";
+	border-top:none;
+	border-right:.5pt solid black;
+	border-bottom:.5pt solid black;
+	border-left:.5pt solid black;
+	white-space:normal;}
--- a/(32)_files/tabstrip.htm
+++ b/(32)_files/tabstrip.htm
@@ -0,0 +1,32 @@
+<html>
+<head>
+<meta http-equiv=Content-Type content="text/html; charset=utf-8">
+<meta name=ProgId content=Excel.Sheet>
+<meta name=Generator content="Microsoft Excel 15">
+<link id=Main-File rel=Main-File href="../ps%20(32).xls">
+
+<script language="JavaScript">
+<!--
+if (window.name!="frTabs")
+ window.location.replace(document.all.item("Main-File").href);
+//-->
+</script>
+<style>
+<!--
+A {
+    text-decoration:none;
+    color:#000000;
+    font-size:9pt;
+}
+-->
+</style>
+</head>
+<body topmargin=0 leftmargin=0 bgcolor="#808080">
+<table border=0 cellspacing=1>
+ <tr>
+ <td bgcolor="#FFFFFF" nowrap><b><small><small>&nbsp;<a href="sheet001.htm" target="frSheet"><font face="Arial" color="#000000">ps (32)</font></a>&nbsp;</small></small></b></td>
+
+ </tr>
+</table>
+</body>
+</html>
--- a/generator/script.py
+++ b/generator/script.py
@@ -0,0 +1,78 @@
+import pandas as pd
+
+def read_excel_file(file_path):
+    """Load the workbook at file_path into a DataFrame.
+
+    Any failure is reported on stdout and signalled to the caller by
+    returning None instead of raising.
+    """
+    try:
+        frame = pd.read_excel(file_path)
+    except Exception as e:
+        print(f"Error reading the Excel file: {e}")
+        return None
+    else:
+        return frame
+
+def filter_studiegidsnummer(df):
+    """Return a copy of the rows whose 'Studiegidsnummer' contains 'GES'.
+
+    Missing values in the column never match. When the column is absent, the
+    available columns are printed and an empty DataFrame is returned.
+    """
+    if 'Studiegidsnummer' not in df.columns:
+        # The message names the column exactly as it is looked up (capital S).
+        print("Column 'Studiegidsnummer' not found in the DataFrame.")
+        print("Available columns:", df.columns)
+        return pd.DataFrame()  # Return an empty DataFrame
+    contains_ges = df['Studiegidsnummer'].str.contains('GES', na=False)
+    return df[contains_ges].copy()
+
+def filter_opmerkingen(df):
+    """Return a copy of the rows whose 'Opmerkingen' lacks the text '24-25'.
+
+    Rows with a missing 'Opmerkingen' value are kept. When the column is
+    absent, the available columns are printed and an empty DataFrame is returned.
+    """
+    if 'Opmerkingen' in df.columns:
+        mentions_year = df['Opmerkingen'].str.contains('24-25', na=False)
+        return df[~mentions_year].copy()
+    print("Column 'Opmerkingen' not found in the DataFrame.")
+    print("Available columns:", df.columns)
+    return pd.DataFrame()  # Return an empty DataFrame
+
+def create_message_column(df):
+    """Add the mail body ('Message') and mail subject ('subject') columns.
+
+    The texts are built per row from the 'Omschrijving', 'Studiegidsnummer',
+    'Examenvorm', 'Examenduur' and the four begin/end time columns. The frame
+    is modified in place and also returned.
+
+    The columns are filled from per-row lists rather than DataFrame.apply,
+    because apply(axis=1) on an empty frame returns a DataFrame instead of a
+    Series; with lists an empty input simply gets two empty columns.
+    """
+    def build_message(row):
+        return (
+            f"Beste docent,\n\n"
+            f"Ik ben de examengegevens aan het controleren van {row['Omschrijving']} {row['Studiegidsnummer']}. De huidige gegevens zijn als volgt:\n\n"
+            f"{row['Examenvorm']} examen voor zowel eerste als tweede zit, {row['Examenduur']} minuten, tussen {row['Beginuur voormiddag']} en {row['Einduur voormiddag']} of {row['Beginuur namiddag']} en {row['Einduur namiddag']}.\n\n"
+            f"Gelden dezelfde gegevens voor dit academiejaar of moeten er nog wijzigingen doorgevoerd worden? Alvast dank voor je reactie!"
+        )
+
+    def build_subject(row):
+        return f"Examengegevens {row['Omschrijving']} {row['Studiegidsnummer']}"
+
+    df['Message'] = [build_message(row) for _, row in df.iterrows()]
+    df['subject'] = [build_subject(row) for _, row in df.iterrows()]
+    return df
+
+def save_to_excel(df, output_file_path):
+    """Write df to output_file_path as an Excel workbook without the index.
+
+    A failure while writing is reported on stdout and otherwise ignored.
+    """
+    try:
+        df.to_excel(output_file_path, index=False)
+    except Exception as write_error:
+        print(f"Error saving the Excel file: {write_error}")
+    return None
+
+def convert_time_format(time_str):
+    """Return time_str reformatted as 'HH:MM'.
+
+    The value is parsed with pandas.to_datetime. If parsing or formatting
+    fails, the error is printed and the input is returned unchanged.
+    """
+    try:
+        parsed = pd.to_datetime(time_str)
+        formatted = parsed.strftime('%H:%M')
+    except Exception as e:
+        print(f"Error converting time format: {e}")
+        return time_str
+    return formatted
+
+def apply_time_format_conversion(df, columns):
+    """Rewrite each listed column of df from 'HH:MM:SS' to 'HH:MM' text.
+
+    Values that do not parse as 'HH:MM:SS' are coerced to missing. The frame
+    is modified in place and also returned.
+    """
+    for name in columns:
+        parsed = pd.to_datetime(df[name], format='%H:%M:%S', errors='coerce')
+        df[name] = parsed.dt.strftime('%H:%M')
+    return df
+
+# Script entry point: filter the exam data and generate the mail texts.
+def main():
+    """Run the pipeline: read, filter, reformat times, build mails, save.
+
+    The pipeline stops early when the input cannot be read or when a filter
+    step leaves no rows. The filter helpers return a column-less empty frame
+    when their column is missing, which the later steps cannot process.
+    """
+    file_path = 'examengegevens2425.xlsx'
+    output_file_path = 'filtered_examengegevens2425.xlsx'
+
+    df = read_excel_file(file_path)
+    if df is None:
+        return
+
+    filtered_df = filter_studiegidsnummer(df)
+    if filtered_df.empty:
+        return
+
+    final_filtered_df = filter_opmerkingen(filtered_df)
+    if final_filtered_df.empty:
+        print("No rows left after filtering; nothing to save.")
+        return
+
+    # Convert time format for specified columns
+    time_columns = ['Beginuur voormiddag', 'Einduur voormiddag', 'Beginuur namiddag', 'Einduur namiddag']
+    final_filtered_df = apply_time_format_conversion(final_filtered_df, time_columns)
+    final_filtered_df = create_message_column(final_filtered_df)
+    save_to_excel(final_filtered_df, output_file_path)
+
+if __name__ == "__main__":
+    main()
--- a/random/script
+++ b/random/script
@@ -0,0 +1,131 @@
+import pandas as pd
+from openpyxl import load_workbook
+from dateutil import parser
+import re
+
+
+
+def list_sheets(file):
+    """Return the list of worksheet names in the workbook at file.
+
+    Raises:
+        ValueError: If the workbook cannot be opened or read; the underlying
+            error is printed and attached as the cause.
+    """
+    try:
+        workbook = load_workbook(filename=file, read_only=True)
+        try:
+            return workbook.sheetnames
+        finally:
+            # A read-only workbook keeps its file handle open until it is
+            # closed explicitly.
+            workbook.close()
+    except Exception as e:
+        print(e)
+        raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.") from e
+
+
+def dutch_date_parser(date_str):
+    """Parse a Dutch date string such as 'maandag 6 januari 2025'.
+
+    Dutch weekday names are removed and Dutch month names are replaced by
+    their English equivalents (both case-insensitively) before the text is
+    handed to dateutil with day-first ordering.
+
+    Returns:
+        The parsed datetime, or pandas.NaT when the text cannot be parsed.
+    """
+    # Remove Dutch day names
+    day_name_pattern = r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b'
+    date_str = re.sub(day_name_pattern, '', date_str, flags=re.IGNORECASE).strip()
+
+    # Translate Dutch month names to English
+    month_translation = {
+        'januari': 'January', 'februari': 'February', 'maart': 'March',
+        'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July',
+        'augustus': 'August', 'september': 'September', 'oktober': 'October',
+        'november': 'November', 'december': 'December'
+    }
+
+    # One substitution pass over all month names instead of one regex per month.
+    month_pattern = r'\b(' + '|'.join(month_translation) + r')\b'
+    date_str = re.sub(
+        month_pattern,
+        lambda match: month_translation[match.group(1).lower()],
+        date_str,
+        flags=re.IGNORECASE,
+    )
+
+    # Try parsing the modified date string
+    try:
+        return parser.parse(date_str, dayfirst=True)
+    except (ValueError, OverflowError):
+        # dateutil raises OverflowError (not a ValueError) for out-of-range numbers.
+        return pd.NaT
+
+def compare_roosters(base_file, comparison_file, output_file):
+    """Compare two exam-roster workbooks and write their differences to Excel.
+
+    Rows are matched on 'Code examenrooster'. For codes present in both files
+    the 'Beginuur S+', 'Datum S+' and 'Einduur S+' values are compared; codes
+    present in only one file are reported as missing from the other.
+
+    Args:
+        base_file: Path of the reference workbook.
+        comparison_file: Path of the workbook checked against the reference.
+            String values in its 'Datum S+' column are parsed with
+            dutch_date_parser before comparing.
+        output_file: Path of the Excel file that receives the differences.
+
+    Raises:
+        ValueError: If a workbook cannot be opened or loaded, is empty, or
+            lacks one of the required columns.
+    """
+    # Print the sheets available in both Excel files
+    base_sheets = list_sheets(base_file)
+    comparison_sheets = list_sheets(comparison_file)
+    print(f"Sheets in '{base_file}': {base_sheets}")
+    print(f"Sheets in '{comparison_file}': {comparison_sheets}")
+
+    # Function to load an Excel file with error handling
+    def load_excel(file):
+        """Read file with openpyxl; raise ValueError if it is unreadable or empty."""
+        try:
+            df = pd.read_excel(file, engine='openpyxl')
+            if df.empty:
+                raise ValueError(f"The file '{file}' has no sheets or is empty.")
+            return df
+        except Exception as e:
+            print(e)
+            raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.") from e
+
+    # Load the Excel files
+    base_df = load_excel(base_file)
+    comparison_df = load_excel(comparison_file)
+
+    # Ensure the columns we need are present in both files
+    key_column = 'Code examenrooster'
+    compared_columns = ['Beginuur S+', 'Datum S+', 'Einduur S+']
+    required_columns = [key_column] + compared_columns
+    for column in required_columns:
+        if column not in base_df.columns or column not in comparison_df.columns:
+            raise ValueError(f"Column '{column}' is missing from one of the files")
+
+    # Convert 'Datum S+' in comparison_df to the universal format
+    comparison_df['Datum S+'] = comparison_df['Datum S+'].apply(
+        lambda x: dutch_date_parser(x) if isinstance(x, str) else x
+    )
+
+    # Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes.
+    # Columns present in both files come out as '<name>_base' and '<name>_comp'.
+    merged_df = base_df.merge(
+        comparison_df,
+        on=key_column,
+        suffixes=('_base', '_comp'),
+        how='outer',  # Outer join to capture all differences
+        indicator=True  # Indicator to show if the row was in one or both files
+    )
+
+    # Create an empty list to store rows with differences
+    differences = []
+
+    # Iterate over each row to find discrepancies
+    for _, row in merged_df.iterrows():
+        # Only compare rows that exist in both files
+        if row['_merge'] == 'both':
+            differences_in_row = []
+
+            # Compare the columns
+            for column in compared_columns:
+                base_value = row.get(f"{column}_base", pd.NA)
+                comp_value = row.get(f"{column}_comp", pd.NA)
+
+                if pd.isna(base_value) and pd.isna(comp_value):
+                    continue  # Skip comparison if both are NaN
+                elif base_value != comp_value:
+                    differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})")
+
+            # If there are any differences in this row, add them to the differences list
+            if differences_in_row:
+                # The merge key is unsuffixed and identical on both sides.
+                row_data = {
+                    key_column: row[key_column],
+                    f"{key_column}_comp": row[key_column],
+                }
+                for col in compared_columns:
+                    # Base values live under the '_base' suffix after the merge;
+                    # the unsuffixed name does not exist in merged_df.
+                    row_data[col] = row.get(f"{col}_base", pd.NA)
+                    row_data[f"{col}_comp"] = row.get(f"{col}_comp", pd.NA)
+                row_data['Difference'] = "; ".join(differences_in_row)
+                differences.append(row_data)
+
+        elif row['_merge'] == 'left_only':
+            differences.append({
+                'Code examenrooster': row['Code examenrooster'],
+                'Difference': "Row missing in comparison file"
+            })
+        elif row['_merge'] == 'right_only':
+            differences.append({
+                'Code examenrooster': row['Code examenrooster'],
+                'Difference': "Row missing in base file"
+            })
+
+    # Create a DataFrame from the differences list
+    differences_df = pd.DataFrame(differences)
+
+    # Save the differences to an Excel file
+    differences_df.to_excel(output_file, index=False)
+
+    print(f"Differences saved to {output_file}")
+
+
+# Example usage: this call is at module level, so the comparison runs as soon
+# as the script is executed and writes its report to 'differences_output.xlsx'.
+compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx')
--- a/random/script.py
+++ b/random/script.py
@@ -0,0 +1,66 @@
+import pandas as pd
+from datetime import datetime
+import locale
+
+# Workbook and worksheet that contain the roster to update.
+file_path = 'bages rooster voor s.xlsx'
+sheet_name = 'rooster'
+
+df = pd.read_excel(file_path, sheet_name=sheet_name)
+
+# (first day, last day, Lesweek number) for every week that gets a number.
+_week_bounds = [
+    ('2025-01-06', '2025-01-12', 16),
+    ('2025-01-13', '2025-01-19', 17),
+    ('2025-01-20', '2025-01-26', 18),
+    ('2025-01-27', '2025-02-02', 19),
+
+    # add more ranges as needed
+]
+
+# Lookup table keyed by the (first day, last day) timestamp pair of each week.
+date_ranges = {
+    (pd.Timestamp(first_day), pd.Timestamp(last_day)): lesweek
+    for first_day, last_day, lesweek in _week_bounds
+}
+
+
+# Custom date parser function
+def parse_custom_date(date_str):
+    """Convert one 'Datum S+' cell to a datetime.
+
+    Args:
+        date_str: A Dutch date string in the form '%A %d %B %Y' (weekday, day,
+            month name, year), a value that is already a datetime, or a
+            missing value.
+
+    Returns:
+        pandas.NaT for missing values, the value itself when it is already a
+        datetime, otherwise the datetime parsed from the string.
+
+    Raises:
+        ValueError: If the string does not match the expected format.
+        TypeError: If the value is neither missing, a datetime, nor a string.
+        locale.Error: If the 'nl_NL.UTF-8' locale is not available.
+    """
+    if pd.isna(date_str):
+        return pd.NaT  # Return pandas NaT for missing dates
+    if isinstance(date_str, datetime):
+        # Cells that Excel stores as real dates arrive already parsed
+        # (pd.Timestamp is a datetime subclass); nothing to convert.
+        return date_str
+    if not isinstance(date_str, str):
+        raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")
+
+    # Remember the active setting so it can be restored afterwards, instead of
+    # forcing the process onto the 'C' locale.
+    previous_locale = locale.setlocale(locale.LC_TIME)
+    try:
+        # Set locale to Dutch so %A and %B match Dutch day and month names
+        locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8')
+        return datetime.strptime(date_str, '%A %d %B %Y')
+    except ValueError as e:
+        raise ValueError(f"Date conversion error: {e} for date string: {date_str}") from e
+    finally:
+        locale.setlocale(locale.LC_TIME, previous_locale)
+
+
+# Parse 'Datum S+' (when the column exists) and derive 'Lesweek' from it
+if 'Datum S+' in df.columns:
+    try:
+        # Run every cell through the custom parser; any failure aborts the script
+        parsed_dates = df['Datum S+'].apply(parse_custom_date)
+    except (ValueError, TypeError) as e:
+        print(f"Error: {e}")
+        # Re-raise so that execution stops after the message is printed
+        raise
+    df['Datum S+'] = parsed_dates
+
+
+    # Look up the Lesweek number of a single date
+    def update_lesweek(date):
+        """Return the Lesweek whose range contains date, or 0 if there is none."""
+        if pd.isna(date):  # Missing dates get the default value
+            return 0
+        matching_weeks = (
+            lesweek_value
+            for (first_day, last_day), lesweek_value in date_ranges.items()
+            if first_day <= date <= last_day
+        )
+        return next(matching_weeks, 0)  # 0 when the date is outside every range
+
+
+    # Fill 'Lesweek' from the parsed dates
+    df['Lesweek'] = df['Datum S+'].apply(update_lesweek)
+
+# Show the first rows so the result can be checked by eye
+print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head())
+
+# Write the updated roster to a new workbook
+df.to_excel('updated_rooster.xlsx', index=False)
--- a/studieprogramma's/login.py
+++ b/studieprogramma's/login.py
@@ -0,0 +1,63 @@
+import asyncio
+from pyppeteer import launch
+import logging
+
+logging.basicConfig(level=logging.INFO)
+
+
+async def crawl(url):
+    """Open url in a visible Chromium window and print every link on the page.
+
+    All errors are logged rather than raised, so the coroutine always returns
+    None. The browser is closed on every path once it has been launched,
+    including when opening the page tab fails.
+    """
+    browser = None
+    try:
+        # Launch a new Chromium browser with a visible window
+        logging.info('browser launching')
+        browser = await launch(headless=False)
+        # Open a new page
+        page = await browser.newPage()
+        logging.info('browser opened')
+
+        try:
+            # Navigate to the specified URL
+            await page.goto(url)
+            logging.info(f"Accessed {url}")
+        except Exception as e:
+            logging.error(f"Failed to navigate to {url}: {e}")
+            return
+
+        try:
+            # Wait for the page to fully load
+            await page.waitForSelector('body')
+        except Exception as e:
+            logging.error(f"Failed to load the page properly: {e}")
+            return
+
+        try:
+            # (Optional) Extract and print all links as an example
+            links = await page.evaluate('''() => {
+                return Array.from(document.querySelectorAll('a')).map(link => ({
+                    text: link.innerText,
+                    url: link.href
+                }));
+            }''')
+
+            for link in links:
+                print(f"Link text: {link['text']}, URL: {link['url']}")
+
+        except Exception as e:
+            logging.error(f"Error extracting or processing the content: {e}")
+
+    except Exception as e:
+        logging.critical(f"Critical error occurred: {e}")
+
+    finally:
+        # Single cleanup point: runs after the early returns above as well.
+        if browser is not None:
+            try:
+                await browser.close()
+            except Exception as e:
+                logging.critical(f"Critical error occurred: {e}")
+
+
+# Specify the URL of the web page you want to crawl
+url = 'https://www.google.com/'
+
+# Run the crawl function. asyncio.run() creates and closes its own event loop;
+# calling asyncio.get_event_loop() with no running loop is deprecated.
+asyncio.run(crawl(url))
--- a/studieprogramma's/main.py
+++ b/studieprogramma's/main.py