first commit

parent d51c60d76d
commit e3e65a9c51
11 .gitignore vendored Normal file
@@ -0,0 +1,11 @@
# Ignore .idea directories
*.idea/

# Ignore compiled Python files
*.pyc
__pycache__/

# Ignore Excel files
*.xlsx

sisa_crawl/
20 examen dubbels/script.py Normal file
@@ -0,0 +1,20 @@
import pandas as pd

# Variables
file_path = 'ps (30).xlsx'
sheet_name = 'ps (30)'
column_name = 'Student-ID'

df = pd.read_excel(file_path, sheet_name=sheet_name)

# Rows whose Student-ID occurs more than once, reduced to the ID column
duplicate_ids = df[df.duplicated(subset=[column_name], keep=False)][column_name]

# Each duplicated ID listed once
unique_duplicate_ids = duplicate_ids.drop_duplicates()

num_duplicates = len(unique_duplicate_ids)

if not unique_duplicate_ids.empty:
    print(f"Duplicated Student-ID values (count: {num_duplicates}):")
    print(unique_duplicate_ids)
else:
    print("No duplicates found.")
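As a possible follow-up (not part of this commit), the full duplicate rows could also be written out for review rather than only printed. A minimal sketch, assuming the same input file and column as above; the output file name is hypothetical:

import pandas as pd

df = pd.read_excel('ps (30).xlsx', sheet_name='ps (30)')
column_name = 'Student-ID'

# All rows that share a Student-ID, sorted so matching entries sit next to each other.
duplicate_rows = df[df.duplicated(subset=[column_name], keep=False)].sort_values(column_name)
duplicate_rows.to_excel('duplicate_student_ids.xlsx', index=False)  # hypothetical output name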
18 examen grote lokalen/main.py Normal file
@@ -0,0 +1,18 @@
import pandas as pd

file_path = 'file.xlsx'
sheet_name = 'ps (32)'

df = pd.read_excel(file_path, sheet_name=sheet_name)
filtered_df = df[df['Examenvorm'] == 'Schriftelijk']
filtered_df = filtered_df[filtered_df['Aant. inschr.'] > 65]
filtered_df = filtered_df[['Datum S+', 'Beginuur S+', 'Einduur S+', 'Studiegidsnr.', 'Omschrijving', 'Docenten', 'Aant. inschr.']]

# Format the time strings
filtered_df['Beginuur S+'] = filtered_df['Beginuur S+'].apply(lambda x: x.strftime('%H:%M'))
filtered_df['Einduur S+'] = filtered_df['Einduur S+'].apply(lambda x: x.strftime('%H:%M'))
# Strip the role labels from the lecturer names
filtered_df['Docenten'] = filtered_df['Docenten'].str.replace(r'\b(Titularis|Co-Titularis|Medewerker)\b', '',
                                                              regex=True).str.strip()

filtered_df.to_excel('filtered_grote_lokalen.xlsx', index=False)
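Note: the two strftime calls above assume every cell in 'Beginuur S+' and 'Einduur S+' already holds a datetime/time object; a missing or text cell would raise an AttributeError. A minimal, hedged alternative (the '%H:%M:%S' input format is an assumption about how the export renders the times):

import pandas as pd

def format_time_column(series: pd.Series) -> pd.Series:
    """Render a column of times as 'HH:MM', leaving unparseable cells empty."""
    # Cast to string first so datetime.time objects and plain text are handled alike.
    parsed = pd.to_datetime(series.astype(str), format='%H:%M:%S', errors='coerce')
    return parsed.dt.strftime('%H:%M')

# e.g. filtered_df['Beginuur S+'] = format_time_column(filtered_df['Beginuur S+'])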
7 examen grote lokalen/ps (32)_files/filelist.xml Normal file
@@ -0,0 +1,7 @@
<xml xmlns:o="urn:schemas-microsoft-com:office:office">
<o:MainFile HRef="../ps%20(32).xls"/>
<o:File HRef="stylesheet.css"/>
<o:File HRef="tabstrip.htm"/>
<o:File HRef="sheet001.htm"/>
<o:File HRef="filelist.xml"/>
</xml>
1396 examen grote lokalen/ps (32)_files/sheet001.htm Normal file
File diff suppressed because it is too large
175 examen grote lokalen/ps (32)_files/stylesheet.css Normal file
@@ -0,0 +1,175 @@
tr
  {mso-height-source:auto;}
col
  {mso-width-source:auto;}
br
  {mso-data-placement:same-cell;}
.style0
  {mso-number-format:General;
  text-align:general;
  vertical-align:bottom;
  white-space:nowrap;
  mso-rotate:0;
  mso-background-source:auto;
  mso-pattern:auto;
  color:black;
  font-size:11.0pt;
  font-weight:400;
  font-style:normal;
  text-decoration:none;
  font-family:Calibri, sans-serif;
  mso-font-charset:0;
  border:none;
  mso-protection:locked visible;
  mso-style-name:Normal;
  mso-style-id:0;}
td
  {mso-style-parent:style0;
  padding-top:1px;
  padding-right:1px;
  padding-left:1px;
  mso-ignore:padding;
  color:black;
  font-size:11.0pt;
  font-weight:400;
  font-style:normal;
  text-decoration:none;
  font-family:Calibri, sans-serif;
  mso-font-charset:0;
  mso-number-format:General;
  text-align:general;
  vertical-align:bottom;
  border:none;
  mso-background-source:auto;
  mso-pattern:auto;
  mso-protection:locked visible;
  white-space:nowrap;
  mso-rotate:0;}
.xl65
  {mso-style-parent:style0;
  font-size:10.0pt;
  font-weight:700;
  text-align:center;
  vertical-align:middle;
  border:.5pt solid black;
  white-space:normal;}
.xl66
  {mso-style-parent:style0;
  border:.5pt solid black;
  white-space:normal;}
.xl67
  {mso-style-parent:style0;
  font-size:10.0pt;
  border:.5pt solid black;
  white-space:normal;}
.xl68
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border:.5pt solid black;
  white-space:normal;}
.xl69
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border:.5pt solid black;
  white-space:normal;}
.xl70
  {mso-style-parent:style0;
  font-size:10.0pt;
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl71
  {mso-style-parent:style0;
  font-size:10.0pt;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl72
  {mso-style-parent:style0;
  font-size:10.0pt;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
.xl73
  {mso-style-parent:style0;
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl74
  {mso-style-parent:style0;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl75
  {mso-style-parent:style0;
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
.xl76
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl77
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl78
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Date";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
.xl79
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border-top:.5pt solid black;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl80
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:none;
  border-left:.5pt solid black;
  white-space:normal;}
.xl81
  {mso-style-parent:style0;
  font-size:10.0pt;
  mso-number-format:"Short Time";
  border-top:none;
  border-right:.5pt solid black;
  border-bottom:.5pt solid black;
  border-left:.5pt solid black;
  white-space:normal;}
32 examen grote lokalen/ps (32)_files/tabstrip.htm Normal file
@@ -0,0 +1,32 @@
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=utf-8">
<meta name=ProgId content=Excel.Sheet>
<meta name=Generator content="Microsoft Excel 15">
<link id=Main-File rel=Main-File href="../ps%20(32).xls">

<script language="JavaScript">
<!--
if (window.name!="frTabs")
  window.location.replace(document.all.item("Main-File").href);
//-->
</script>
<style>
<!--
A {
  text-decoration:none;
  color:#000000;
  font-size:9pt;
}
-->
</style>
</head>
<body topmargin=0 leftmargin=0 bgcolor="#808080">
<table border=0 cellspacing=1>
<tr>
  <td bgcolor="#FFFFFF" nowrap><b><small><small> <a href="sheet001.htm" target="frSheet"><font face="Arial" color="#000000">ps (32)</font></a> </small></small></b></td>

</tr>
</table>
</body>
</html>
78 examengegevens template generator/script.py Normal file
@@ -0,0 +1,78 @@
import pandas as pd

def read_excel_file(file_path):
    """Read the Excel file and return a DataFrame."""
    try:
        return pd.read_excel(file_path)
    except Exception as e:
        print(f"Error reading the Excel file: {e}")
        return None

def filter_studiegidsnummer(df):
    """Filter rows where 'Studiegidsnummer' contains 'GES'."""
    if 'Studiegidsnummer' not in df.columns:
        print("Column 'Studiegidsnummer' not found in the DataFrame.")
        print("Available columns:", df.columns)
        return pd.DataFrame()  # Return an empty DataFrame
    return df[df['Studiegidsnummer'].str.contains('GES', na=False)].copy()

def filter_opmerkingen(df):
    """Filter rows where 'Opmerkingen' does NOT contain '24-25'."""
    if 'Opmerkingen' not in df.columns:
        print("Column 'Opmerkingen' not found in the DataFrame.")
        print("Available columns:", df.columns)
        return pd.DataFrame()  # Return an empty DataFrame
    return df[~df['Opmerkingen'].str.contains('24-25', na=False)].copy()

def create_message_column(df):
    """Create 'Message' and 'subject' columns with the specified format."""
    df.loc[:, 'Message'] = df.apply(lambda row: (
        f"Beste docent,\n\n"
        f"Ik ben de examengegevens aan het controleren van {row['Omschrijving']} {row['Studiegidsnummer']}. De huidige gegevens zijn als volgt:\n\n"
        f"{row['Examenvorm']} examen voor zowel eerste als tweede zit, {row['Examenduur']} minuten, tussen {row['Beginuur voormiddag']} en {row['Einduur voormiddag']} of {row['Beginuur namiddag']} en {row['Einduur namiddag']}.\n\n"
        f"Gelden dezelfde gegevens voor dit academiejaar of moeten er nog wijzigingen doorgevoerd worden? Alvast dank voor je reactie!"
    ), axis=1)
    df.loc[:, 'subject'] = df.apply(lambda row: (
        f"Examengegevens {row['Omschrijving']} {row['Studiegidsnummer']}"
    ), axis=1)
    return df

def save_to_excel(df, output_file_path):
    """Save the DataFrame to a new Excel file."""
    try:
        df.to_excel(output_file_path, index=False)
    except Exception as e:
        print(f"Error saving the Excel file: {e}")

def convert_time_format(time_str):
    """Convert time from 'HH:MM:SS' to 'HH:MM'."""
    try:
        return pd.to_datetime(time_str).strftime('%H:%M')
    except Exception as e:
        print(f"Error converting time format: {e}")
        return time_str

def apply_time_format_conversion(df, columns):
    """Apply time format conversion to the specified columns in the DataFrame."""
    for column in columns:
        df[column] = pd.to_datetime(df[column], format='%H:%M:%S', errors='coerce').dt.strftime('%H:%M')
    return df

# Example usage within the main function
def main():
    file_path = 'examengegevens2425.xlsx'
    output_file_path = 'filtered_examengegevens2425.xlsx'

    df = read_excel_file(file_path)
    if df is not None:
        filtered_df = filter_studiegidsnummer(df)
        if not filtered_df.empty:
            final_filtered_df = filter_opmerkingen(filtered_df)
            # Convert the time format for the specified columns
            time_columns = ['Beginuur voormiddag', 'Einduur voormiddag', 'Beginuur namiddag', 'Einduur namiddag']
            final_filtered_df = apply_time_format_conversion(final_filtered_df, time_columns)
            final_filtered_df = create_message_column(final_filtered_df)
            save_to_excel(final_filtered_df, output_file_path)

if __name__ == "__main__":
    main()
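For reference, a minimal, hypothetical sketch of how the generated 'subject' and 'Message' columns could be consumed downstream; the output directory and the plain-text draft format are assumptions, not part of this commit:

from pathlib import Path
import pandas as pd

def export_drafts(df: pd.DataFrame, out_dir: str = 'drafts') -> None:
    """Write one plain-text draft per row, using the generated subject and message columns."""
    target = Path(out_dir)
    target.mkdir(exist_ok=True)
    for i, row in df.iterrows():
        draft = f"Subject: {row['subject']}\n\n{row['Message']}\n"
        (target / f"draft_{i}.txt").write_text(draft, encoding='utf-8')

# e.g. export_drafts(pd.read_excel('filtered_examengegevens2425.xlsx'))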
131 random/script 2.py Normal file
@@ -0,0 +1,131 @@
import pandas as pd
from openpyxl import load_workbook
from dateutil import parser
import re


def list_sheets(file):
    try:
        workbook = load_workbook(filename=file, read_only=True)
        sheets = workbook.sheetnames
        return sheets
    except Exception as e:
        print(e)
        raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.")


def dutch_date_parser(date_str):
    # Remove Dutch day names
    day_name_pattern = r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b'
    date_str = re.sub(day_name_pattern, '', date_str, flags=re.IGNORECASE).strip()

    # Translate Dutch month names to English
    month_translation = {
        'januari': 'January', 'februari': 'February', 'maart': 'March',
        'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July',
        'augustus': 'August', 'september': 'September', 'oktober': 'October',
        'november': 'November', 'december': 'December'
    }

    for dutch_month, english_month in month_translation.items():
        date_str = re.sub(r'\b' + dutch_month + r'\b', english_month, date_str, flags=re.IGNORECASE)

    # Try parsing the modified date string
    try:
        return parser.parse(date_str, dayfirst=True)
    except ValueError:
        return pd.NaT


def compare_roosters(base_file, comparison_file, output_file):
    # Print the sheets available in both Excel files
    base_sheets = list_sheets(base_file)
    comparison_sheets = list_sheets(comparison_file)
    print(f"Sheets in '{base_file}': {base_sheets}")
    print(f"Sheets in '{comparison_file}': {comparison_sheets}")

    # Function to load an Excel file with error handling
    def load_excel(file):
        try:
            df = pd.read_excel(file, engine='openpyxl')
            if df.empty:
                raise ValueError(f"The file '{file}' has no sheets or is empty.")
            return df
        except Exception as e:
            print(e)
            raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.")

    # Load the Excel files
    base_df = load_excel(base_file)
    comparison_df = load_excel(comparison_file)

    # Ensure the columns we need are present in both files
    required_columns = ['Code examenrooster', 'Beginuur S+', 'Datum S+', 'Einduur S+']
    for column in required_columns:
        if column not in base_df.columns or column not in comparison_df.columns:
            raise ValueError(f"Column '{column}' is missing from one of the files")

    # Convert 'Datum S+' in comparison_df to the universal format
    comparison_df['Datum S+'] = comparison_df['Datum S+'].apply(
        lambda x: dutch_date_parser(x) if isinstance(x, str) else x
    )

    # Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes
    merged_df = base_df.merge(
        comparison_df,
        on='Code examenrooster',
        suffixes=('_base', '_comp'),
        how='outer',  # Outer join to capture all differences
        indicator=True  # Indicator to show if the row was in one or both files
    )

    # Create an empty list to store rows with differences
    differences = []

    # Iterate over each row to find discrepancies
    for _, row in merged_df.iterrows():
        row_data = {}
        # Only compare rows that exist in both files
        if row['_merge'] == 'both':
            differences_in_row = []

            # Compare the columns
            for column in ['Beginuur S+', 'Datum S+', 'Einduur S+']:
                base_value = row.get(f"{column}_base", pd.NA)
                comp_value = row.get(f"{column}_comp", pd.NA)

                if pd.isna(base_value) and pd.isna(comp_value):
                    continue  # Skip comparison if both are NaN
                elif base_value != comp_value:
                    differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})")

            # If there are any differences in this row, add them to the differences list
            if differences_in_row:
                for col in required_columns:
                    # 'Code examenrooster' keeps its name after the merge; the other columns carry suffixes
                    row_data[col] = row.get(f"{col}_base", row.get(col, pd.NA))
                    row_data[f"{col}_comp"] = row.get(f"{col}_comp", pd.NA)
                row_data['Difference'] = "; ".join(differences_in_row)
                differences.append(row_data)

        elif row['_merge'] == 'left_only':
            differences.append({
                'Code examenrooster': row['Code examenrooster'],
                'Difference': "Row missing in comparison file"
            })
        elif row['_merge'] == 'right_only':
            differences.append({
                'Code examenrooster': row['Code examenrooster'],
                'Difference': "Row missing in base file"
            })

    # Create a DataFrame from the differences list
    differences_df = pd.DataFrame(differences)

    # Save the differences to an Excel file
    differences_df.to_excel(output_file, index=False)

    print(f"Differences saved to {output_file}")


# Example usage:
compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx')
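A quick, hypothetical sanity check for dutch_date_parser, run in the same module; the sample strings are invented and only illustrate the day-name stripping, month translation, and NaT fallback:

from datetime import datetime

assert dutch_date_parser('maandag 6 januari 2025') == datetime(2025, 1, 6)
assert dutch_date_parser('17 maart 2025') == datetime(2025, 3, 17)
assert pd.isna(dutch_date_parser('geen geldige datum'))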
66 random/script.py Normal file
@@ -0,0 +1,66 @@
import pandas as pd
from datetime import datetime
import locale

file_path = 'bages rooster voor s.xlsx'
sheet_name = 'rooster'

df = pd.read_excel(file_path, sheet_name=sheet_name)

# Lesson-week numbers keyed by the date range they cover
date_ranges = {
    (pd.Timestamp('2025-01-06'), pd.Timestamp('2025-01-12')): 16,
    (pd.Timestamp('2025-01-13'), pd.Timestamp('2025-01-19')): 17,
    (pd.Timestamp('2025-01-20'), pd.Timestamp('2025-01-26')): 18,
    (pd.Timestamp('2025-01-27'), pd.Timestamp('2025-02-02')): 19,
    # add more ranges as needed
}


# Custom date parser function
def parse_custom_date(date_str):
    if pd.isna(date_str):
        return pd.NaT  # Return pandas NaT for missing dates
    if isinstance(date_str, str):
        try:
            # Set the locale to Dutch
            locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8')
            return datetime.strptime(date_str, '%A %d %B %Y')
        except ValueError as e:
            raise ValueError(f"Date conversion error: {e} for date string: {date_str}")
        finally:
            # Reset the locale to the default setting
            locale.setlocale(locale.LC_TIME, 'C')
    else:
        raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")


# Ensure the column 'Datum S+' exists and is processed correctly
if 'Datum S+' in df.columns:
    try:
        # Convert the 'Datum S+' column to datetime using the custom parser
        df['Datum S+'] = df['Datum S+'].apply(parse_custom_date)
    except (ValueError, TypeError) as e:
        print(f"Error: {e}")
        # Optionally, re-raise the exception to stop execution
        raise


# Function to update Lesweek based on the date ranges
def update_lesweek(date):
    if pd.isna(date):  # Handle NaT values
        return 0
    for date_range, lesweek_value in date_ranges.items():
        if date_range[0] <= date <= date_range[1]:
            return lesweek_value
    return 0  # Default value if the date doesn't fall in any range


# Apply the function to the 'Datum S+' column
df['Lesweek'] = df['Datum S+'].apply(update_lesweek)

# Check the results
print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head())

# If needed, save the DataFrame to a new Excel file to verify the changes
df.to_excel('updated_rooster.xlsx', index=False)
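Note: locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8') raises locale.Error on machines where that locale is not generated, so parse_custom_date is environment-dependent. A minimal, hedged locale-free variant; it assumes the same 'weekday day month year' strings and maps the Dutch month names itself:

import re
from datetime import datetime

_MONTHS_NL = {'januari': 1, 'februari': 2, 'maart': 3, 'april': 4, 'mei': 5, 'juni': 6,
              'juli': 7, 'augustus': 8, 'september': 9, 'oktober': 10, 'november': 11, 'december': 12}

def parse_dutch_date(date_str: str) -> datetime:
    """Parse e.g. 'maandag 6 januari 2025' without touching the process locale."""
    match = re.search(r'(\d{1,2})\s+([a-z]+)\s+(\d{4})', date_str.lower())
    if not match:
        raise ValueError(f"Unrecognised date string: {date_str!r}")
    day, month_name, year = match.groups()
    return datetime(int(year), _MONTHS_NL[month_name], int(day))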
63 webcrawler studieprogramma's/login.py Normal file
@@ -0,0 +1,63 @@
import asyncio
from pyppeteer import launch
import logging

logging.basicConfig(level=logging.INFO)


async def crawl(url):
    try:
        # Launch a new Chromium browser with a visible window
        print('browser launching')
        browser = await launch(headless=False)
        # Open a new page
        page = await browser.newPage()
        print('browser opened')

        try:
            # Navigate to the specified URL
            await page.goto(url)
            logging.info(f"Accessed {url}")
        except Exception as e:
            logging.error(f"Failed to navigate to {url}: {e}")
            await browser.close()
            return

        try:
            # Wait for the page to fully load
            await page.waitForSelector('body')
        except Exception as e:
            logging.error(f"Failed to load the page properly: {e}")
            await browser.close()
            return

        try:
            # Extract the content of the page
            content = await page.content()
            # (Optional) Extract and print all links as an example
            links = await page.evaluate('''() => {
                return Array.from(document.querySelectorAll('a')).map(link => ({
                    text: link.innerText,
                    url: link.href
                }));
            }''')

            for link in links:
                print(f"Link text: {link['text']}, URL: {link['url']}")

        except Exception as e:
            logging.error(f"Error extracting or processing the content: {e}")

        finally:
            # Ensure the browser closes after execution
            await browser.close()

    except Exception as e:
        logging.critical(f"Critical error occurred: {e}")


# Specify the URL of the web page you want to crawl
url = 'https://www.google.com/'

# Run the crawl function
asyncio.get_event_loop().run_until_complete(crawl(url))
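A small note on the last line: on newer Python versions, asyncio.get_event_loop() emits a DeprecationWarning when no event loop is running, so the call may be better written as below. This is a suggested variant, not part of the commit; behaviour is otherwise the same:

if __name__ == '__main__':
    asyncio.run(crawl(url))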
0 webcrawler studieprogramma's/main.py Normal file