first commit

2024-11-18 11:59:21 +01:00
parent d51c60d76d
commit e3e65a9c51
12 changed files with 1997 additions and 0 deletions
--- a/random/script
+++ b/random/script
@@ -0,0 +1,131 @@
+import pandas as pd
+from openpyxl import load_workbook
+from dateutil import parser
+import re
+
+
+
+def list_sheets(file):
+    """Return the names of the worksheets in an Excel workbook.
+
+    Args:
+        file: Path (or file-like object) of the workbook to inspect.
+
+    Returns:
+        A list with the sheet names, in workbook order.
+
+    Raises:
+        ValueError: If the workbook cannot be opened or read. The original
+            error is printed and attached as the exception's cause.
+    """
+    try:
+        workbook = load_workbook(filename=file, read_only=True)
+        try:
+            return workbook.sheetnames
+        finally:
+            # Read-only workbooks keep the underlying file open until they
+            # are closed explicitly.
+            workbook.close()
+    except Exception as e:
+        print(e)
+        raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.") from e
+
+
+def dutch_date_parser(date_str):
+    """Parse a date written with Dutch day and month names.
+
+    Dutch weekday names are removed and Dutch month names are replaced by
+    their English equivalents, after which the text is handed to
+    ``dateutil`` with day-first ordering.
+
+    Args:
+        date_str: Date text, e.g. 'maandag 6 januari 2025'.
+
+    Returns:
+        The parsed ``datetime``, or ``pd.NaT`` when the text cannot be
+        interpreted as a date.
+    """
+    # Remove Dutch day names
+    day_name_pattern = r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b'
+    date_str = re.sub(day_name_pattern, '', date_str, flags=re.IGNORECASE).strip()
+
+    # Translate Dutch month names to English
+    month_translation = {
+        'januari': 'January', 'februari': 'February', 'maart': 'March',
+        'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July',
+        'augustus': 'August', 'september': 'September', 'oktober': 'October',
+        'november': 'November', 'december': 'December'
+    }
+
+    for dutch_month, english_month in month_translation.items():
+        date_str = re.sub(r'\b' + dutch_month + r'\b', english_month, date_str, flags=re.IGNORECASE)
+
+    # Try parsing the modified date string
+    try:
+        return parser.parse(date_str, dayfirst=True)
+    except (ValueError, OverflowError):
+        # dateutil raises OverflowError (not ValueError) for numbers too
+        # large to fit a date field; treat that as unparseable as well so a
+        # single bad cell does not abort the whole run.
+        return pd.NaT
+
+def compare_roosters(base_file, comparison_file, output_file):
+    """Compare two exam-roster workbooks and write the differences to Excel.
+
+    Both workbooks are loaded with pandas (default sheet), joined on
+    'Code examenrooster' with an outer join, and compared on 'Beginuur S+',
+    'Datum S+' and 'Einduur S+'. String dates in the comparison file are
+    converted with ``dutch_date_parser`` before the comparison.
+
+    The report contains one row per roster code that differs or that exists
+    in only one of the files. For differing rows, each required column is
+    reported twice: the base value under the plain column name and the
+    comparison value under '<column>_comp'.
+
+    Args:
+        base_file: Path of the reference workbook.
+        comparison_file: Path of the workbook checked against the base.
+        output_file: Path of the Excel report to write.
+
+    Raises:
+        ValueError: If a file cannot be opened or loaded, is empty, or lacks
+            one of the required columns.
+    """
+    key_column = 'Code examenrooster'
+    compared_columns = ['Beginuur S+', 'Datum S+', 'Einduur S+']
+    required_columns = [key_column] + compared_columns
+
+    # Print the sheets available in both Excel files
+    base_sheets = list_sheets(base_file)
+    comparison_sheets = list_sheets(comparison_file)
+    print(f"Sheets in '{base_file}': {base_sheets}")
+    print(f"Sheets in '{comparison_file}': {comparison_sheets}")
+
+    # Function to load an Excel file with error handling
+    def load_excel(file):
+        try:
+            df = pd.read_excel(file, engine='openpyxl')
+        except Exception as e:
+            print(e)
+            raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.") from e
+        # Checked outside the try block so this specific message is not
+        # swallowed and replaced by the generic "could not load" error.
+        if df.empty:
+            raise ValueError(f"The file '{file}' has no sheets or is empty.")
+        return df
+
+    # Load the Excel files
+    base_df = load_excel(base_file)
+    comparison_df = load_excel(comparison_file)
+
+    # Ensure the columns we need are present in both files
+    for column in required_columns:
+        if column not in base_df.columns or column not in comparison_df.columns:
+            raise ValueError(f"Column '{column}' is missing from one of the files")
+
+    # Convert 'Datum S+' in comparison_df to the universal format
+    comparison_df['Datum S+'] = comparison_df['Datum S+'].apply(
+        lambda x: dutch_date_parser(x) if isinstance(x, str) else x
+    )
+
+    # Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes
+    merged_df = base_df.merge(
+        comparison_df,
+        on=key_column,
+        suffixes=('_base', '_comp'),
+        how='outer',  # Outer join to capture all differences
+        indicator=True  # Indicator to show if the row was in one or both files
+    )
+
+    # Rows with differences, collected as dicts keyed by report column
+    differences = []
+
+    # Iterate over each row to find discrepancies
+    for _, row in merged_df.iterrows():
+        origin = row['_merge']
+
+        if origin == 'left_only':
+            differences.append({
+                key_column: row[key_column],
+                'Difference': "Row missing in comparison file"
+            })
+            continue
+        if origin == 'right_only':
+            differences.append({
+                key_column: row[key_column],
+                'Difference': "Row missing in base file"
+            })
+            continue
+
+        # The row exists in both files: compare the columns
+        differences_in_row = []
+        for column in compared_columns:
+            base_value = row.get(f"{column}_base", pd.NA)
+            comp_value = row.get(f"{column}_comp", pd.NA)
+            base_missing = pd.isna(base_value)
+            comp_missing = pd.isna(comp_value)
+
+            if base_missing and comp_missing:
+                continue  # Skip comparison if both are NaN
+            # A value present on only one side is a difference. Testing the
+            # missing flags first avoids evaluating `pd.NA != value`, whose
+            # truth value is ambiguous.
+            if base_missing or comp_missing or base_value != comp_value:
+                differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})")
+
+        if not differences_in_row:
+            continue
+
+        row_data = {}
+        for col in required_columns:
+            # After the merge the compared columns only exist with the
+            # '_base' / '_comp' suffixes; the join key keeps its plain name,
+            # so fall back to it when no suffixed column is present.
+            row_data[col] = row.get(f"{col}_base", row.get(col, pd.NA))
+            row_data[f"{col}_comp"] = row.get(f"{col}_comp", row.get(col, pd.NA))
+        row_data['Difference'] = "; ".join(differences_in_row)
+        differences.append(row_data)
+
+    # Fixed column layout: the report keeps its headers even when no
+    # differences were found, and the order no longer depends on which kind
+    # of difference happens to come first.
+    report_columns = []
+    for col in required_columns:
+        report_columns.extend([col, f"{col}_comp"])
+    report_columns.append('Difference')
+
+    # Create a DataFrame from the differences list
+    differences_df = pd.DataFrame(differences, columns=report_columns)
+
+    # Save the differences to an Excel file
+    differences_df.to_excel(output_file, index=False)
+
+    print(f"Differences saved to {output_file}")
+
+
+# Example usage: compare two workbooks and write the report next to them.
+compare_roosters(
+    base_file='afgewerkte.xlsx',
+    comparison_file='bages rooster voor s.xlsx',
+    output_file='differences_output.xlsx',
+)
--- a/random/script.py
+++ b/random/script.py
@@ -0,0 +1,66 @@
+import pandas as pd
+from datetime import datetime
+import locale
+
+# Workbook and sheet that hold the roster
+file_path = 'bages rooster voor s.xlsx'
+sheet_name = 'rooster'
+
+# Load the roster sheet into a DataFrame
+df = pd.read_excel(file_path, sheet_name=sheet_name)
+
+# Maps an inclusive (first day, last day) pair to its Lesweek number
+date_ranges = {
+    (pd.Timestamp(first_day), pd.Timestamp(last_day)): lesweek
+    for first_day, last_day, lesweek in [
+        ('2025-01-06', '2025-01-12', 16),
+        ('2025-01-13', '2025-01-19', 17),
+        ('2025-01-20', '2025-01-26', 18),
+        ('2025-01-27', '2025-02-02', 19),
+        # add more ranges as needed
+    ]
+}
+
+
+# Custom date parser function
+def parse_custom_date(date_str):
+    """Convert a Dutch long-form date such as 'maandag 6 januari 2025'.
+
+    Day and month names are looked up in fixed Dutch name tables instead of
+    switching the process-wide LC_TIME locale. Parsing therefore does not
+    depend on an 'nl_NL.UTF-8' locale being installed and never changes
+    global locale state. As with ``strptime``, the weekday name must be a
+    valid Dutch weekday but is not cross-checked against the date itself.
+    Unlike ``strptime``, surrounding whitespace is tolerated.
+
+    Args:
+        date_str: Text in '<weekday> <day> <month> <year>' form, a missing
+            value, or a value that is already a ``datetime``.
+
+    Returns:
+        A ``datetime`` for text input, ``pd.NaT`` for missing values, and
+        the input unchanged when it is already a ``datetime`` (this includes
+        ``pd.Timestamp``), as produced by Excel cells stored as real dates.
+
+    Raises:
+        ValueError: If the text does not describe a valid date.
+        TypeError: If the value is neither text, a datetime, nor missing.
+    """
+    dutch_days = {
+        'maandag', 'dinsdag', 'woensdag', 'donderdag',
+        'vrijdag', 'zaterdag', 'zondag',
+    }
+    dutch_months = {
+        'januari': 1, 'februari': 2, 'maart': 3, 'april': 4,
+        'mei': 5, 'juni': 6, 'juli': 7, 'augustus': 8,
+        'september': 9, 'oktober': 10, 'november': 11, 'december': 12,
+    }
+
+    if pd.isna(date_str):
+        return pd.NaT  # Return pandas NaT for missing dates
+    if isinstance(date_str, datetime):
+        return date_str  # Already a date; nothing to parse
+    if not isinstance(date_str, str):
+        raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")
+
+    try:
+        parts = date_str.split()
+        if len(parts) != 4:
+            raise ValueError("expected '<weekday> <day> <month> <year>'")
+        day_name, day, month_name, year = parts
+        if day_name.lower() not in dutch_days:
+            raise ValueError(f"unknown weekday name '{day_name}'")
+        if month_name.lower() not in dutch_months:
+            raise ValueError(f"unknown month name '{month_name}'")
+        if not (day.isdigit() and len(day) <= 2):
+            raise ValueError(f"invalid day of month '{day}'")
+        if not (year.isdigit() and len(year) == 4):
+            raise ValueError(f"invalid year '{year}'")
+        # datetime() itself rejects impossible dates such as 31 februari
+        return datetime(int(year), dutch_months[month_name.lower()], int(day))
+    except ValueError as e:
+        raise ValueError(f"Date conversion error: {e} for date string: {date_str}") from e
+
+
+# Convert 'Datum S+' and derive 'Lesweek' only when the column is present
+if 'Datum S+' in df.columns:
+    try:
+        # Run every cell of 'Datum S+' through the custom parser
+        df['Datum S+'] = df['Datum S+'].apply(parse_custom_date)
+    except (ValueError, TypeError) as err:
+        # Report the failing cell, then stop: the error is re-raised
+        print(f"Error: {err}")
+        raise
+
+
+    def update_lesweek(date):
+        """Return the Lesweek number for *date*.
+
+        The first entry of ``date_ranges`` whose inclusive bounds contain
+        the date wins; missing dates and dates outside every range give 0.
+        """
+        if pd.isna(date):
+            return 0
+        matching_weeks = (
+            lesweek_value
+            for (first_day, last_day), lesweek_value in date_ranges.items()
+            if first_day <= date <= last_day
+        )
+        return next(matching_weeks, 0)
+
+
+    # Derive the 'Lesweek' column from the parsed dates
+    df['Lesweek'] = df['Datum S+'].apply(update_lesweek)
+
+# Show a preview of the processed data
+preview = df.head()
+print("\nFirst few rows of the DataFrame to verify date formatting:\n", preview)
+
+# Write the processed DataFrame to a new workbook so the changes can be checked
+df.to_excel(excel_writer='updated_rooster.xlsx', index=False)