first commit
This commit is contained in:
131
random/script 2.py
Normal file
131
random/script 2.py
Normal file
@@ -0,0 +1,131 @@
|
||||
import pandas as pd
|
||||
from openpyxl import load_workbook
|
||||
from dateutil import parser
|
||||
import re
|
||||
|
||||
|
||||
|
||||
def list_sheets(file):
|
||||
try:
|
||||
workbook = load_workbook(filename=file, read_only=True)
|
||||
sheets = workbook.sheetnames
|
||||
return sheets
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise ValueError(f"Could not open the file '{file}'. Please check the file and try again.")
|
||||
|
||||
|
||||
def dutch_date_parser(date_str):
|
||||
# Remove Dutch day names
|
||||
day_name_pattern = r'\b(maandag|dinsdag|woensdag|donderdag|vrijdag|zaterdag|zondag)\b'
|
||||
date_str = re.sub(day_name_pattern, '', date_str, flags=re.IGNORECASE).strip()
|
||||
|
||||
# Translate Dutch month names to English
|
||||
month_translation = {
|
||||
'januari': 'January', 'februari': 'February', 'maart': 'March',
|
||||
'april': 'April', 'mei': 'May', 'juni': 'June', 'juli': 'July',
|
||||
'augustus': 'August', 'september': 'September', 'oktober': 'October',
|
||||
'november': 'November', 'december': 'December'
|
||||
}
|
||||
|
||||
for dutch_month, english_month in month_translation.items():
|
||||
date_str = re.sub(r'\b' + dutch_month + r'\b', english_month, date_str, flags=re.IGNORECASE)
|
||||
|
||||
# Try parsing the modified date string
|
||||
try:
|
||||
return parser.parse(date_str, dayfirst=True)
|
||||
except ValueError:
|
||||
return pd.NaT
|
||||
|
||||
def compare_roosters(base_file, comparison_file, output_file):
|
||||
# Print the sheets available in both Excel files
|
||||
base_sheets = list_sheets(base_file)
|
||||
comparison_sheets = list_sheets(comparison_file)
|
||||
print(f"Sheets in '{base_file}': {base_sheets}")
|
||||
print(f"Sheets in '{comparison_file}': {comparison_sheets}")
|
||||
|
||||
# Function to load an Excel file with error handling
|
||||
def load_excel(file):
|
||||
try:
|
||||
df = pd.read_excel(file, engine='openpyxl')
|
||||
if df.empty:
|
||||
raise ValueError(f"The file '{file}' has no sheets or is empty.")
|
||||
return df
|
||||
except Exception as e:
|
||||
print(e)
|
||||
raise ValueError(f"Could not load the file '{file}'. Please check the file and try again.")
|
||||
|
||||
# Load the Excel files
|
||||
base_df = load_excel(base_file)
|
||||
comparison_df = load_excel(comparison_file)
|
||||
|
||||
# Ensure the columns we need are present in both files
|
||||
required_columns = ['Code examenrooster', 'Beginuur S+', 'Datum S+', 'Einduur S+']
|
||||
for column in required_columns:
|
||||
if column not in base_df.columns or column not in comparison_df.columns:
|
||||
raise ValueError(f"Column '{column}' is missing from one of the files")
|
||||
|
||||
# Convert 'Datum S+' in comparison_df to the universal format
|
||||
comparison_df['Datum S+'] = comparison_df['Datum S+'].apply(
|
||||
lambda x: dutch_date_parser(x) if isinstance(x, str) else x
|
||||
)
|
||||
|
||||
# Merge the dataframes on 'Code examenrooster' to compare the rows with matching codes
|
||||
merged_df = base_df.merge(
|
||||
comparison_df,
|
||||
on='Code examenrooster',
|
||||
suffixes=('_base', '_comp'),
|
||||
how='outer', # Outer join to capture all differences
|
||||
indicator=True # Indicator to show if the row was in one or both files
|
||||
)
|
||||
|
||||
# Create an empty list to store rows with differences
|
||||
differences = []
|
||||
|
||||
# Iterate over each row to find discrepancies
|
||||
for _, row in merged_df.iterrows():
|
||||
row_data = {}
|
||||
# Only compare rows that exist in both files
|
||||
if row['_merge'] == 'both':
|
||||
differences_in_row = []
|
||||
|
||||
# Compare the columns
|
||||
for column in ['Beginuur S+', 'Datum S+', 'Einduur S+']:
|
||||
base_value = row.get(f"{column}_base", pd.NA)
|
||||
comp_value = row.get(f"{column}_comp", pd.NA)
|
||||
|
||||
if pd.isna(base_value) and pd.isna(comp_value):
|
||||
continue # Skip comparison if both are NaN
|
||||
elif base_value != comp_value:
|
||||
differences_in_row.append(f"{column} differs (Base: {base_value}, Comp: {comp_value})")
|
||||
|
||||
# If there are any differences in this row, add them to the differences list
|
||||
if differences_in_row:
|
||||
for col in required_columns:
|
||||
row_data[col] = row.get(col, pd.NA)
|
||||
row_data[f"{col}_comp"] = row.get(f"{col}_comp", pd.NA)
|
||||
row_data['Difference'] = "; ".join(differences_in_row)
|
||||
differences.append(row_data)
|
||||
|
||||
elif row['_merge'] == 'left_only':
|
||||
differences.append({
|
||||
'Code examenrooster': row['Code examenrooster'],
|
||||
'Difference': "Row missing in comparison file"
|
||||
})
|
||||
elif row['_merge'] == 'right_only':
|
||||
differences.append({
|
||||
'Code examenrooster': row['Code examenrooster'],
|
||||
'Difference': "Row missing in base file"
|
||||
})
|
||||
|
||||
# Create a DataFrame from the differences list
|
||||
differences_df = pd.DataFrame(differences)
|
||||
|
||||
# Save the differences to an Excel file
|
||||
differences_df.to_excel(output_file, index=False)
|
||||
|
||||
print(f"Differences saved to {output_file}")
|
||||
|
||||
|
||||
# Example usage:
|
||||
compare_roosters('afgewerkte.xlsx', 'bages rooster voor s.xlsx', 'differences_output.xlsx')
|
||||
66
random/script.py
Normal file
66
random/script.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
import locale
|
||||
|
||||
file_path = 'bages rooster voor s.xlsx'
|
||||
sheet_name = 'rooster'
|
||||
|
||||
df = pd.read_excel(file_path, sheet_name=sheet_name)
|
||||
|
||||
date_ranges = {
|
||||
(pd.Timestamp('2025-01-06'), pd.Timestamp('2025-01-12')): 16,
|
||||
(pd.Timestamp('2025-01-13'), pd.Timestamp('2025-01-19')): 17,
|
||||
(pd.Timestamp('2025-01-20'), pd.Timestamp('2025-01-26')): 18,
|
||||
(pd.Timestamp('2025-01-27'), pd.Timestamp('2025-02-02')): 19,
|
||||
|
||||
# add more ranges as needed
|
||||
}
|
||||
|
||||
|
||||
# Custom date parser function
|
||||
def parse_custom_date(date_str):
|
||||
if pd.isna(date_str):
|
||||
return pd.NaT # Return pandas NaT for missing dates
|
||||
if isinstance(date_str, str):
|
||||
try:
|
||||
# Set locale to Dutch
|
||||
locale.setlocale(locale.LC_TIME, 'nl_NL.UTF-8')
|
||||
return datetime.strptime(date_str, '%A %d %B %Y')
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Date conversion error: {e} for date string: {date_str}")
|
||||
finally:
|
||||
# Reset locale to the default setting
|
||||
locale.setlocale(locale.LC_TIME, 'C')
|
||||
else:
|
||||
raise TypeError(f"Expected string, got {type(date_str).__name__}: {date_str}")
|
||||
|
||||
|
||||
# Ensure the column 'Datum S+' exists and is processed correctly
|
||||
if 'Datum S+' in df.columns:
|
||||
try:
|
||||
# Convert 'Datum S+' column to datetime using the custom parser
|
||||
df['Datum S+'] = df['Datum S+'].apply(parse_custom_date)
|
||||
except (ValueError, TypeError) as e:
|
||||
print(f"Error: {e}")
|
||||
# Optionally, re-raise the exception if you want to stop execution
|
||||
raise
|
||||
|
||||
|
||||
# Function to update Lesweek based on date ranges
|
||||
def update_lesweek(date):
|
||||
if pd.isna(date): # Handle NaT values
|
||||
return 0
|
||||
for date_range, lesweek_value in date_ranges.items():
|
||||
if date_range[0] <= date <= date_range[1]:
|
||||
return lesweek_value
|
||||
return 0 # Default value if date doesn't fall in any range
|
||||
|
||||
|
||||
# Apply the function to 'Datum S+' column
|
||||
df['Lesweek'] = df['Datum S+'].apply(update_lesweek)
|
||||
|
||||
# Check the results
|
||||
print("\nFirst few rows of the DataFrame to verify date formatting:\n", df.head())
|
||||
|
||||
# If needed, you can save the DataFrame to a new Excel file to verify changes
|
||||
df.to_excel('updated_rooster.xlsx', index=False)
|
||||
Reference in New Issue
Block a user