86 lines
3.4 KiB
Python
86 lines
3.4 KiB
Python
import pandas as pd
|
|
|
|
def check_headers_predelibfile(df):
    """Promote the 'Achternaam'/'Voornaam' header row to column labels.

    If both names are already column labels, the dataframe is returned
    untouched. Otherwise the rows are scanned from the top for the first row
    that contains both values; every row above it is dropped, that row
    becomes the column labels, and only the known columns that are actually
    present are kept (in the order of ``columns_to_keep``).

    Args:
        df: Dataframe whose header row may be preceded by other rows.

    Returns:
        The cleaned dataframe, or ``df`` itself when the headers are already
        in place or when no header row could be found.
    """
    # Already processed: both header names are column labels.
    if 'Achternaam' in df.columns and 'Voornaam' in df.columns:
        print("Headers found in first row - file already processed, returning unchanged")
        return df

    # Track the row's *position*, not its index label: the value is used with
    # ``iloc`` below, and labels only equal positions for a default RangeIndex.
    header_row = None
    for position, (_, row) in enumerate(df.iterrows()):
        if 'Achternaam' in row.values and 'Voornaam' in row.values:
            header_row = position
            break

    if header_row is None:
        print("Headers 'Achternaam' and 'Voornaam' not found in the file")
        return df

    # Drop everything above the header row.
    df = df.iloc[header_row:].reset_index(drop=True)

    # The first remaining row holds the labels; promote it, then remove it
    # from the data.
    df.columns = df.iloc[0]
    df = df.iloc[1:].reset_index(drop=True)

    columns_to_keep = [
        'ID', 'Achternaam', 'Voornaam', 'E-mail', 'Loopbaan',
        'Drempelteller omschrijving', 'Programma status omschrijving',
        'OO Periode', 'OO Studiegidsnummer', 'OO Lange omschrijving',
        'OO Eenheden', 'OO Sessie', 'OO Credit (Y/N)', 'OO Periode credit',
        'OO Programma code', 'OO Programma korte omschr.', 'Totaal aantal SP',
        'Aantal SP vereist', 'Aantal SP zonder VZP', 'Adviesrapport code',
        'Waarschuwing', 'Lijsttype',
    ]

    # Columns missing from the file are skipped silently rather than raising.
    existing_columns = [col for col in columns_to_keep if col in df.columns]
    df = df[existing_columns]

    print(f"Deleted {header_row} rows, set proper headers, and kept {len(existing_columns)} columns")
    return df
|
|
|
|
def check_headers_dashboard_inschrijvingenfile(df):
    """Promote the 'Naam'/'Voornaam' header row to column labels.

    If both names are already column labels, the dataframe is returned
    untouched. Otherwise the rows are scanned from the top for the first row
    that contains both values; every row above it is dropped and that row
    becomes the column labels.

    Args:
        df: Dataframe whose header row may be preceded by other rows.

    Returns:
        The cleaned dataframe, or ``df`` itself when the headers are already
        in place or when no header row could be found.
    """
    if 'Naam' in df.columns and 'Voornaam' in df.columns:
        print("Headers found in first row of dashboard_inschrijvingen - no need to search for header row")
        print("Headers were already correct in dashboard_file.")
        # Return right away: feeding a sentinel such as -1 into ``iloc``
        # would keep only the last row and promote it to the header.
        return df

    # Track the row's *position*, not its index label: the value is used with
    # ``iloc`` below, and labels only equal positions for a default RangeIndex.
    header_row = None
    for position, (_, row) in enumerate(df.iterrows()):
        if 'Naam' in row.values and 'Voornaam' in row.values:
            header_row = position
            break

    if header_row is None:
        print("Headers 'Naam' and 'Voornaam' not found in the file")
        return df

    # Drop everything above the header row.
    df = df.iloc[header_row:].reset_index(drop=True)

    # The first remaining row holds the labels; promote it, then remove it
    # from the data.
    df.columns = df.iloc[0]
    df = df.iloc[1:].reset_index(drop=True)

    print(f"Deleted {header_row} rows in dashboard_file, set proper headers")
    return df
|
|
|
|
|
|
if __name__ == "__main__":
    # Load both workbooks up front, before any processing output is printed.
    raw_predelib = pd.read_excel('db.xlsx')
    raw_dashboard = pd.read_excel('dashboard_inschrijvingen.xlsx')

    # Run each sheet through its header check.
    processed_predelib_df = check_headers_predelibfile(raw_predelib)
    processed_dashboard_df = check_headers_dashboard_inschrijvingenfile(raw_dashboard)
|
|
|