Refactor code structure and remove redundant code blocks
This commit is contained in:
@@ -181,22 +181,49 @@ def check_students_with_mismatching_SP_values(predelib_df: pd.DataFrame) -> List
|
||||
|
||||
logger.info("All required columns found in dataframe")
|
||||
|
||||
# Check for mismatching SP values
|
||||
mismatching_students = []
|
||||
for index, row in predelib_df.iterrows():
|
||||
if row['Totaal aantal SP'] != row['Aantal SP vereist']:
|
||||
mismatching_students.append({
|
||||
'ID': row['ID'],
|
||||
'Achternaam': row['Achternaam'],
|
||||
'Voornaam': row['Voornaam'],
|
||||
'E-mail': row['E-mail'],
|
||||
'Totaal_aantal_SP': row['Totaal aantal SP'],
|
||||
'Aantal_SP_vereist': row['Aantal SP vereist'],
|
||||
'Waarschuwing': row['Waarschuwing'],
|
||||
'Adviesrapport_code': row['Adviesrapport code']
|
||||
})
|
||||
# Use vectorized comparison to find rows where the SP values differ
|
||||
sp_col = predelib_df['Totaal aantal SP']
|
||||
req_col = predelib_df['Aantal SP vereist']
|
||||
|
||||
logger.info(f"Found {len(mismatching_students)} students with mismatching SP values")
|
||||
# Simple inequality works for most cases; NaN != NaN will be True which is acceptable
|
||||
mask = sp_col != req_col
|
||||
mismatches_df = predelib_df[mask].copy()
|
||||
|
||||
logger.info(f"Found {len(mismatches_df)} raw rows with mismatching SP values")
|
||||
|
||||
if mismatches_df.empty:
|
||||
logger.info("No students with mismatching SP values found")
|
||||
return []
|
||||
|
||||
# Keep only unique students by 'ID' (first occurrence).
|
||||
if 'ID' in mismatches_df.columns:
|
||||
before_dedup = len(mismatches_df)
|
||||
mismatches_df = mismatches_df.drop_duplicates(subset=['ID'])
|
||||
after_dedup = len(mismatches_df)
|
||||
logger.info(f"Reduced from {before_dedup} rows to {after_dedup} unique students by ID")
|
||||
else:
|
||||
logger.warning("Column 'ID' not found - cannot deduplicate by student ID")
|
||||
|
||||
# Ensure optional columns exist to avoid KeyError when building dicts
|
||||
for optional_col in ('Waarschuwing', 'Adviesrapport code'):
|
||||
if optional_col not in mismatches_df.columns:
|
||||
mismatches_df[optional_col] = None
|
||||
|
||||
# Build the list of mismatching students
|
||||
mismatching_students = []
|
||||
for _, row in mismatches_df.iterrows():
|
||||
mismatching_students.append({
|
||||
'ID': row.get('ID'),
|
||||
'Achternaam': row.get('Achternaam'),
|
||||
'Voornaam': row.get('Voornaam'),
|
||||
'E-mail': row.get('E-mail'),
|
||||
'Totaal_aantal_SP': row.get('Totaal aantal SP'),
|
||||
'Aantal_SP_vereist': row.get('Aantal SP vereist'),
|
||||
'Waarschuwing': row.get('Waarschuwing'),
|
||||
'Adviesrapport_code': row.get('Adviesrapport code')
|
||||
})
|
||||
|
||||
logger.info(f"Returning {len(mismatching_students)} unique students with mismatching SP values")
|
||||
return mismatching_students
|
||||
|
||||
except Exception as e:
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,4 +0,0 @@
|
||||
Extracurriculaire vakken komen niet uit de wizard. Daarvoor is een aparte Excel nodig, namelijk het dashboard inschrijvingen.
|
||||
Voor de check of ze extra keuzevakken opnemen en de vereiste dus hoger moet komen te staan: vergelijk de kolommen 'Totaal aantal SP' en 'Aantal SP vereist'.
|
||||
|
||||
Deze kolom moet ook worden gecheckt als ze eigenlijk minder opnemen; de twee waarden moeten dus altijd aan elkaar gelijk zijn.
|
||||
|
||||
Reference in New Issue
Block a user