Refactor code structure and remove redundant code blocks
326
ongeloofelijken tool/script.py
Normal file
@@ -0,0 +1,326 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Update the 'BAGES' sheet in 'ongeloofelijken tool.xlsx' with the latest (2025-2026) bachelor History
study programme from UAntwerpen. It scrapes the official page and writes a normalized table.

Source page (2025-2026 bachelor study programme):
https://www.uantwerpen.be/nl/studeren/aanbod/alle-opleidingen/geschiedenis-studeren/bachelor/studieprogramma/
- In 2025-2026 the 'Geschiedenis per periode en gebied' structure changed to a two-pillar model:
  * Chronologische pijler: 3 course units (middeleeuwen, nieuwe tijd, nieuwste tijd)
  * Thematische pijler: 2 course units
  (See the faculty helpdesk note with the change summary and transition measures.)

IMPORTANT:
- This script only updates the 'BAGES' (bachelor) sheet, because the provided link covers the bachelor page.
- The 'MAGES' and 'SPVP' sheets remain untouched.

Tested with: requests, beautifulsoup4, lxml, pandas, openpyxl
"""

import re
import sys
import time
import urllib.parse
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple

import requests
import pandas as pd
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.utils.exceptions import InvalidFileException


# ------------------------- Configuration -------------------------
EXCEL_PATH = "ongeloofelijken tool.xlsx"
TARGET_SHEET = "BAGES"
ARCHIVE_PREFIX = "BAGES_OLD_"
UA_BA_URL = "https://www.uantwerpen.be/nl/studeren/aanbod/alle-opleidingen/geschiedenis-studeren/bachelor/studieprogramma/"
TARGET_YEAR_PREFIX = "2025-"  # Anchor/course URLs have '?id=<year>-<code>'; we filter with '2025-'
TIMEOUT = 30
HEADERS = {
    "User-Agent": "Mozilla/5.0 (compatible; uantwerpen-bages-updater/1.0; +https://www.uantwerpen.be/)",
    "Accept-Language": "nl,en;q=0.8"
}
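# Note: the year filter above keys on the 'id' query parameter of course links; a matching
# href carries something like '?id=2025-1002FLWGES' (the example course id used later in
# this file), optionally followed by '&lang=nl'.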
# ------------------------- Helpers -------------------------
def fetch_html(url: str) -> BeautifulSoup:
    resp = requests.get(url, headers=HEADERS, timeout=TIMEOUT)
    resp.raise_for_status()
    return BeautifulSoup(resp.text, "lxml")


def extract_text(el) -> str:
    return re.sub(r"\s+", " ", " ".join(el.stripped_strings)) if el else ""


def parse_meta_from_block(block_text: str) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
    """
    Try to parse Semester, Credits, Lecturers, Notes from a block of text next to a course link.
    Returns (semester, credits, lecturers, notes)
    """
    text = block_text

    # Semester examples: '1E SEM', '2E SEM', '1E/2E SEM'
    sem = None
    m_sem = re.search(r"\b(1E\s*/\s*2E\s*SEM|1E\s*SEM|2E\s*SEM)\b", text, flags=re.I)
    if m_sem:
        sem = m_sem.group(1).upper().replace(" ", "")

    # Credits examples: '6 studiepunten', '3 studiepunten'
    credits = None
    m_sp = re.search(r"(\d+)\s*studiepunten", text, flags=re.I)
    if m_sp:
        credits = m_sp.group(1)

    # Lecturers: after 'Lesgever (s):' or 'Lesgever(s):'
    lecturers = None
    m_lect = re.search(r"Lesgever\s*\(s\)\s*:\s*([^|]+?)(?:\s{2,}|$)", text, flags=re.I)
    if not m_lect:
        m_lect = re.search(r"Lesgever[s]?\s*:\s*([^|]+?)(?:\s{2,}|$)", text, flags=re.I)
    if m_lect:
        lecturers = m_lect.group(1).strip(" .").replace(" ,", ",")

    # Notes: look for two-yearly ('Tweejaarlijks') scheduling etc.
    notes = None
    if re.search(r"Tweejaarlijks", text, flags=re.I):
        # Try to capture the "even/oneven" phrasing
        m_ev = re.search(r"tweejaarlijks[^.]*?(even|oneven)[^.]*jaar", text, flags=re.I)
        notes = "Tweejaarlijks" + (f" ({m_ev.group(1).lower()} jaar)" if m_ev else "")

    return sem, credits, lecturers, notes
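# Illustrative call (the block text and lecturer name are hypothetical, not taken from the live page):
#   parse_meta_from_block("1E SEM 6 studiepunten Lesgever(s): A. Voorbeeld")
#   -> ("1ESEM", "6", "A. Voorbeeld", None)
# Note that the semester marker loses its internal space because of the .replace(" ", "") step.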
def nearest_sections(a_tag) -> Tuple[Optional[str], Optional[str], Optional[str]]:
    """
    Find nearest preceding headings to classify the row.
    Returns (section, subsection, pillar)
    - section: e.g., 'Modeltraject deel 1/2/3'
    - subsection: e.g., 'Wijsbegeerte en sociale wetenschappen', 'Keuzeopleidingsonderdelen', 'Inleiding tot de geschiedenis', etc.
    - pillar: for deel 3: 'Chronologische pijler', 'Thematische pijler' or None
    """
    # The site uses a variety of headings (h2, h3, h4, h5); we trace back to find labels
    section = subsection = pillar = None

    # Walk back over previous headings to capture a hierarchy
    prev_heads = []
    cur = a_tag
    for _ in range(40):  # limit the walk so we do not scan the whole page
        cur = cur.find_previous(["h2", "h3", "h4", "h5"])
        if not cur:
            break
        txt = extract_text(cur)
        prev_heads.append(txt)

    # Determine labels from the nearest headings
    for txt in prev_heads:
        t = txt.lower()
        if section is None and "modeltraject deel" in t:
            # e.g. "Modeltraject deel 1"
            section = txt
        if subsection is None:
            # Typical subsections
            if any(k in t for k in [
                "wijsbegeerte en sociale wetenschappen",
                "methodologie van de geschiedenis",
                "historische oefeningen",
                "inleiding tot de geschiedenis",
                "heuristiek",
                "historisch overzicht",
                "keuzeopleidingsonderdelen",
                "sociale wetenschappen",
            ]):
                subsection = txt
        if pillar is None and ("chronologische pijler" in t or "thematische pijler" in t):
            pillar = txt

        if section and (subsection or pillar):
            # Good enough
            break

    return section, subsection, pillar
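# Illustrative classification (hypothetical heading layout): a course link preceded by a
# "Modeltraject deel 3" heading and a "Chronologische pijler" sub-heading, with no known
# subsection heading in between, would yield
#   ("Modeltraject deel 3", None, "Chronologische pijler")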
def parse_courses_from_page(soup: BeautifulSoup) -> pd.DataFrame:
    """
    Parse all course links for the 2025-xxxx academic year, infer metadata from nearby text,
    and return a normalized DataFrame.
    """
    rows = []

    # Capture all anchors that look like course links containing '?id=2025-<CODE>'
    for a in soup.find_all("a", href=True):
        href = a["href"]
        # Normalize relative links
        full_url = urllib.parse.urljoin(UA_BA_URL, href)
        # Filter by the 'id=2025-' parameter (2025-2026)
        if "id=" in href:
            q = urllib.parse.urlparse(href).query
            params = urllib.parse.parse_qs(q)
            ids = params.get("id", [])
            if not ids:
                continue
            # Some pages use '2025-XXXXX' or '2025-XXXXX&lang=nl'
            if not any(idv.startswith(TARGET_YEAR_PREFIX) for idv in ids):
                continue
            course_id = ids[0]  # e.g., '2025-1002FLWGES'
        else:
            # No id=... parameter; skip
            continue

        # Extract code after '2025-'
        code = None
        m = re.match(r"2025-([A-Za-z0-9]+)", course_id)
        if m:
            code = m.group(1)

        name = extract_text(a).strip()
        if not name or not code:
            continue

        # Use a reasonably large ancestor block for metadata search
        container = a
        for _ in range(4):
            if container.parent:
                container = container.parent
        block_text = extract_text(container)

        semester, credits, lecturers, notes = parse_meta_from_block(block_text)
        section, subsection, pillar = nearest_sections(a)

        rows.append({
            "Section": section,
            "Subsection": subsection,
            "Pillar": pillar,
            "Course Code": code,
            "Course Name": name,
            "URL": full_url,
            "Semester": semester,
            "Credits": credits,
            "Lecturers": lecturers,
            "Notes": notes
        })

    if not rows:
        # Return an empty frame with the expected columns so callers can safely check .empty
        return pd.DataFrame(columns=[
            "Section", "Subsection", "Pillar", "Course Code", "Course Name",
            "URL", "Semester", "Credits", "Lecturers", "Notes"
        ])

    df = pd.DataFrame(rows).drop_duplicates(subset=["Course Code", "Course Name"])
    # Keep only rows that clearly belong to the bachelor page; cross-links sometimes appear.
    # Heuristic: keep rows whose Section mentions "Modeltraject deel" or that carry a pillar marker.
    mask = (
        df["Section"].fillna("").str.contains(r"Modeltraject deel", case=False) |
        df["Pillar"].fillna("").str.contains(r"Pijler", case=False)
    )
    df = df[mask].copy()

    # Clean up text for consistency (fill missing values first so they do not become the string 'None')
    def clean_col(s):
        return s.str.replace(r"\s+", " ", regex=True).str.strip()

    for col in ["Section", "Subsection", "Pillar", "Course Name", "Lecturers", "Notes"]:
        df[col] = clean_col(df[col].fillna("").astype(str))

    # Ensure missing pillar/subsection are empty strings for consistent sorting
    df["Pillar"] = df["Pillar"].fillna("")
    df["Subsection"] = df["Subsection"].fillna("")

    # Sort for readability: section → pillar → subsection → name
    df.sort_values(
        by=["Section", "Pillar", "Subsection", "Course Name"],
        inplace=True
    )
    df.reset_index(drop=True, inplace=True)
    return df
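# The returned frame always has the columns Section, Subsection, Pillar, Course Code,
# Course Name, URL, Semester, Credits, Lecturers and Notes. Semester and Credits are kept
# as strings exactly as parsed from the page (e.g. "1ESEM", "6").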
def archive_and_write(excel_path: str, df: pd.DataFrame, target_sheet: str):
    """
    - If sheet 'BAGES' exists, rename it to 'BAGES_OLD_YYYYMMDD'
    - Write df to 'BAGES'
    """
    try:
        wb = load_workbook(excel_path)
    except FileNotFoundError:
        print(f"[INFO] File not found, creating new workbook: {excel_path}")
        # Write a new file straight away
        with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_sheet, index=False)
        return
    except InvalidFileException:
        print(f"[ERROR] Not a valid Excel file: {excel_path}")
        sys.exit(1)

    # Rename existing BAGES to archive
    if target_sheet in wb.sheetnames:
        date_suffix = datetime.now().strftime("%Y%m%d")
        archive_name = ARCHIVE_PREFIX + date_suffix
        # Ensure uniqueness (append a counter if necessary)
        counter = 1
        final_archive = archive_name
        while final_archive in wb.sheetnames:
            counter += 1
            final_archive = f"{archive_name}_{counter}"
        ws = wb[target_sheet]
        ws.title = final_archive
        print(f"[INFO] Archived existing '{target_sheet}' as '{final_archive}'")

    # Save intermediate
    wb.save(excel_path)

    # Now write the new sheet
    with pd.ExcelWriter(excel_path, engine="openpyxl", mode="a", if_sheet_exists="overlay") as writer:
        df.to_excel(writer, sheet_name=target_sheet, index=False)

    print(f"[SUCCESS] Wrote updated '{target_sheet}' sheet to '{excel_path}'")
    # Also save the updated DataFrame to a separate new Excel file for convenience
    try:
        src = Path(excel_path)
        new_name = src.with_name(f"{src.stem}_updated{src.suffix}")
        # Write a fresh workbook containing only the updated sheet
        with pd.ExcelWriter(str(new_name), engine="openpyxl") as writer:
            df.to_excel(writer, sheet_name=target_sheet, index=False)
        print(f"[INFO] Also wrote updated data to new file '{new_name}'")
    except Exception as e:
        print(f"[WARNING] Could not write updated copy to new file: {e}")
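# Archive naming example: if the workbook already has a 'BAGES' sheet and the script runs on
# 3 September 2025, that sheet is renamed to 'BAGES_OLD_20250903'; a second run on the same
# day produces 'BAGES_OLD_20250903_2'.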
def main():
    print("[STEP 1] Fetching the bachelor study programme page (2025-2026)…")
    soup = fetch_html(UA_BA_URL)
    time.sleep(0.3)

    print("[STEP 2] Parsing courses and metadata (this may take a few seconds)…")
    df = parse_courses_from_page(soup)
    if df.empty:
        print("[WARNING] No 2025-xxxx course rows found. The page structure may have changed.")
        print("          Please open the URL in a browser and check if '2025-2026' content is visible.")
    else:
        # Sanity: flag pillar rows (deel 3) visibly
        df["Pillar"] = df["Pillar"].replace({"": None})
        print(f"[INFO] Parsed {len(df)} course rows for 2025-2026.")

        # Optional: quick preview in the console
        head = df.head(10).to_string(index=False)
        print("[PREVIEW]\n" + head)

    print(f"[STEP 3] Updating Excel: {EXCEL_PATH}")
    archive_and_write(EXCEL_PATH, df, TARGET_SHEET)

    print("\nDone. You can now open the workbook and review the refreshed 'BAGES' sheet.")


def save_copy(dest_name: str = "script_fixed.py"):
    # Save a copy of this script beside the original.
    try:
        src = Path(__file__)
        dst = src.with_name(dest_name)
        dst.write_text(src.read_text(encoding="utf-8"), encoding="utf-8")
        print(f"[INFO] Wrote a copy of this script to '{dst}'")
    except Exception as e:
        print(f"[WARNING] Could not write copy: {e}")


if __name__ == "__main__":
    main()
    save_copy()
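# Quick manual check after running the script (sketch; assumes the workbook sits in the
# current working directory):
#   import pandas as pd
#   pd.read_excel("ongeloofelijken tool.xlsx", sheet_name="BAGES").head()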
326
ongeloofelijken tool/script_fixed.py
Normal file
@@ -0,0 +1,326 @@
File contents are identical to 'ongeloofelijken tool/script.py' above (the copy written by save_copy()).
BIN
ongeloofelijken tool/updated
Normal file
Binary file not shown.