first commit
This commit is contained in:
63
webcrawler studieprogramma's/login.py
Normal file
63
webcrawler studieprogramma's/login.py
Normal file
@@ -0,0 +1,63 @@
|
||||
import asyncio
|
||||
from pyppeteer import launch
|
||||
import logging
|
||||
|
||||
# Configure the root logger so crawl()'s info/error/critical messages are shown.
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
async def crawl(url):
    """Open *url* in a visible Chromium window and print every link on the page.

    Parameters
    ----------
    url : str
        Address of the page to visit.

    Side effects: launches a headful Chromium via pyppeteer, logs progress
    through the `logging` module, and prints each anchor's text and href.
    The browser is always closed exactly once before returning, even when
    navigation or extraction fails (the original leaked the browser if
    ``newPage()`` raised).
    """
    try:
        # Launch a new Chromium browser with a visible window
        print('browser launching')
        browser = await launch(headless=False)
    except Exception as e:
        # Launch itself failed: nothing to clean up yet.
        logging.critical(f"Critical error occurred: {e}")
        return

    try:
        # Open a new page
        page = await browser.newPage()
        print('browser opened')

        try:
            # Navigate to the specified URL
            await page.goto(url)
            logging.info(f"Accessed {url}")
        except Exception as e:
            logging.error(f"Failed to navigate to {url}: {e}")
            return

        try:
            # Wait for the page to fully load
            await page.waitForSelector('body')
        except Exception as e:
            logging.error(f"Failed to load the page properly: {e}")
            return

        try:
            # Extract the content of the page (fetched to verify the page
            # renders; the HTML itself is not used further here)
            await page.content()
            # (Optional) Extract and print all links as an example
            links = await page.evaluate('''() => {
                return Array.from(document.querySelectorAll('a')).map(link => ({
                    text: link.innerText,
                    url: link.href
                }));
            }''')

            for link in links:
                print(f"Link text: {link['text']}, URL: {link['url']}")

        except Exception as e:
            logging.error(f"Error extracting or processing the content: {e}")

    except Exception as e:
        logging.critical(f"Critical error occurred: {e}")
    finally:
        # Single cleanup path: the browser closes exactly once on every
        # exit route (success, early return, or unexpected exception).
        await browser.close()
|
||||
|
||||
|
||||
# Specify the URL of the web page you want to crawl
url = 'https://www.google.com/'

# Run the crawl function. asyncio.run() creates, runs, and closes a fresh
# event loop; get_event_loop().run_until_complete() is deprecated for this
# use since Python 3.10.
asyncio.run(crawl(url))
|
||||
0
webcrawler studieprogramma's/main.py
Normal file
0
webcrawler studieprogramma's/main.py
Normal file
Reference in New Issue
Block a user