import asyncio
import logging

from pyppeteer import launch

logging.basicConfig(level=logging.INFO)
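
# NOTE: pyppeteer is a third-party package (pip install pyppeteer); on first
# launch it downloads a bundled Chromium build if one is not already cached,
# which can take a while.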


async def crawl(url):
    try:
        # Launch a new Chromium browser with a visible window
        logging.info('Launching browser')
        browser = await launch(headless=False)

        # Open a new page (tab)
        page = await browser.newPage()
        logging.info('Browser opened')

        try:
            # Navigate to the specified URL
            await page.goto(url)
            logging.info(f"Accessed {url}")
        except Exception as e:
            logging.error(f"Failed to navigate to {url}: {e}")
            await browser.close()
            return

        try:
            # Wait until the page body is present
            await page.waitForSelector('body')
        except Exception as e:
            logging.error(f"Failed to load the page properly: {e}")
            await browser.close()
            return
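
        # Note: page.goto already waits for the page's load event by
        # default, so the waitForSelector check above is a defensive
        # extra step rather than a strict requirement.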

        try:
            # Extract the full HTML content of the page
            content = await page.content()
            logging.info(f"Retrieved {len(content)} characters of HTML")

            # (Optional) Extract and print all links as an example
            links = await page.evaluate('''() => {
                return Array.from(document.querySelectorAll('a')).map(link => ({
                    text: link.innerText,
                    url: link.href
                }));
            }''')
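            # page.evaluate runs the function above inside the page's own
            # JavaScript context; the return value must be JSON-serializable,
            # which is why plain text/href strings are returned here rather
            # than element handles.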

            for link in links:
                print(f"Link text: {link['text']}, URL: {link['url']}")

        except Exception as e:
            logging.error(f"Error extracting or processing the content: {e}")

        finally:
            # Ensure the browser closes after execution
            await browser.close()

    except Exception as e:
        logging.critical(f"Critical error occurred: {e}")


if __name__ == '__main__':
    # Specify the URL of the web page you want to crawl
    url = 'https://www.google.com/'

    # Run the crawl function
    asyncio.run(crawl(url))
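
# A hypothetical extension (not part of the original script): to crawl
# several URLs in sequence, wrap crawl() in another coroutine, e.g.
#
#     async def crawl_many(urls):
#         for u in urls:
#             await crawl(u)
#
#     asyncio.run(crawl_many(['https://example.com', 'https://example.org']))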