import asyncio
import logging

from pyppeteer import launch

logging.basicConfig(level=logging.INFO)


async def crawl(url):
    try:
        # Launch a new Chromium browser with a visible window
        print('browser launching')
        browser = await launch(headless=False)
        # Open a new page
        page = await browser.newPage()
        print('browser opened')

        try:
            # Navigate to the specified URL
            await page.goto(url)
            logging.info(f"Accessed {url}")
        except Exception as e:
            logging.error(f"Failed to navigate to {url}: {e}")
            await browser.close()
            return

        try:
            # Wait for the page to fully load
            await page.waitForSelector('body')
        except Exception as e:
            logging.error(f"Failed to load the page properly: {e}")
            await browser.close()
            return

        try:
            # Extract the content of the page (raw HTML, kept here for
            # any further processing you want to add)
            content = await page.content()
            # (Optional) Extract and print all links as an example
            links = await page.evaluate('''() => {
                return Array.from(document.querySelectorAll('a')).map(link => ({
                    text: link.innerText,
                    url: link.href
                }));
            }''')
            for link in links:
                print(f"Link text: {link['text']}, URL: {link['url']}")
        except Exception as e:
            logging.error(f"Error extracting or processing the content: {e}")
        finally:
            # Ensure the browser closes after execution
            await browser.close()
    except Exception as e:
        logging.critical(f"Critical error occurred: {e}")


# Specify the URL of the web page you want to crawl
url = 'https://www.google.com/'

# Run the crawl function. asyncio.run() replaces the deprecated
# asyncio.get_event_loop().run_until_complete() pattern.
asyncio.run(crawl(url))
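

# --- Optional extension (a minimal sketch, not part of the original script) ---
# crawl() above handles a single URL. If you need several pages, one common
# pattern is to schedule multiple crawl() calls concurrently with
# asyncio.gather(). crawl_many() and the URLs below are hypothetical additions
# for illustration; note that each crawl() call launches its own browser
# instance, which is simple but memory-hungry, so for large crawls you would
# typically share one browser and open a page per URL instead.

async def crawl_many(urls):
    # Run one crawl() task per URL and wait for all of them to finish.
    # return_exceptions=True keeps one failing URL from cancelling the rest.
    results = await asyncio.gather(*(crawl(u) for u in urls),
                                   return_exceptions=True)
    for u, result in zip(urls, results):
        if isinstance(result, Exception):
            logging.error(f"Crawl of {u} raised: {result}")

# Example usage (hypothetical URL list):
# asyncio.run(crawl_many(['https://www.google.com/', 'https://example.com/']))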