Skip to content

Instantly share code, notes, and snippets.

@svpino
Created March 21, 2023 14:24
Show Gist options
  • Save svpino/0af33ac8a4401253d91d5d6a8e64c7c9 to your computer and use it in GitHub Desktop.
Save svpino/0af33ac8a4401253d91d5d6a8e64c7c9 to your computer and use it in GitHub Desktop.
Web Scraping using Bright Data Scraping Browser and Playwright
import re
import asyncio
from playwright.async_api import async_playwright
# Credentials interpolated into the browser endpoint URL built in main().
# NOTE(review): these are placeholders - replace them before running, and
# avoid committing real credentials.
USERNAME = "TYPE YOUR USERNAME HERE"
PASSWORD = "TYPE YOUR PASSWORD HERE"
# Host and port of the remote browser endpoint that main() connects to.
HOST = "zproxy.lum-superproxy.io:9222"
# Page that main() navigates to.
URL = "https://www.svpino.com/" # USE YOUR URL HERE
def process(html):
    """Print the contents of the first ``<title>`` element found in *html*.

    Args:
        html: The page markup as a string.

    The tag match is case-insensitive and the title text may span several
    lines. When no ``<title>...</title>`` pair is present, a short notice is
    printed instead of failing with ``AttributeError`` on the missing match.
    """
    # Non-greedy group so only the first title is captured; DOTALL lets "."
    # cross newlines inside the element.
    regex = re.compile(r"<title>(.*?)</title>", re.IGNORECASE | re.DOTALL)
    match = regex.search(html)
    if match is None:
        print("Title not found")
        return
    print(f"Title: {match.group(1)}")
async def main():
    """Connect to the remote browser over CDP, load ``URL`` and print its title.

    Builds the endpoint from ``USERNAME``, ``PASSWORD`` and ``HOST``, opens a
    new page, navigates to ``URL`` and passes the document element's
    ``outerHTML`` to ``process``. The browser connection is closed even when
    navigation or processing raises.
    """
    browser_url = f"https://{USERNAME}:{PASSWORD}@{HOST}"

    async with async_playwright() as pw:
        print("Connecting to browser...")
        browser = await pw.chromium.connect_over_cdp(browser_url)
        try:
            page = await browser.new_page()
            print(f"Navigating to URL {URL}...")
            # Playwright timeouts are in milliseconds: allow up to two minutes.
            await page.goto(URL, timeout=120000)
            html = await page.evaluate("()=>document.documentElement.outerHTML")
            process(html)
        finally:
            # Always release the remote browser, including on errors above.
            await browser.close()
# Script entry point: run the main() coroutine to completion.
asyncio.run(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment