Skip to content

Instantly share code, notes, and snippets.

@Schachte
Last active July 21, 2024 03:06
Show Gist options
  • Save Schachte/64cdc993deaeed283613d5f05f94d51e to your computer and use it in GitHub Desktop.
Save Schachte/64cdc993deaeed283613d5f05f94d51e to your computer and use it in GitHub Desktop.
Realtime query selector evaluation for Playwright
python scripts/playwright_playground.py \
"https://www.google.com/search?q=cloudflare+stock+price" \
"h3" \
--timeout 1 \
--content-type text
# Output allows quick iteration
Cloudflare, Inc. (NET) Stock Price, News, Quote & History
Try new selector (q to quit): div[data-attrid="Price"]
78.66 USD +0.14 (0.18%)todaypast 5 dayspast monthpast 6 monthsyear to datepast yearpast 5 yearsall timeClosed: Jul 19, 7:12 PM EDT • DisclaimerPre-marketAfter hours 78.50 −0.16 (0.20%)
Try new selector (q to quit): div[data-attrid="Price"] > span > span > span
78.66
Try new selector (q to quit): q
import argparse
from playwright.sync_api import sync_playwright
def main(
    url: str, selector: str, timeout: float, content_type: str, headless: bool
) -> None:
    """Interactively evaluate query selectors against a page loaded with Playwright.

    The page at ``url`` is loaded once in Chromium. ``selector`` is evaluated
    and its content printed, then the user is prompted for a new selector
    until they enter ``q`` (any case) or stdin is closed.

    Args:
        url: Address of the page to load.
        selector: First selector to evaluate.
        timeout: Seconds to wait for each selector to match.
        content_type: ``"text"`` prints the element's text content,
            ``"html"`` prints its inner HTML; anything else prints a hint.
        headless: Passed to the Chromium launcher to choose headless mode.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            page = browser.new_page()
            page.goto(url)

            while True:
                # Only the selector evaluation is guarded: a bad selector or a
                # timeout is reported and the session keeps going.
                try:
                    # The timeout is multiplied by 1000 before being handed to
                    # Playwright (seconds -> milliseconds).
                    element = page.wait_for_selector(
                        selector, timeout=timeout * 1000
                    )
                    if not element:
                        print("Selector found no content")
                    elif content_type == "text":
                        print(element.text_content())
                    elif content_type == "html":
                        print(element.inner_html())
                    else:
                        print(
                            "Invalid content type. Please choose 'text' or 'html'."
                        )
                except Exception as e:
                    print(f"Error: {str(e)}")

                # Prompt once per iteration, outside the guarded section, so a
                # closed stdin ends the session instead of being printed as a
                # selector error and then crashing on the retry prompt.
                try:
                    selector = input("\nTry new selector (q to quit): ")
                except EOFError:
                    break
                if selector.lower() == "q":
                    break
        finally:
            # Close the browser even when navigation fails or the user
            # interrupts the session.
            browser.close()
def _str_to_bool(value: str) -> bool:
    """Convert a command-line token such as ``True`` or ``false`` to a bool."""
    normalized = value.strip().lower()
    if normalized in {"true", "t", "yes", "y", "1"}:
        return True
    if normalized in {"false", "f", "no", "n", "0"}:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value (True or False), got {value!r}"
    )


def parse_args() -> argparse.Namespace:
    """Parse the command-line arguments of the selector playground.

    Returns:
        A namespace with ``url``, ``selector``, ``timeout`` (seconds,
        default 3), ``content_type`` (``"text"`` or ``"html"``, default
        ``"text"``) and ``headless`` (bool, default ``True``).
    """
    parser = argparse.ArgumentParser(
        description="Test a selector on a webpage using Playwright."
    )
    parser.add_argument("url", type=str, help="The URL of the webpage to test.")
    parser.add_argument("selector", type=str, help="The selector to test.")
    parser.add_argument(
        "--timeout",
        type=float,
        default=3,
        help="The timeout (in seconds) for the selector to appear.",
    )
    parser.add_argument(
        "--content-type",
        type=str,
        default="text",
        choices=["text", "html"],
        help="The type of content to display (text or html).",
    )
    parser.add_argument(
        "--headless",
        # ``type=bool`` would call bool("False"), which is True for every
        # non-empty string, so headless mode could never be turned off.
        type=_str_to_bool,
        default=True,
        metavar="{True,False}",
        help="Whether or not the browser runs in headless mode or not",
    )
    return parser.parse_args()
if __name__ == "__main__":
    # Script entry point: read the CLI options and start the interactive
    # selector session with them.
    cli = parse_args()
    main(
        url=cli.url,
        selector=cli.selector,
        timeout=cli.timeout,
        content_type=cli.content_type,
        headless=cli.headless,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment