Skip to content

Instantly share code, notes, and snippets.

@sueszli
Created January 12, 2024 13:03
Show Gist options
  • Save sueszli/9fc5d3837239120269323ecff84b2fcc to your computer and use it in GitHub Desktop.
Save sueszli/9fc5d3837239120269323ecff84b2fcc to your computer and use it in GitHub Desktop.
import webbrowser
import time
from playwright.sync_api import sync_playwright
# Search-results URL to scrape; replace the placeholder query string with your own.
URL = "https://www.amazon.jobs/en/search?___________INSERT YOUR URL HERE___________"

# XPath of the anchor inside each job card and of the pagination "next" button.
JOB_LINK_XPATH = "//*[@data-job-id]/div[1]/div/h3/a"
NEXT_PAGE_XPATH = "//button[@aria-label='Next page']"

# Every job link collected across all result pages.
links = []

# The context manager stops the Playwright driver on exit, and the
# try/finally closes the browser even if scraping raises part-way through.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch()
    try:
        page = browser.new_page()
        page.goto(URL)

        while True:
            print("new page")

            # Collect the job links on the current result page. Anchors
            # without an href are skipped so that no ".../None" URL is built.
            newlinks = []
            for match in page.query_selector_all(JOB_LINK_XPATH):
                href = match.get_attribute("href")
                if href is None:
                    continue
                newlinks.append(f"https://www.amazon.jobs{href}")
            links += newlinks

            # Open this page's links in the user's default browser.
            for link in newlinks:
                webbrowser.open(link)

            # Pause before moving on to the next result page.
            time.sleep(30)

            # Locate the pagination button; exactly one match is expected.
            page.wait_for_selector(NEXT_PAGE_XPATH)
            nextpage_button = page.query_selector_all(NEXT_PAGE_XPATH)
            if len(nextpage_button) != 1:
                # Explicit raise instead of `assert`, which is removed under -O.
                raise RuntimeError(
                    f"expected exactly one 'Next page' button, "
                    f"found {len(nextpage_button)}"
                )

            # A disabled button means the last result page has been reached.
            if nextpage_button[0].is_disabled():
                break
            nextpage_button[0].click()
            # NOTE(review): the next iteration scrapes immediately after the
            # click; if results are rendered asynchronously the previous
            # page's links may be read again -- confirm and add a wait if so.
    finally:
        browser.close()

print("found a total of", len(links), "links")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment