# sueszli/amazon-jobs-scraper.py

## amazon-jobs-scraper.py
import webbrowser
import time
from playwright.sync_api import sync_playwright


# Search-results URL the scraper starts from; replace the placeholder part
# with your own query string before running.
URL = "https://www.amazon.jobs/en/search?___________INSERT YOUR URL HERE___________"

# Every job link collected so far, across all result pages.
links = []

# The context manager stops the Playwright driver when the block exits
# (normally or through an exception); the ``finally`` below closes the
# browser that was launched, so nothing is left running after the script.
with sync_playwright() as playwright:
    browser = playwright.chromium.launch()
    try:
        page = browser.new_page()
        page.goto(URL)

        while True:
            print("new page")

            # get links
            # NOTE(review): on every iteration after the first, this runs
            # right after the "Next page" click with no explicit wait; if the
            # result list is refreshed asynchronously it may still show the
            # previous page -- confirm against the live site.
            newlinks = []
            for match in page.query_selector_all("//*[@data-job-id]/div[1]/div/h3/a"):
                href = match.get_attribute("href")
                if href is None:
                    # An anchor without an href would otherwise become the
                    # bogus link "https://www.amazon.jobsNone".
                    continue
                newlinks.append(f"https://www.amazon.jobs{href}")
            links += newlinks

            # open links (each one goes to the system default web browser)
            for link in newlinks:
                webbrowser.open(link)

            # pause 30 seconds before moving on
            # (presumably to let the opened tabs settle -- TODO confirm intent)
            time.sleep(30)

            # go to next page
            nextpage_xpath = "//button[@aria-label='Next page']"
            page.wait_for_selector(nextpage_xpath)
            nextpage_button = page.query_selector_all(nextpage_xpath)
            assert len(nextpage_button) == 1, (
                f"expected exactly one 'Next page' button, found {len(nextpage_button)}"
            )
            # A disabled button is treated as the end of the result pages.
            if nextpage_button[0].is_disabled():
                break
            nextpage_button[0].click()
    finally:
        browser.close()

print("found a total of", len(links), "links")
	import webbrowser
	import time
	from playwright.sync_api import sync_playwright


	# Search-results URL the scraper starts from; replace the placeholder part
	# with your own query string before running.
	URL = "https://www.amazon.jobs/en/search?___________INSERT YOUR URL HERE___________"

	# Every job link collected so far, across all result pages.
	links = []

	# The context manager stops the Playwright driver when the block exits
	# (normally or through an exception); the ``finally`` below closes the
	# browser that was launched, so nothing is left running after the script.
	with sync_playwright() as playwright:
		browser = playwright.chromium.launch()
		try:
			page = browser.new_page()
			page.goto(URL)

			while True:
				print("new page")

				# get links
				# NOTE(review): on every iteration after the first, this runs
				# right after the "Next page" click with no explicit wait; if
				# the result list is refreshed asynchronously it may still show
				# the previous page -- confirm against the live site.
				newlinks = []
				for match in page.query_selector_all("//*[@data-job-id]/div[1]/div/h3/a"):
					href = match.get_attribute("href")
					if href is None:
						# An anchor without an href would otherwise become the
						# bogus link "https://www.amazon.jobsNone".
						continue
					newlinks.append(f"https://www.amazon.jobs{href}")
				links += newlinks

				# open links (each one goes to the system default web browser)
				for link in newlinks:
					webbrowser.open(link)

				# pause 30 seconds before moving on
				# (presumably to let the opened tabs settle -- TODO confirm intent)
				time.sleep(30)

				# go to next page
				nextpage_xpath = "//button[@aria-label='Next page']"
				page.wait_for_selector(nextpage_xpath)
				nextpage_button = page.query_selector_all(nextpage_xpath)
				assert len(nextpage_button) == 1, (
					f"expected exactly one 'Next page' button, found {len(nextpage_button)}"
				)
				# A disabled button is treated as the end of the result pages.
				if nextpage_button[0].is_disabled():
					break
				nextpage_button[0].click()
		finally:
			browser.close()

	print("found a total of", len(links), "links")