@ugnb
Created March 31, 2017 12:45
A faster way to extract all URLs from a page using Selenium WebDriver in Python. Instead of calling get_attribute('href') once per element (one client-to-browser round-trip per link), a single execute_script call serializes every href inside the browser and returns the whole list in one round-trip.
import json
from json import JSONDecodeError
from typing import List

from selenium import webdriver
from selenium.common.exceptions import WebDriverException

# Connect to a remote WebDriver (e.g. a Selenium server or Docker
# container) listening on localhost:4444.
driver = webdriver.Remote(
    command_executor='{}/wd/hub'.format('http://localhost:4444'),
    desired_capabilities=webdriver.ChromeOptions().to_capabilities(),
    keep_alive=True)

try:
    driver.get('http://google.com/')
    # Collect every anchor's href inside the browser and serialize the
    # whole list to one JSON string, so the data crosses the WebDriver
    # wire in a single execute_script round-trip.
    links_json: str = driver.execute_script(
        "return JSON.stringify([].slice.call(document.getElementsByTagName('a')).map(a => a.href))")
    urls: List[str] = json.loads(links_json)
    print(urls)
except WebDriverException as ex:
    print("Failed to get links on page: {}".format(ex))
except JSONDecodeError:
    print("Failed to decode links JSON array: {}".format(links_json))
finally:
    # quit() now runs even if navigation itself fails.
    driver.quit()
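
For comparison, the slower approach the title alludes to fetches each element handle and reads its href one call at a time, paying a separate round-trip per link. A minimal sketch, assuming the same driver as above (the By locator and the comprehension are illustrative, not part of the original gist):

from typing import List

from selenium.webdriver.common.by import By

# One round-trip to find the elements, then one more per get_attribute
# call: on a page with hundreds of links this adds up quickly.
anchors = driver.find_elements(By.TAG_NAME, 'a')
urls: List[str] = [a.get_attribute('href') for a in anchors]
print(urls)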
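
Note that desired_capabilities was removed in Selenium 4, so on current releases the Remote construction above no longer works as written. A minimal sketch of the equivalent setup on Selenium 4 (the rest of the script is unchanged):

from selenium import webdriver

# Selenium 4 takes an options object directly instead of a
# desired_capabilities dict.
driver = webdriver.Remote(
    command_executor='http://localhost:4444/wd/hub',
    options=webdriver.ChromeOptions())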