# bepcyc/headless_firefox_crawl_js_site.py

## headless_firefox_crawl_js_site.py
### based on great SO answers: https://stackoverflow.com/a/50593885/918211 and https://stackoverflow.com/a/46768243/918211

## Debian/Ubuntu specific
# sudo apt install -y firefox-geckodriver

# python3 -m venv venv
# cd venv
# source bin/activate
# pip install selenium beautifulsoup4 lxml

from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Launch Firefox without a visible window. Passing the "-headless" argument
# works across Selenium releases, whereas the `options.headless` setter was
# deprecated and later removed in Selenium 4.
options = Options()
options.add_argument("-headless")
driver = webdriver.Firefox(options=options)
try:
    driver.get("https://yourdynamicwebsite.org/dgfdgdgd")
    # Wait up to 10 seconds for the JS-rendered element to appear in the DOM.
    # Other wait conditions can be used instead, such as
    # visibility_of_element_located or text_to_be_present_in_element.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
except TimeoutException:
    # WebDriverWait.until raises TimeoutException when the condition is not
    # met within the timeout.
    print("Couldnt locate element")
else:
    # The element is present, so the page source now contains the dynamic
    # content. The "lxml" parser requires the lxml package to be installed.
    html = driver.page_source
    soup = bs(html, "lxml")
    dynamic_text = soup.find_all("p", {"class": "class_name"})  # or other attributes, optional
finally:
    # Always shut the browser down so no Firefox/geckodriver process is left
    # running, whether or not the element was found.
    driver.quit()