# wkarney/infinite_scroll_scraping_selenium.py

## infinite_scroll_scraping_selenium.py
from time import sleep
from selenium import webdriver
from bs4 import BeautifulSoup

# Headless/incognito Chrome driver
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--incognito")
chrome_options.add_argument('headless')
# NOTE(review): `executable_path` / `chrome_options` are the legacy keyword
# spellings of the Chrome constructor -- confirm that the installed Selenium
# release still accepts them before upgrading.
driver = webdriver.Chrome(executable_path='CHROMEDRIVER_PATH',chrome_options=chrome_options)

# Seconds to wait after each scroll so newly requested content can load
SCROLL_PAUSE_TIME = 2

# Upper bound on the number of scroll loads; raise it to collect more content
scroll_limit = 5

try:
    driver.get('http://www.exampleurl.com')

    # Page height before any scrolling; used to detect when nothing new loads
    last_height = driver.execute_script("return document.body.scrollHeight")

    count = 0
    while count < scroll_limit:
        # Jump to the current bottom of the page to trigger the next load
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Give the page time to append the new content
        sleep(SCROLL_PAUSE_TIME)

        # An unchanged height means the scroll loaded nothing more: stop early
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
        count += 1

    # Final pause before the rendered document is captured
    sleep(2)

    html = driver.page_source
finally:
    # Always end the browser session, even if navigation or scrolling raised,
    # so no headless Chrome process is left running.
    driver.quit()

soup = BeautifulSoup(html, 'lxml')
	from time import sleep
	from selenium import webdriver
	from bs4 import BeautifulSoup

	# NOTE(review): this tab-indented section repeats the top-level script
	# earlier in the file and is itself still indented one level -- confirm
	# whether it should be kept, dedented, or wrapped in a function.

	# Headless/incognito Chrome driver
	chrome_options = webdriver.ChromeOptions()
	chrome_options.add_argument("--incognito")
	chrome_options.add_argument('headless')
	driver = webdriver.Chrome(executable_path='CHROMEDRIVER_PATH',chrome_options=chrome_options)

	# Seconds to wait after each scroll so newly requested content can load
	SCROLL_PAUSE_TIME = 2

	# Upper bound on the number of scroll loads; raise it to collect more content
	scroll_limit = 5

	try:
		driver.get('http://www.exampleurl.com')

		# Page height before any scrolling; used to detect when nothing new loads
		last_height = driver.execute_script("return document.body.scrollHeight")

		count = 0
		while count < scroll_limit:
			# Jump to the current bottom of the page to trigger the next load
			driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

			# Give the page time to append the new content
			sleep(SCROLL_PAUSE_TIME)

			# An unchanged height means the scroll loaded nothing more: stop early
			new_height = driver.execute_script("return document.body.scrollHeight")
			if new_height == last_height:
				break
			last_height = new_height
			count += 1

		# Final pause before the rendered document is captured
		sleep(2)

		html = driver.page_source
	finally:
		# Always end the browser session, even if navigation or scrolling
		# raised, so no headless Chrome process is left running.
		driver.quit()

	soup = BeautifulSoup(html, 'lxml')