Skip to content

Instantly share code, notes, and snippets.

@artjomb
Last active June 12, 2023 00:00
Show Gist options
  • Star 16 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save artjomb/07209e859f9bf0206f76 to your computer and use it in GitHub Desktop.
Save artjomb/07209e859f9bf0206f76 to your computer and use it in GitHub Desktop.
Infinite scrolling of the StackStatus Twitter timeline with Python (Selenium) in PhantomJS
import selenium
import time
from selenium import webdriver
# Variant 1: scroll a Twitter timeline to the bottom in headless PhantomJS,
# detecting the end by comparing the document height before and after each
# scroll. A screenshot is saved for the initial page and after every scroll
# step that made the page grow.
browser = webdriver.PhantomJS("phantomjs")
try:
    browser.get("https://twitter.com/StackStatus")
    print(browser.title)

    # Seconds to wait after each scroll before re-measuring the page height.
    pause = 3

    lastHeight = browser.execute_script("return document.body.scrollHeight")
    print(lastHeight)

    # Running index used to number the screenshot files.
    i = 0
    # NOTE(review): Selenium screenshots are presumably PNG data even though
    # the file name ends in ".jpg" -- confirm before relying on the extension.
    browser.get_screenshot_as_file("test03_1_" + str(i) + ".jpg")

    while True:
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)

        newHeight = browser.execute_script("return document.body.scrollHeight")
        print(newHeight)

        # The height did not change during the pause: treat this as the end
        # of the timeline and stop scrolling.
        if newHeight == lastHeight:
            break

        lastHeight = newHeight
        i += 1
        browser.get_screenshot_as_file("test03_1_" + str(i) + ".jpg")
finally:
    # Always shut the PhantomJS process down, even if a step above raised.
    browser.quit()
import selenium
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import selenium.webdriver.support.expected_conditions as EC
from selenium.webdriver.common.by import By
# Variant 2: scroll a Twitter timeline to the bottom in headless PhantomJS,
# detecting the end by waiting for one more stream item to appear after each
# scroll. A screenshot is saved for the initial page and after every scroll
# step that produced a new item.

# Imported here because it is only needed by the wait below: WebDriverWait
# raises it when the condition is not met within the timeout.
from selenium.common.exceptions import TimeoutException

browser = webdriver.PhantomJS("phantomjs")
try:
    # Alternative, longer timeline: https://twitter.com/StackStatus
    browser.get("https://twitter.com/StackOverheards") # shorter, so that ending can be tested
    print(browser.title)

    # Running index used to number the screenshot files.
    i = 0
    # NOTE(review): Selenium screenshots are presumably PNG data even though
    # the file name ends in ".jpg" -- confirm before relying on the extension.
    browser.get_screenshot_as_file("test03_2_" + str(i) + ".jpg")

    while True:
        print("i %d" % i)

        # Number of stream items currently present in the page.
        elemsCount = browser.execute_script("return document.querySelectorAll('.stream-items > li.stream-item').length")

        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # XPath positions are 1-based, so index elemsCount+1 addresses the
        # first item that was not there before the scroll.
        nextItemXPath = "//*[contains(@class,'stream-items')]/li[contains(@class,'stream-item')]["+str(elemsCount+1)+"]"

        try:
            WebDriverWait(browser, 20).until(
                lambda x: x.find_element(By.XPATH, nextItemXPath))
        except TimeoutException:
            # No additional item showed up within 20 seconds: treat this as
            # the end of the timeline. Any other error is left to propagate
            # instead of being mistaken for a normal end.
            break

        i += 1
        browser.get_screenshot_as_file("test03_2_" + str(i) + ".jpg")
finally:
    # Always shut the PhantomJS process down, even if a step above raised.
    browser.quit()
@kirussian911
Copy link

why not user-agent?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment