Skip to content

Instantly share code, notes, and snippets.

@kingozorg
Forked from n-kb/scrape.py
Created January 25, 2021 20:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kingozorg/52804623fbf3f9b6b9b72659a402d720 to your computer and use it in GitHub Desktop.
Save kingozorg/52804623fbf3f9b6b9b72659a402d720 to your computer and use it in GitHub Desktop.
import csv
import os.path
import time

from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    StaleElementReferenceException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# JavaScript run in the page: scroll to the bottom and report the page height.
SCROLL_SCRIPT = "window.scrollTo(0, document.body.scrollHeight);var lenOfPage=document.body.scrollHeight;return lenOfPage;"

CSV_COLUMNS = ["date", "text"]


def month_bounds(year, month):
    """Return the ``(since, until)`` search dates covering one calendar month.

    ``since`` is the first day of the month.  ``until`` is the first day of
    the *following* month: Twitter's ``until:`` operator excludes the given
    date, so passing the month's last day would drop that day's tweets.
    Using the next month's first day also removes the need to know the
    month length (and therefore handles leap years).
    """
    if month == 12:
        next_year, next_month = year + 1, 1
    else:
        next_year, next_month = year, month + 1
    since = "%d-%d-%d" % (year, month, 1)
    until = "%d-%d-%d" % (next_year, next_month, 1)
    return since, until


def build_search_url(twitto, since, until):
    """Build the live-search URL for tweets from ``twitto`` in a date range."""
    url = 'https://twitter.com/search?q=(from%3A' + twitto + ')'
    url += '%20until%3A' + until + '%20since%3A' + since + '&src=typed_query&f=live'
    return url


def collect_tweets(browser):
    """Scroll the loaded search page to its end and return the tweets found.

    Returns a list of ``{"date": ..., "text": ...}`` dicts in the order the
    tweets were first encountered.  Scrolling stops once the page height no
    longer changes between two consecutive scrolls.
    """
    rows = []
    # The same <article> can still be in the DOM on the next scroll pass, so
    # remember what was already recorded.  NOTE: two tweets with identical
    # date text and identical body are treated as one.
    seen = set()
    page_height = browser.execute_script(SCROLL_SCRIPT)
    while True:
        previous_height = page_height
        time.sleep(3)  # give the page time to load the next batch
        for tweet in browser.find_elements(By.XPATH, '//article[@role="article"]'):
            try:
                date = tweet.find_element(By.XPATH, './/a[@title]').text
                text = tweet.find_element(By.XPATH, './/div[@lang]').text
            except (NoSuchElementException, StaleElementReferenceException):
                # Article without the expected nodes, or one that was removed
                # from the DOM while being read: skip it, keep scraping.
                continue
            key = (date, text)
            if key in seen:
                continue
            seen.add(key)
            print(date, text)
            rows.append({"date": date, "text": text})
        page_height = browser.execute_script(SCROLL_SCRIPT)
        if previous_height == page_height:
            return rows


def write_csv(filename, rows):
    """Write ``rows`` (dicts with ``date`` and ``text`` keys) to ``filename``.

    The parent directory is created if missing.  A header line is always
    written, even when ``rows`` is empty.
    """
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)
    # newline='' is what the csv module requires to avoid blank lines on
    # Windows; an explicit encoding keeps non-ASCII tweet text portable.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=CSV_COLUMNS)
        writer.writeheader()
        writer.writerows(rows)


def main():
    """Scrape one account month by month (2013-2019) into ``data/Y-M.csv``.

    Months whose CSV file already exists are skipped, so an interrupted run
    can be resumed.
    """
    # NOTE(review): this looks like a placeholder for the account handle
    # (possibly mis-encoded) -- replace it with the real handle before running.
    twitto = "๐ŸŒŸ๐ŸŒŸ๐ŸŒŸ๐ŸŒŸ๐ŸŒŸ"
    browser = webdriver.Chrome()
    try:
        for year in range(2013, 2020):
            for month in range(1, 13):
                filename = "data/%d-%d.csv" % (year, month)
                if os.path.isfile(filename):
                    continue
                since, until = month_bounds(year, month)
                browser.get(build_search_url(twitto, since, until))
                time.sleep(3)  # wait for the initial results to render
                write_csv(filename, collect_tweets(browser))
    finally:
        # quit() ends the whole driver session, also when a month fails.
        browser.quit()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment