Skip to content

Instantly share code, notes, and snippets.

@svngoku
Created April 28, 2022 20:00
Show Gist options
  • Save svngoku/8d5043c3e9b53ff871e1c70ff9495e2c to your computer and use it in GitHub Desktop.
Save svngoku/8d5043c3e9b53ff871e1c70ff9495e2c to your computer and use it in GitHub Desktop.
# Scrape 100,000 tweets containing #harcelement from Twitter with Selenium,
# using the Chrome driver,
# and save the data to a CSV file with labels.
import os
import time

import selenium
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Start Chrome through an explicit driver Service: Selenium 4 deprecated (and
# later removed) passing the chromedriver path as a positional argument.
driver = webdriver.Chrome(service=Service('C:/Users/user/chromedriver'))

# Twitter access: open the login page and submit the credentials.
driver.get('https://twitter.com/login')
# Selenium 4.3 removed the find_element_by_* helpers; find_element(By.…) is
# the supported equivalent and uses the same locator values.
user = driver.find_element(By.CLASS_NAME, 'js-username-field')
# Credentials are read from the environment when set, so real secrets do not
# have to be written into the script; the original placeholders are the
# fallback values.
user.send_keys(os.environ.get('TWITTER_USERNAME', 'twitter_account'))
password = driver.find_element(By.CLASS_NAME, 'js-password-field')
password.send_keys(
    os.environ.get('TWITTER_PASSWORD', 'twitter_account_password')
)
password.send_keys(Keys.ENTER)
# Fixed one-minute pause before the search step runs.
# NOTE(review): presumably leaves time for the post-login page to load -
# confirm whether an explicit wait would be more appropriate.
time.sleep(60)
# Search: type the hashtag into the search box and submit it via the search
# button.
# Selenium 4.3 removed find_element_by_id / find_element_by_xpath;
# find_element(By.…) is the supported equivalent with the same locators.
search = driver.find_element(By.ID, 'search-query')
search.send_keys('#harcelement')
tweet_search = driver.find_element(
    By.XPATH, '//*[@id="global-nav-search"]/span/button'
)
tweet_search.click()
# Fixed one-minute pause before scrolling starts.
time.sleep(60)
# Scroll the results page, append an HTML snapshot of it to 'harcelement.csv'
# after every inner scroll, then copy that file line by line into
# 'harcelement_complet.csv', following each line with a label row.
#
# NOTE(review): the indentation of this script was lost in the source this was
# recovered from; the loop nesting below is the most plausible reading -
# confirm against the author's intent.
SCROLL_TO_BOTTOM_JS = "window.scrollTo(0,document.body.scrollHeight);"
TWEET_MARKER = '<p class="TweetTextSize TweetTextSize--jumbo js-tweet-text tweet-text"'

try:
    # scroll down
    for y in range(1, 9):
        driver.execute_script(SCROLL_TO_BOTTOM_JS)
        time.sleep(60)
        for x in range(1, 15):
            driver.execute_script(SCROLL_TO_BOTTOM_JS)
            time.sleep(60)
            # Reopened for every snapshot on purpose: each write reaches the
            # disk before the next minute-long wait.  Explicit UTF-8 because
            # the platform default encoding can fail on non-ASCII tweet text.
            with open('harcelement.csv', 'a', encoding='utf-8') as raw_file:
                raw_file.write(driver.page_source)
            print(x)
        print(y)

    # categorize
    # Both files are opened once and the input is streamed, instead of loading
    # every line into memory and reopening the output file for each line.
    # errors='replace' keeps the pass going if the append-mode input file
    # still holds data written earlier in a different encoding.
    with open('harcelement.csv', 'r', encoding='utf-8',
              errors='replace') as raw_file, \
            open('harcelement_complet.csv', 'a',
                 encoding='utf-8') as labelled_file:
        for line in raw_file:
            labelled_file.write(line)
            # The label goes on its own row right after the copied line.
            if TWEET_MARKER in line:
                labelled_file.write('harcelement,\n')
            else:
                labelled_file.write('null,\n')
    print('finish')
finally:
    # Close the browser even when scrolling or file handling fails.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment