Scraping Verified Twitter Usernames using Selenium
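This script logs in to Twitter with headless Chrome via Selenium, opens the list of accounts followed by @verified, scrolls the page repeatedly so more profile cards load, and pulls each card's screen-name link out of the page source with BeautifulSoup. It targets the legacy Twitter web UI and the Selenium 3 API (find_element_by_class_name, the chrome_options argument), so the class names and driver setup may need updating for current versions of the site and library.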
import time

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Run Chrome headless (Selenium 3-style driver setup).
chrome_options = Options()
chrome_options.add_argument("--headless")
driver = webdriver.Chrome('/usr/lib/chromium-browser/chromedriver', chrome_options=chrome_options)

# Log in to Twitter (legacy web UI form field class names).
driver.get('https://www.twitter.com/login')
search_box = driver.find_element_by_class_name('js-username-field')
search_box.send_keys('your_email_address')  # <--- your email goes here
driver.implicitly_wait(1)
search_box = driver.find_element_by_class_name('js-password-field')
search_box.send_keys('your_password')  # <--- your password goes here
driver.implicitly_wait(1)
search_box.submit()

textContent = []

# Accounts followed by @verified.
driver.get('https://www.twitter.com/verified/following')

for d in range(100):
    # One output file per outer pass; opened once so earlier writes are not truncated.
    with open('twitter_names_scroll_again_wow' + str(d), 'w+') as file:
        for times in range(210):
            print("process : " + str(d) + " time : " + str(times))
            # Scroll to the bottom so Twitter lazy-loads more profile cards.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
            time.sleep(2)
            page_source = driver.page_source
            page_content = BeautifulSoup(page_source, "html.parser")
            some = page_content.find_all(class_="ProfileCard-screennameLink")
            # Skip the cards already collected on previous scrolls.
            for i in some[len(textContent):]:
                name = i.get('href')[1:]  # drop the leading '/' to get the bare username
                if name not in textContent:
                    textContent.append(name)
                    file.write(name + '\n')

driver.close()
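Each pass of the outer loop writes the usernames it discovers to a file named twitter_names_scroll_again_wow<d>, one handle per line. To run it, chromedriver must be available at the path passed to webdriver.Chrome (here /usr/lib/chromium-browser/chromedriver), and the email and password placeholders must be replaced with real credentials.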