Skip to content

Instantly share code, notes, and snippets.

@MantisSTS
Created July 20, 2022 18:51
Show Gist options
  • Save MantisSTS/d8ab7067cceb661ae92cf21cdb0dd304 to your computer and use it in GitHub Desktop.
Save MantisSTS/d8ab7067cceb661ae92cf21cdb0dd304 to your computer and use it in GitHub Desktop.
linkedin_scraper.py
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import time
import math
# Run timestamp (seconds since the epoch, rounded up). It is computed once at
# import time so every address recorded during one run lands in the same
# ``users<ts>.txt`` file.
ts = math.ceil(time.time())


def add_to_list(name, domain="company.com"):
    """Append a guessed ``first.last@<domain>`` address for *name* to the run's output file.

    The name is lower-cased, dots are removed, and the result is split on
    whitespace; the first two tokens form the local part of the address.
    Names that contain non-ASCII characters, or that yield fewer than two
    tokens (e.g. ``"Cher"``), are skipped without writing anything.

    Args:
        name: Display name, e.g. ``"John Smith"``.
        domain: Mail domain placed after ``@``. Defaults to ``"company.com"``.

    Returns:
        None. The address is appended as one line to ``users<ts>.txt`` in the
        current working directory.
    """
    if not name.isascii():
        return

    # split() with no argument collapses runs of whitespace and ignores
    # leading/trailing blanks, so stray spaces cannot produce empty tokens.
    parts = name.lower().replace(".", "").split()
    if len(parts) < 2:
        # A single token cannot form "first.last"; skip it instead of raising.
        return

    email = f"{parts[0]}.{parts[1]}@{domain}"
    with open("users{}.txt".format(ts), "a") as f:
        f.write(email + "\n")
def scrape_linkedin(driver):
    """Sign in to LinkedIn, page through a people search, and record each name found.

    The function loads the sign-in page, dismisses the cookie banner when one
    can be clicked, submits the credentials, opens the company people-search
    URL, and then walks the result pages. Every result name is printed and
    passed to ``add_to_list``. Paging stops when the "next" pagination button
    is missing or disabled.

    Args:
        driver: A Selenium WebDriver instance used for all navigation and
            element lookups.

    Returns:
        None.
    """
    # Imported here so the narrowed exception type is in scope without
    # touching the file-level import block.
    from selenium.common.exceptions import WebDriverException

    next_button_selector = ".artdeco-pagination__button--next"

    # Use selenium to loop over linkedin pages and grab the first and last name of each user
    driver.get("https://www.linkedin.com/checkpoint/rm/sign-in-another-account?fromSignIn=true&trk=guest_homepage-basic_nav-header-signin")
    time.sleep(1)

    try:
        driver.find_element(By.XPATH, '//button[normalize-space()="Accept cookies"]').click()
        time.sleep(1)
    except WebDriverException:
        # Best effort: a missing or unclickable banner must not stop the run,
        # but KeyboardInterrupt / SystemExit are no longer swallowed.
        print("[INFO]: No cookies bar detected, passing...")

    driver.find_element(By.NAME, "session_key").send_keys("<YOUR EMAIL>")
    driver.find_element(By.NAME, "session_password").send_keys("<YOUR PASSWORD>")
    driver.find_element(By.XPATH, "//button[normalize-space()='Sign in']").click()

    driver.get("https://www.linkedin.com/search/results/people/?currentCompany=%5B%<COMPANY ID>%22%5D&origin=FACETED_SEARCH&sid=%40BY")
    time.sleep(1)

    # Scroll to the bottom of the page so we can see the button
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(0.9)

    counter = 1
    while True:
        print(f"[INFO]: Scraping page #{counter}")
        counter += 1

        # Scroll to the bottom of the page so we can see the button
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(1)

        # find_elements returns an empty list instead of raising, so a result
        # set without pagination is still scraped before the loop ends.
        next_buttons = driver.find_elements(By.CSS_SELECTOR, next_button_selector)
        time.sleep(1)

        names = driver.find_elements(By.CSS_SELECTOR, ".reusable-search__result-container .app-aware-link > span > span")
        for name in names:
            # Read the text once; each .text access is a separate WebDriver call.
            text = name.text
            lowered = text.lower()
            # Skip spans whose text contains both "view" and "profile"
            # (presumably "View ...'s profile" labels rather than names - confirm).
            if "view" in lowered and "profile" in lowered:
                continue
            print("[INFO]: Found user:", text)
            add_to_list(text)

        if next_buttons and next_buttons[0].is_enabled():
            next_buttons[0].click()
            time.sleep(2)
        else:
            print("[INFO]: No more pages to scrape... Exiting")
            break
def main():
    """Start Chrome through the local chromedriver, run the scrape, and always close the browser."""
    chrome_driver_path = r'./chromedriver.exe'
    service = Service(chrome_driver_path)
    options = webdriver.ChromeOptions()
    # This is optional
    options.add_argument('--incognito')  # start in incognito mode
    driver = webdriver.Chrome(service=service, options=options)
    try:
        scrape_linkedin(driver)
    finally:
        # Quit even when scraping raises so the browser session is not left running.
        driver.quit()


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment