Skip to content

Instantly share code, notes, and snippets.

@clod81
Last active March 15, 2023 02:06
Show Gist options
  • Save clod81/e56ad1c10ee6d2e8eff03c03e1de91cb to your computer and use it in GitHub Desktop.
Save clod81/e56ad1c10ee6d2e8eff03c03e1de91cb to your computer and use it in GitHub Desktop.
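LinkedIn scraper (Python 3 + Selenium): searches by employee name plus company name. For the first search result it records the name, the LinkedIn profile URL, and the Experience entry text containing "Present" (the current position's dates). Results are appended to linked_dates.txt.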
import os
import sys
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Shared browser session used by scrape() and by the script body below.
# The chromedriver location is passed through a Service object: Selenium 4
# deprecated (and later removed) the positional executable-path argument.
# A `global` statement is not needed here; this is already module scope.
driver = webdriver.Chrome(service=Service("/PATH_TO/chromedriver"))

# Marker text searched for in the Experience section's hidden spans.
# Presumably it identifies a position that is still current — verify against
# the live LinkedIn markup.
p = "Present"
def scrape(name, company="COMPANY_NAME_REPLACE_ME"):
    """Search LinkedIn for ``name`` at ``company`` and log the matching entry.

    Types ``"<name> <company>"`` into the global search box, opens the first
    search result, then looks through the profile's "Experience" section for
    the first visually-hidden text containing the module-level marker ``p``.
    When found, one line ``name,profile_url,text`` is appended to
    ``linked_dates.txt``.

    Uses the module-level ``driver``; it is assumed to be logged in already
    and showing a page that contains the global search box.

    Args:
        name: Person to search for; also written as the first output field.
        company: Text appended to the search query after a single space.
            Defaults to the original placeholder so existing callers that
            pass only ``name`` behave as before.

    Returns:
        None. Nothing is written when no matching entry is found.

    Raises:
        Any Selenium exception raised before the section scan starts (for
        example when the search box or the first result is missing)
        propagates to the caller.
    """
    search = driver.find_element(By.CLASS_NAME, 'search-global-typeahead__input')
    search.send_keys(name + " " + company)
    time.sleep(2)  # fixed pause before submitting the query
    search.send_keys(Keys.ENTER)
    time.sleep(5)  # fixed pause for the results page to load

    # Open the first search result.
    result_title = driver.find_element(By.CLASS_NAME, 'entity-result__title-text')
    profile_link = result_title.find_element(By.CLASS_NAME, 'app-aware-link ')
    profile_link.click()
    time.sleep(5)  # fixed pause for the profile page to load

    headers = driver.find_elements(By.CLASS_NAME, 'pvs-header__title-container')
    for header in headers:
        try:
            title = header.find_element(By.TAG_NAME, 'span')
            if title.get_attribute("innerHTML") != "<!---->Experience<!---->":
                continue

            # Climb four ancestors from the header to reach the element that
            # wraps the whole section.
            section = header
            for _ in range(4):
                section = section.find_element(By.XPATH, '..')

            for hidden in section.find_elements(By.CLASS_NAME, "visually-hidden"):
                ihtml = hidden.get_attribute("innerHTML")
                # get_attribute may return None; skip those instead of
                # raising AttributeError and aborting the whole scrape.
                if ihtml and p in ihtml:
                    # Explicit UTF-8 so non-ASCII names do not depend on the
                    # platform's default encoding.
                    with open('linked_dates.txt', 'a', encoding='utf-8') as file:
                        file.write(name + "," + driver.current_url + "," + ihtml + '\n')
                    return
        except Exception:
            # Report the exception type and where it was raised, then shut
            # the browser down and stop.
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            print(exc_type, fname, exc_tb.tb_lineno)
            # NOTE(review): the script body also calls driver.quit() after
            # scrape() returns; confirm a second quit is harmless.
            driver.quit()
            return
# Script body: load LinkedIn, install the session cookies, open the feed and
# scrape the person named by the first command-line argument.
try:
    base = "https://www.linkedin.com"
    # Visit the site first so the cookies below are added while a
    # www.linkedin.com page is loaded.
    driver.get(base)
    time.sleep(4)

    # (name, value) pairs, added in this order. Replace every "REPLACE_ME"
    # with the value from a logged-in browser session.
    cookies = (
        ("li_sugr", "REPLACE_ME"),  # changes per user / session
        ("aam_uuid", "REPLACE_ME"),  # changes per user / session
        ("timezone", "REPLACE_ME"),
        ("_guid", "REPLACE_ME"),  # changes per user / session
        ("lms_ads", "REPLACE_ME"),  # changes per user / session
        ("lms_analytics", "REPLACE_ME"),  # changes per user / session
        # The original li_at entry was missing its "domain" key, which was a
        # syntax error; it now has the same shape as the other cookies.
        ("li_at", "REPLACE_ME"),  # changes per user / session
        ("JSESSIONID", "REPLACE_ME"),  # changes per user / session
        ("ln_or", "REPLACE_ME"),
        ("li_theme", "dark"),
        ("li_theme_set", "user"),
    )
    for cookie_name, cookie_value in cookies:
        driver.add_cookie(
            {"name": cookie_name, "value": cookie_value, "domain": "www.linkedin.com"}
        )

    driver.get(base + "/feed/")
    time.sleep(4)
    scrape(sys.argv[1])
except Exception:
    # Report the exception type and where it was raised.
    exc_type, exc_obj, exc_tb = sys.exc_info()
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print(exc_type, fname, exc_tb.tb_lineno)
finally:
    # Always close the browser, on success, on error, and on interruption.
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment