Skip to content

Instantly share code, notes, and snippets.

@anilkilic
Created May 14, 2024 09:27
Show Gist options
  • Save anilkilic/53044d08962b0c6c4a03b2baa909268e to your computer and use it in GitHub Desktop.
Save anilkilic/53044d08962b0c6c4a03b2baa909268e to your computer and use it in GitHub Desktop.
LinkedIn Scraper
from voyager import Voyager
def main():
    """Demo run: print one raw posting, the same posting cleaned, then a cleaned batch."""
    sample_job_id = 3785477367
    divider = "*" * 40
    client = Voyager()

    # Single posting, full payload.
    job = client.get_job(sample_job_id)
    print(f"{job = }")
    print(divider)

    # Same posting, reduced to the client's selected keys.
    job_clean = client.get_job(sample_job_id, clean=True)
    print(f"{job_clean = }")
    print(divider)

    # Batch lookup of the 100 ids just below the sample id (sample itself excluded).
    preceding_ids = [*range(sample_job_id - 100, sample_job_id)]
    jobs_clean = client.get_jobs(preceding_ids, clean=True)
    print(f"{jobs_clean = }")
    print(divider)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# NOTE: Set the li_at cookie value used by Voyager._create_session before running.
import requests
class Voyager:
    """Small client for LinkedIn's ``voyagerJobsJobPostings`` Voyager API endpoint.

    Args:
        li_at: Value of the ``li_at`` cookie, obtained after logging in to
            LinkedIn. Defaults to an empty string.
        timeout: Timeout passed to every HTTP request (seconds, as understood
            by ``requests``). ``None`` disables the timeout.

    Attributes:
        session: ``requests.Session`` carrying the API headers and cookies.
        url: Base URL of the job-postings endpoint (ends with ``/``).
        keys: Payload keys kept when a method is called with ``clean=True``.
    """

    # Class-level default so the request methods still work on instances
    # whose ``timeout`` attribute was never set explicitly.
    timeout = 30

    def __init__(self, li_at="", timeout=30):
        # Stored before the session is built because _create_session reads it.
        self.li_at = li_at
        self.timeout = timeout
        self.session = self._create_session()
        self.url = "https://www.linkedin.com/voyager/api/voyagerJobsJobPostings/"
        self.keys = [
            "jobPostingUrl",
            "title",
            "formattedLocation",
            "formattedExperienceLevel",
            "formattedEmploymentStatus",
            "formattedJobFunctions",
            "formattedIndustries",
            "formattedSalaryDescription",
            "benefits",
            "localizedCostPerApplicantChargeableRegion",
            "workRemoteAllowed",
            # "originalListedAt",  # left disabled, as in the original key list
            "views",
            # "applies",  # left disabled, as in the original key list
            "applyMethod",
        ]

    def _clean(self, item):
        """Project *item* onto ``self.keys``; keys absent from *item* map to ``None``."""
        return {key: item.get(key) for key in self.keys}

    def get_job(self, job_id, clean=False):
        """Fetch a single job posting.

        Args:
            job_id: Identifier appended to the endpoint URL.
            clean: When true, return only the entries listed in ``self.keys``.

        Returns:
            The decoded JSON payload, or its reduced form when ``clean`` is true.
        """
        response = self.session.get(f"{self.url}{job_id}", timeout=self.timeout)
        # Decode the body once; the cleaned view is built from the same object.
        payload = response.json()
        return self._clean(payload) if clean else payload

    def get_jobs(self, job_ids, clean=False):
        """Fetch several job postings with one batch request.

        Args:
            job_ids: Iterable of identifiers, sent as ``?ids=List(id1,id2,...)``.
            clean: When true, return a list with one reduced dict per entry of
                the payload's ``"results"`` mapping.

        Returns:
            The decoded JSON payload, or the list of reduced postings when
            ``clean`` is true (an empty list if the payload has no
            ``"results"``).
        """
        ids = ",".join(str(job_id) for job_id in job_ids)
        response = self.session.get(
            f"{self.url}?ids=List({ids})", timeout=self.timeout
        )
        payload = response.json()
        if not clean:
            return payload
        # A payload without "results" yields no postings instead of failing
        # with an AttributeError on None.
        results = payload.get("results") or {}
        return [self._clean(item) for item in results.values()]

    def _create_session(self):
        """Build the ``requests.Session`` with the headers and cookies sent on every call."""
        # The same token goes into both the Csrf-Token header and the
        # JSESSIONID cookie (presumably the API requires them to match).
        token = "1251184134771286165"
        li_at = self.li_at  # Get this cookie after you logged in to linkedin.
        headers = {
            "User-Agent": "LinkedIn/9.28.7219 CFNetwork/1410.0.3 Darwin/22.6.0",
            "Accept": "application/json",
            "Csrf-Token": f"ajax:{token}",
            "Cookie": f'JSESSIONID="ajax:{token}"; li_at={li_at}',
        }
        session = requests.Session()
        session.headers.update(headers)
        return session
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment