Forked from adrianhorning08/scrapeLinkedinSearch.js
Created
January 13, 2024 23:39
-
-
Save mals14/7359f7d696a1c3429a6e221886cd1ea5 to your computer and use it in GitHub Desktop.
Scrape Linkedin Search
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Walks every page of the currently open LinkedIn people-search results,
 * collects the profiles found on each page, de-duplicates them and hands the
 * result to `convertJsonToCsvAndDownload` as a timestamped CSV file.
 *
 * Intended to be run from the browser console on a search-results page.
 * Relies on the sibling helpers `getProfiles` and
 * `convertJsonToCsvAndDownload`.
 *
 * @returns {Promise<void>} Resolves once the last page has been scraped and
 *   the download (if any) has been triggered.
 */
async function scrapeLinkedinSearch() {
  const NEXT_BUTTON_SELECTOR = 'button[aria-label="Next"]';
  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Keyed by linkedinId (falling back to the profile URL) so that duplicates
  // are dropped in O(1) per profile; a Map keeps first-seen insertion order.
  const profilesByKey = new Map();
  let page = 1;

  for (;;) {
    console.log("page", page);

    // Scroll to the bottom so the pagination controls get rendered, then give
    // the page a moment to settle before reading the DOM.
    window.scrollTo(0, document.body.scrollHeight);
    await sleep(900);

    for (const profile of getProfiles()) {
      const key = profile.linkedinId ?? profile.url;
      if (!profilesByKey.has(key)) {
        profilesByKey.set(key, profile);
      }
    }

    // Re-query on every page: the button found on the previous page may no
    // longer be the one in the document after the results are replaced.
    const nextButton = document.querySelector(NEXT_BUTTON_SELECTOR);
    // A missing button means the search has no pagination (single page of
    // results); a disabled one means this is the last page.
    if (!nextButton || nextButton.disabled) {
      break;
    }

    nextButton.click();
    page++;

    // Random pause between pages; it also gives the next page time to load
    // before the following iteration reads it.
    const randomDelay = Math.floor(Math.random() * 3000) + 1000;
    console.log(`Waiting ${randomDelay}ms so you don't get flagged 🤪`);
    console.log(
      `If you need anything else scraped, email me: adrian@thewebscrapingguy.com`
    );
    await sleep(randomDelay);
  }

  const allProfiles = [...profilesByKey.values()];
  if (allProfiles.length === 0) {
    // Nothing to export; skip the CSV step rather than producing an empty file.
    console.warn("No profiles were found on this page; nothing to download.");
    return;
  }

  console.log(
    `Congrats! 🎉 You just scraped ${allProfiles.length} profiles! If you want more leads, or want anything else scraped, email me: adrian@thewebscrapingguy.com`
  );
  const ts = new Date().toISOString();
  const fileName = "linkedin-profiles-" + ts + ".csv";
  convertJsonToCsvAndDownload(allProfiles, fileName);
}
/**
 * Serializes an array of flat objects to CSV and triggers a browser download.
 *
 * The keys of the first object define the header row and the column order of
 * every following row. Fields are quoted per RFC 4180: a value containing a
 * comma, double quote or line break is wrapped in double quotes, with embedded
 * double quotes doubled. `null`/`undefined` values become empty fields and
 * non-string values are stringified.
 *
 * @param {Array<Object>} jsonData - Rows to export; an empty or non-array
 *   value logs a warning and skips the download.
 * @param {string} fileName - Name suggested to the browser for the file.
 * @returns {void}
 */
function convertJsonToCsvAndDownload(jsonData, fileName) {
  if (!Array.isArray(jsonData) || jsonData.length === 0) {
    console.warn(
      "convertJsonToCsvAndDownload: no rows to export; skipping download."
    );
    return;
  }

  const toCsvField = (value) => {
    const text = String(value ?? "");
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  // Always emit values in header order so columns stay aligned even if an
  // item lists its keys in a different order or is missing one.
  const headers = Object.keys(jsonData[0]);
  const csvLines = [
    headers.map(toCsvField).join(","),
    ...jsonData.map((item) =>
      headers.map((key) => toCsvField(item[key])).join(",")
    ),
  ];

  const csvBlob = new Blob([csvLines.join("\n")], {
    type: "text/csv;charset=utf-8",
  });
  const csvUrl = URL.createObjectURL(csvBlob);

  // A temporary anchor with a `download` attribute is clicked to start the
  // download.
  const link = document.createElement("a");
  link.href = csvUrl;
  link.target = "_blank";
  link.download = fileName;
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    // Clean up even if the click handler throws, so neither the anchor nor
    // the object URL is leaked.
    document.body.removeChild(link);
    URL.revokeObjectURL(csvUrl);
  }
}
/**
 * Reads every `.entity-result` element currently in the document and extracts
 * one profile record per element.
 *
 * Missing sub-elements do not abort the scrape: text fields fall back to an
 * empty string, while `linkedinId` and `url` stay `undefined` when their
 * source attribute is absent.
 *
 * @returns {Array<{linkedinId: (string|undefined), name: string,
 *   title: string, location: string, url: (string|undefined)}>}
 *   Profiles in document order.
 */
function getProfiles() {
  // Keep letters, combining marks, digits and whitespace from any script so
  // that names such as "José" or "王伟" survive; other characters (emoji,
  // punctuation) are dropped.
  const NAME_CHARS = /[\p{L}\p{M}\p{N}\s]+/gu;
  // Variation selectors are combining marks too, but only exist to style the
  // preceding emoji; remove them so they do not linger invisibly in the name.
  const VARIATION_SELECTORS = /[\uFE00-\uFE0F]/g;

  const textOf = (root, selector) =>
    root?.querySelector(selector)?.textContent?.trim() ?? "";

  const allPeeps = [];
  for (const el of document.querySelectorAll(".entity-result")) {
    const titleEl = el.querySelector(".entity-result__title-text");

    // The id is taken from the fourth colon-separated segment of the
    // `data-chameleon-result-urn` attribute.
    const linkedinId = el
      .getAttribute("data-chameleon-result-urn")
      ?.split(":")?.[3];

    const rawName = textOf(titleEl, 'span[aria-hidden="true"]');
    const nameParts = rawName.match(NAME_CHARS);
    // When nothing matches, keep the raw text rather than an empty name.
    const name = nameParts
      ? nameParts.join("").replace(VARIATION_SELECTORS, "").trim()
      : rawName;

    const title = textOf(el, ".entity-result__primary-subtitle");
    const location = textOf(el, ".entity-result__secondary-subtitle");

    const linkedinProfileUrl = titleEl
      ?.querySelector("a.app-aware-link")
      ?.getAttribute("href");

    allPeeps.push({
      linkedinId,
      name,
      title,
      location,
      // Drop the query string so the same profile always yields the same URL.
      url: linkedinProfileUrl?.split("?")?.[0],
    });
  }
  console.log(`Found ${allPeeps.length} profiles!`);
  return allPeeps;
}
// Entry point: start the scrape as soon as the script is evaluated.
// Top-level `await` is only valid in an ES module or in a browser DevTools
// console; in a classic script, call `scrapeLinkedinSearch()` without `await`.
await scrapeLinkedinSearch();
// if you need anything scraped, email me: adrian@thewebscrapingguy.com 🤘
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment