Skip to content

Instantly share code, notes, and snippets.

@mpaccione
Created April 2, 2021 19:00
Show Gist options
  • Save mpaccione/fed996e7a4b1d935ba59d7caf6b6995d to your computer and use it in GitHub Desktop.
Save mpaccione/fed996e7a4b1d935ba59d7caf6b6995d to your computer and use it in GitHub Desktop.
recursiveScrape
/**
 * Visits every job listed in `jobsJSON`, opens the company's "people" tab,
 * searches for "Recruiter" and records up to three recruiter profile links
 * per job. Once every job has been processed the rows are handed to
 * `writeJobsToCSV`.
 *
 * `jobsJSON` and `writeJobsToCSV` are not defined in this function; they are
 * expected to be in scope from elsewhere in the file. Each `jobsJSON` entry is
 * read for its `job`, `company` and `url` properties.
 *
 * Jobs are processed strictly one after another because they all share the
 * same `page`. A job whose scrape throws is retried after a short pause.
 *
 * @param {object} page - Browser automation page exposing `goto`, `evaluate`,
 *   `waitForSelector`, `click`, `keyboard.type` and `$$eval`
 *   (looks like a Puppeteer `Page` - confirm against the caller).
 * @param {number} [maxAttempts=Infinity] - Maximum scrape attempts per job.
 *   The default retries until the job succeeds. When a finite limit is
 *   exhausted the job is still recorded, with every recruiter set to "None".
 * @returns {Promise<object[]>} The rows that were passed to `writeJobsToCSV`;
 *   the promise settles only after all jobs are done.
 */
async function scrapeRecruiters(page, maxAttempts = Infinity) {
  const PAGE_SETTLE_MS = 4000;
  const SEARCH_SETTLE_MS = 1000;
  const RETRY_DELAY_MS = 4000;
  const MAX_RECRUITERS = 3;

  const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Runs one complete scrape attempt for a job URL and returns the recruiter
  // links that were found (possibly an empty array). Throws on any page error.
  async function findRecruiters(jobUrl) {
    // Go to the company page and give it time to finish loading.
    await page.goto(`${jobUrl}`);
    await delay(PAGE_SETTLE_MS);

    // Read the URL back from the browser so any redirect is honoured.
    const currentUrl = await page.evaluate(() => window.location.href);
    await page.goto(`${currentUrl}/people/`);
    await delay(PAGE_SETTLE_MS);

    // Focus the people search field, then search for recruiters.
    await page.waitForSelector("#people-search-keywords");
    await page.click("#people-search-keywords");
    await page.keyboard.type("Recruiter");
    await delay(SEARCH_SETTLE_MS);

    const links = await page.$$eval(
      ".artdeco-entity-lockup__image",
      (elements, limit) =>
        // Index 0 is the company logo, so the people start at index 1.
        elements.slice(1, 1 + limit).map((element) => {
          const anchor = element.children[0];
          return anchor && anchor.href ? anchor.href : null;
        }),
      MAX_RECRUITERS
    );

    return links.filter(Boolean);
  }

  const recruiterJSON = [];

  for (let jobCount = 0; jobCount < jobsJSON.length; jobCount++) {
    const { job, company, url } = jobsJSON[jobCount];
    let recruiters = [];

    for (let attempt = 1; ; attempt++) {
      console.log({ jobCount });
      try {
        recruiters = await findRecruiters(url);
        break;
      } catch (error) {
        console.error(error);
        if (attempt >= maxAttempts) {
          // Out of attempts: keep the row so the job is not lost from the CSV.
          break;
        }
        await delay(RETRY_DELAY_MS);
      }
    }

    recruiterJSON.push({
      job,
      company,
      url,
      recruiter1: recruiters[0] ? recruiters[0] : "None",
      recruiter2: recruiters[1] ? recruiters[1] : "None",
      recruiter3: recruiters[2] ? recruiters[2] : "None",
      contacted: "",
      responded: "",
      interviewed: "",
      offered: "",
    });
  }

  // Awaited in case the writer is asynchronous; harmless if it is not.
  await writeJobsToCSV(recruiterJSON);
  return recruiterJSON;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment