Skip to content

Instantly share code, notes, and snippets.

@adrianhorning08
Created September 10, 2023 19:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save adrianhorning08/a7cabfcc52ffcbe34c463d80f8e4e4e5 to your computer and use it in GitHub Desktop.
Save adrianhorning08/a7cabfcc52ffcbe34c463d80f8e4e4e5 to your computer and use it in GitHub Desktop.
Apollo Scraper
/**
 * Click the "Access Email" button of every result row that has exactly one.
 *
 * Each `tbody` inside the results panel is inspected on its own. Rows with
 * zero, or with more than one, button whose text contains "Access Email"
 * are left untouched.
 *
 * @returns {void}
 */
function clickAccessEmailIfAvailable() {
  const resultBodies = document.querySelectorAll(
    ".finder-results-list-panel-content table tbody"
  );

  for (const resultBody of resultBodies) {
    // Keep only the buttons whose label mentions "Access Email".
    const accessEmailButtons = Array.from(
      resultBody.querySelectorAll("button")
    ).filter((candidate) => candidate.textContent.includes("Access Email"));

    // Act only when the match is unambiguous.
    if (accessEmailButtons.length !== 1) {
      continue;
    }
    accessEmailButtons[0].click();
  }
}
/**
 * Scrape the people currently rendered in the results table.
 *
 * Every `tbody` inside the results panel is treated as one person. Each
 * field is read from a fixed column position; a cell that is missing yields
 * an empty string instead of throwing, and all text is trimmed.
 *
 * @returns {Array<{
 *   name: string, firstName: string, lastName: string, title: string,
 *   company: string, location: string, employeeCount: string,
 *   email: string, industry: string, keywords: string
 * }>} One record per `tbody`, in document order.
 */
function scrapePeople() {
  // Trimmed text of the first match under `root`, or "" when nothing matches.
  const readText = (root, selector) =>
    root.querySelector(selector)?.textContent?.trim() ?? "";

  const rows = document.querySelectorAll(
    ".finder-results-list-panel-content table tbody"
  );

  const people = [];
  for (const row of rows) {
    const name = readText(row, "tr td:nth-child(1)");
    // Everything after the first word is the last name, so multi-word
    // surnames and middle names are not silently dropped.
    const [firstName = "", ...remainingNames] = name.split(/\s+/);
    const lastName = remainingNames.join(" ");

    people.push({
      name,
      firstName,
      lastName,
      title: readText(row, "tr td:nth-child(2)"),
      // The company text is read from a nested wrapper inside column 3.
      company: readText(row, "tr td:nth-child(3) > span > div > div"),
      // NOTE(review): column 4 is intentionally not read, matching the
      // original column mapping — confirm what it contains.
      location: readText(row, "tr td:nth-child(5)"),
      employeeCount: readText(row, "tr td:nth-child(6)"),
      email: readText(row, "tr td:nth-child(7)"),
      industry: readText(row, "tr td:nth-child(8)"),
      keywords: readText(row, "tr td:nth-child(9)"),
    });
  }

  console.log("people", people);
  return people;
}
/**
 * Serialize an array of flat objects to CSV and trigger a browser download.
 *
 * The header row is the union of all keys, in first-seen order, and every
 * data row is written in that same column order. Cells are quoted when they
 * contain a comma, double quote, or line break, and embedded double quotes
 * are doubled. `null`/`undefined` become empty cells; other values are
 * converted with `String()`.
 *
 * @param {Array<Object>} jsonData - Records to export.
 * @param {string} fileName - Suggested name of the downloaded file.
 * @returns {void} Nothing is downloaded when `jsonData` is empty or not an array.
 */
function createCSV(jsonData, fileName) {
  if (!Array.isArray(jsonData) || jsonData.length === 0) {
    console.warn("createCSV: no rows to export, skipping download.");
    return;
  }

  // Quote a single cell when needed so separators inside it stay literal.
  const escapeCell = (value) => {
    if (value == null) {
      return "";
    }
    const text = String(value);
    return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
  };

  // Union of keys keeps columns aligned even when records differ in shape.
  const headers = [
    ...new Set(jsonData.flatMap((item) => Object.keys(item ?? {}))),
  ];
  const csvLines = [
    headers.map(escapeCell).join(","),
    ...jsonData.map((item) =>
      headers.map((key) => escapeCell(item?.[key])).join(",")
    ),
  ];

  const csvBlob = new Blob([csvLines.join("\n")], {
    type: "text/csv;charset=utf-8",
  });
  const csvUrl = URL.createObjectURL(csvBlob);

  // A temporary anchor with a `download` attribute starts the download.
  const link = document.createElement("a");
  link.href = csvUrl;
  link.target = "_blank";
  link.download = fileName;
  document.body.appendChild(link);
  try {
    link.click();
  } finally {
    // Always detach the anchor and release the Blob URL, even if click fails.
    document.body.removeChild(link);
    URL.revokeObjectURL(csvUrl);
  }
}
/**
 * Scrape every results page and download the collected people as a CSV.
 *
 * The current page is always scraped first. Paging then continues by
 * clicking the "right-arrow" button until that button is missing or
 * disabled, so a single page of results is still exported and the last
 * page does not trigger a pointless click and wait.
 *
 * @param {Object} [options]
 * @param {number} [options.pageDelayMs=2000] - Time to wait after clicking
 *   "next" before scraping the following page.
 * @returns {Promise<void>} Resolves once the CSV download has been triggered,
 *   or once it is known that nothing was scraped.
 */
async function scrapeApollo({ pageDelayMs = 2000 } = {}) {
  const nextButtonSelector = "button[aria-label='right-arrow']";
  const people = [];
  let page = 1;

  while (true) {
    console.log(
      `Scraping page ${page}. If you need anything else web scraped, email me: adrian@thewebscrapingguy.com`
    );
    people.push(...scrapePeople());

    // Re-query each time: the button may be re-rendered between pages.
    const nextButton = document.querySelector(nextButtonSelector);
    if (!nextButton || nextButton.disabled) {
      break;
    }

    nextButton.click();
    // Fixed pause that gives the next page time to render before scraping.
    await new Promise((resolve) => setTimeout(resolve, pageDelayMs));
    page++;
  }

  if (people.length === 0) {
    console.warn("No profiles were found, so no CSV was created.");
    return;
  }

  console.log(
    `Congrats! 🎉 You just scraped ${people.length} profiles! If you want more leads, or want anything else scraped, email me: adrian@thewebscrapingguy.com`
  );
  createCSV(people, `apollo_${new Date().getTime()}.csv`);
}
// Entry point: start the scrape as soon as this script is evaluated.
// NOTE(review): top-level `await` only parses in an ES module or in a console
// that supports it — presumably this is pasted into the browser DevTools
// console on the results page; confirm.
await scrapeApollo();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment