Skip to content

Instantly share code, notes, and snippets.

@Hyllesen
Created January 13, 2021 20:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hyllesen/fa3716fa0e76c8bd38428b014f16c53a to your computer and use it in GitHub Desktop.
Save Hyllesen/fa3716fa0e76c8bd38428b014f16c53a to your computer and use it in GitHub Desktop.
const puppeteer = require("puppeteer");
const cheerio = require("cheerio");
/**
 * Loads `url` in the given Puppeteer page and collects the job links found
 * under the `.data-table__value a` selector.
 *
 * When the browser ends up on a different URL than the one requested, the
 * function logs both URLs and an end-of-results notice, then still scrapes
 * whatever page was loaded. (Presumably the site redirects page numbers past
 * the last one; that is not verified here.)
 *
 * @param {object} page - Puppeteer page used for navigation.
 * @param {string} url - Address of the listing page to scrape.
 * @returns {Promise<Array<{company: string, title: string, source: string, url: string}>>}
 *   One entry per anchor that carries a usable link.
 */
async function scrapeListings(page, url) {
  const baseUrl = "https://www.payscale.com";

  await page.goto(url);

  // page.url() is synchronous, so no await is needed.
  const currentUrl = page.url();
  if (currentUrl !== url) {
    console.log(currentUrl);
    console.log(url);
    console.log("We've reached the end!");
  }

  const $ = cheerio.load(await page.content());
  const listings = [];

  $(".data-table__value a").each((index, element) => {
    const anchor = $(element);
    const href = anchor.attr("href");

    // An anchor without an href would otherwise become "<base>undefined".
    if (!href) {
      return;
    }

    let listingUrl;
    try {
      // Resolving against the base handles both relative and absolute hrefs.
      listingUrl = new URL(href, baseUrl).href;
    } catch (error) {
      if (!(error instanceof TypeError)) {
        throw error;
      }
      // One malformed link should not abort the whole page.
      console.warn(`Skipping malformed link: ${href}`);
      return;
    }

    const title = anchor.text().trim();
    const company = "hsbc";
    const source = "payscale";
    listings.push({ company, title, source, url: listingUrl });
  });

  return listings;
}
/**
 * Dismisses the privacy modal by clicking its button, if the modal shows up.
 *
 * The button is waited for before clicking because `page.click` throws when
 * the selector matches nothing, which happens if the modal has not rendered
 * yet or is not shown at all. If the button never appears within `timeout`,
 * there is nothing to accept and the function returns quietly.
 *
 * @param {object} page - Puppeteer page that may be showing the modal.
 * @param {number} [timeout=5000] - Milliseconds to wait for the button to appear.
 * @returns {Promise<void>}
 * @throws Re-throws any error other than a wait timeout.
 */
async function acceptCookies(page, timeout = 5000) {
  const selector = ".privacy-modal__buttons > button";

  try {
    await page.waitForSelector(selector, { visible: true, timeout });
  } catch (error) {
    // Only a timeout means "no modal"; anything else is a real failure.
    if (error.name === "TimeoutError") {
      return;
    }
    throw error;
  }

  await page.click(selector);
  // Wait for the modal to go away so it cannot cover the page afterwards.
  await page.waitForSelector(selector, { hidden: true });
}
/**
 * Launches a visible browser, accepts the cookie modal on the employer's
 * salary page, then walks the numbered result pages ("Page-1", "Page-2", ...)
 * and prints the listings scraped from each.
 *
 * The walk stops at the first page that contributes no listing URL that has
 * not been seen before. That covers an empty page, and also a request that
 * lands on a page already scraped. The page-number cap of 998 is kept as a
 * safety limit. The browser is closed whether the run succeeds or fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const salaryUrl = "https://www.payscale.com/research/UK/Employer=HSBC/Salary/";
  const browser = await puppeteer.launch({ headless: false });

  try {
    const [page] = await browser.pages();
    await page.goto(salaryUrl);
    await acceptCookies(page);

    const seenUrls = new Set();
    for (let pageNumber = 1; pageNumber < 999; pageNumber++) {
      const url = `${salaryUrl}Page-${pageNumber}`;
      const listings = await scrapeListings(page, url);

      // Nothing new means the results are exhausted or repeating.
      const hasNewListing = listings.some((listing) => !seenUrls.has(listing.url));
      if (!hasNewListing) {
        break;
      }

      for (const listing of listings) {
        seenUrls.add(listing.url);
      }
      console.log(listings);
    }
  } finally {
    // Without this the browser keeps running after the script is done.
    await browser.close();
  }
}
// Entry point: report a failed run and exit non-zero instead of leaving the
// promise returned by main() unhandled.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment