Skip to content

Instantly share code, notes, and snippets.

@findscode
Created June 9, 2019 21:43
Show Gist options
  • Save findscode/a72b141b8418c4cda1a9d9c37b9e44e0 to your computer and use it in GitHub Desktop.
Save findscode/a72b141b8418c4cda1a9d9c37b9e44e0 to your computer and use it in GitHub Desktop.
Scrape Facebook events using puppeteer.js (now with infinite scrolling)
const puppeteer = require("puppeteer");
(async () => {
/**
 * Scrolls the page down in fixed steps until the bottom is reached and no
 * loading indicator is present (infinite-scroll aware).
 *
 * The loop is driven from Node and only the DOM reads/writes are sent to the
 * browser: `page` is not defined inside a `page.evaluate()` callback, so the
 * `waitForFunction` calls have to be issued from this side.
 *
 * @param {object} page - Puppeteer page; `evaluate` and `waitForFunction` are used.
 * @returns {Promise<void>} Resolves once the scrolled distance has reached the
 *   document height and no `[role="progressbar"]` element is present.
 * @throws Any error raised by `page.evaluate` / `page.waitForFunction`, except a
 *   timeout while waiting for the document to grow (see below).
 */
const autoScroll = async (page) => {
  const loaderSelector = '[role="progressbar"]';
  const distance = 100; // pixels scrolled per step
  const intervalMs = 500; // pause between steps
  const minGrowth = 200; // pixels the document must grow by after a load
  let totalHeight = 0;

  for (;;) {
    // One round-trip per step: read the height, scroll, and check for the loader.
    const { scrollHeight, isLoading } = await page.evaluate(
      (step, selector) => {
        const height = document.body.scrollHeight;
        window.scrollBy(0, step);
        return {
          scrollHeight: height,
          isLoading: document.querySelector(selector) !== null
        };
      },
      distance,
      loaderSelector
    );
    totalHeight += distance;

    if (totalHeight >= scrollHeight) {
      if (!isLoading) {
        return;
      }

      // More content is being fetched: wait for the loader to go away...
      await page.waitForFunction(
        (selector) => !document.querySelector(selector),
        { polling: "mutation" },
        loaderSelector
      );

      // ...and for the document to actually grow. Arguments for the predicate
      // come after the options object.
      try {
        await page.waitForFunction(
          (previousHeight, growth) => document.body.scrollHeight > previousHeight + growth,
          {},
          scrollHeight,
          minGrowth
        );
      } catch (error) {
        // A timeout here means the last load added (almost) nothing; the next
        // iteration re-checks the bottom/loader condition and can finish.
        if (error.name !== "TimeoutError") {
          throw error;
        }
      }
    }

    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
};
// Launch headless Chromium, open the Facebook events discovery page, scroll
// until all events are loaded, and print the collected event links.
const eventsUrl = "https://www.facebook.com/events/discovery/?suggestion_token=%7B%22city%22%3A%22107677462599905%22%2C%22time%22%3A%22tomorrow%22%2C%22timezone%22%3A%22Europe%2FMinsk%22%7D&acontext=%7B%22ref%22%3A51%2C%22source%22%3A2%2C%22source_dashboard_filter%22%3A%22discovery%22%2C%22action_history%22%3A%22[%7B%5C%22surface%5C%22%3A%5C%22discover_filter_list%5C%22%2C%5C%22mechanism%5C%22%3A%5C%22surface%5C%22%2C%5C%22extra_data%5C%22%3A%7B%5C%22dashboard_filter%5C%22%3A%5C%22discovery%5C%22%7D%7D%2C%7B%5C%22surface%5C%22%3A%5C%22discover_filter_list%5C%22%2C%5C%22mechanism%5C%22%3A%5C%22surface%5C%22%2C%5C%22extra_data%5C%22%3A%7B%5C%22dashboard_filter%5C%22%3A%5C%22discovery%5C%22%7D%7D%2C%7B%5C%22surface%5C%22%3A%5C%22discover_filter_list%5C%22%2C%5C%22mechanism%5C%22%3A%5C%22surface%5C%22%2C%5C%22extra_data%5C%22%3A%7B%5C%22dashboard_filter%5C%22%3A%5C%22discovery%5C%22%7D%7D]%22%2C%22has_source%22%3Atrue%7D";
// Anchor elements of the event cards; used both to wait for the first results
// and to collect the links afterwards.
const eventLinkSelector = "div.clearfix > div > div > div > div > div > a";

// Declared outside the try so the finally block can close it on any outcome.
let browser;
try {
  browser = await puppeteer.launch({
    args: ["--no-sandbox", "--disable-setuid-sandbox"],
    headless: true,
    timeout: 600000
  });
  const page = await browser.newPage();
  // Set the viewport before navigating so the page is laid out at its final size.
  await page.setViewport({
    width: 1200,
    height: 500
  });
  await page.goto(eventsUrl);
  await page.waitForSelector(eventLinkSelector);
  await autoScroll(page);
  const links = await page.evaluate(
    (selector) => Array.from(document.querySelectorAll(selector), (element) => element.href),
    eventLinkSelector
  );
  console.log(links);
} catch (error) {
  console.error(error);
  // Report the failure to the caller of the script instead of exiting with 0.
  process.exitCode = 1;
} finally {
  // Always shut the browser down, otherwise a failed run leaves Chromium running.
  if (browser) {
    await browser.close();
  }
}
})();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment