Skip to content

Instantly share code, notes, and snippets.

@koladilip
Created November 26, 2019 04:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save koladilip/279245da667de226f6b6bdae5242653f to your computer and use it in GitHub Desktop.
Save koladilip/279245da667de226f6b6bdae5242653f to your computer and use it in GitHub Desktop.
const puppeteer = require('puppeteer');
/**
 * Pauses the calling async flow for the given duration.
 *
 * @param {number} timeInMills - Delay in milliseconds before the promise settles.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
async function wait(timeInMills) {
  // The timer invokes the resolver directly, so the promise settles with undefined.
  return new Promise((done) => setTimeout(done, timeInMills));
}
/**
 * Keeps scrolling the page down for as long as each scroll makes the
 * serialized HTML (`page.content()`) change in length.
 *
 * The page is always scrolled at least once (unless `maxScrolls` is 0). After
 * every scroll the function pauses so lazily loaded content has time to
 * arrive, then compares the new HTML length with the previous one; it stops
 * on the first scroll that leaves the length unchanged.
 *
 * @param {object} page - Puppeteer page (needs `content()`, `evaluate()` and `url()`).
 * @param {object} [options]
 * @param {number} [options.maxScrolls=Infinity] - Upper bound on scroll
 *   attempts; guards against pages that keep producing content forever.
 * @param {function(): Promise<void>} [options.settle] - Called after each
 *   scroll to let new content load. Defaults to a randomized pause of
 *   5-10 seconds.
 * @returns {Promise<void>} Resolves when scrolling stops.
 */
async function scrollPage(
  page,
  {
    maxScrolls = Infinity,
    settle = () => wait(Math.max(5000, 10000 * Math.random())),
  } = {},
) {
  // Length of the HTML is used as a cheap "did anything new load?" signal.
  let previousLength = (await page.content()).length;

  // A loop (rather than recursion) lets us reuse the last measurement and
  // enforce the maxScrolls cap.
  for (let attempt = 0; attempt < maxScrolls; attempt += 1) {
    await page.evaluate(() => {
      window.scrollBy(0, document.body.scrollHeight);
    });
    await settle();

    const currentLength = (await page.content()).length;
    if (currentLength === previousLength) {
      // Nothing changed after this scroll, so stop.
      return;
    }

    console.log("Scrolling page:", page.url(), "for more content");
    previousLength = currentLength;
  }
}
/**
 * Loads a URL in a freshly launched Puppeteer browser, optionally scrolls it
 * to trigger lazily loaded content, and returns the resulting HTML.
 *
 * @param {string} pageUrl - Address to navigate to.
 * @param {boolean} shouldScrollPage - When truthy, run `scrollPage` before
 *   capturing the content.
 * @returns {Promise<string>} The page's HTML as returned by `page.content()`.
 * @throws Propagates any error from launching, navigating or scrolling; the
 *   browser is closed before the error reaches the caller.
 */
async function getPageData(pageUrl, shouldScrollPage) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(pageUrl);
    if (shouldScrollPage) {
      await scrollPage(page);
    }
    return await page.content();
  } finally {
    // Always shut the browser down, even when navigation or scrolling fails,
    // so no browser process is left behind. Closing the browser also closes
    // its pages, so no separate page.close() is needed.
    await browser.close();
  }
}
@ilhamsa1
Copy link

ilhamsa1 commented Aug 3, 2020

thank you

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment