Skip to content

Instantly share code, notes, and snippets.

@leonardorifeli
Created October 19, 2020 04:15
Show Gist options
  • Save leonardorifeli/b1ae4c35f9b1edaca7f6d2db7d78a091 to your computer and use it in GitHub Desktop.
Save leonardorifeli/b1ae4c35f9b1edaca7f6d2db7d78a091 to your computer and use it in GitHub Desktop.
Puppeteer
const puppeteer = require('puppeteer');
/**
 * Scrapes www.piadas.com.br with Puppeteer and reports the author and
 * publication date of the jokes linked from the listing reached through
 * the `#menu-4009-1` menu entry (logged as "best jokes").
 *
 * Every member is static; the class is used as a namespace.
 */
class Jokes {
  /**
   * Opens the site, clicks through to the best-jokes listing and collects
   * author/date data for the first five joke links found there.
   *
   * @returns {Promise<Array<{author: string, date: string}>>} One entry per
   *   visited joke, in listing order (fewer than five if the listing is short).
   * @throws {Error} Whatever Puppeteer or {@link Jokes.getData} throws; the
   *   browser is closed before the error propagates.
   */
  static async getBestData() {
    // The log message below promises five authors; the previous
    // `splice(0, 4)` only ever visited four.
    const bestJokesCount = 5;

    const browser = await this.getBrowser();
    try {
      const page = (await browser.pages())[0];
      await this._definePageConfig(page);

      console.info('Accessing www.piadas.com.br.');
      await page.goto('https://www.piadas.com.br', {waitUntil: 'networkidle2'});
      await page.waitForSelector('#menu-4009-1');
      await page.click('#menu-4009-1 > a');

      console.info('Accessing best jokes.');
      await page.waitForSelector('.botao_branco > a');
      // Keep only anchors that actually carry a URL.
      const urls = await page.$$eval('.botao_branco > a', (links) =>
        links.filter((link) => link.href).map((link) => link.href));

      console.info('Getting best five authors by single joke url.');
      const jokesData = [];
      // `slice` (not `splice`) so the list of URLs is left untouched.
      for (const url of urls.slice(0, bestJokesCount)) {
        const {author, date} = await this.getData(url);
        jokesData.push({author, date});
      }

      console.info('Finished: ');
      return jokesData;
    } finally {
      // Always release the browser process, even when scraping fails.
      await browser.close();
    }
  }

  /**
   * Visits a single joke page in its own browser instance and extracts the
   * author name and the post date as plain text.
   *
   * @param {string} url Address of the joke page.
   * @returns {Promise<{author: string, date: string}>} Text content of the
   *   author link and of the post-date field.
   * @throws {Error} If navigation fails or either element is missing; the
   *   browser is closed before the error propagates.
   */
  static async getData(url) {
    const browser = await this.getBrowser();
    try {
      const page = (await browser.pages())[0];
      await this._definePageConfig(page);
      await page.goto(url);

      const author = await this._readText(page, 'a[title="Ver perfil do usuário."]', url);
      const date = await this._readText(page, '.field-name-post-date', url);
      return {author, date};
    } finally {
      await browser.close();
    }
  }

  /**
   * Launches a headless browser.
   *
   * @returns {Promise<object>} The browser returned by `puppeteer.launch`.
   */
  static async getBrowser() {
    return await puppeteer.launch({headless: true});
  }

  /**
   * Applies the shared page setup: a 1280x720 viewport and request
   * interception that aborts image, stylesheet, font and script requests
   * while letting every other request through.
   *
   * @param {object} page Puppeteer page to configure.
   * @returns {Promise<void>}
   */
  static async _definePageConfig(page) {
    // Awaited so a failure surfaces here instead of as a floating rejection.
    await page.setViewport({width: 1280, height: 720});
    await page.setRequestInterception(true);

    const blockedTypes = ['image', 'stylesheet', 'font', 'script'];
    page.on('request', (request) => {
      if (blockedTypes.includes(request.resourceType())) {
        request.abort();
      } else {
        request.continue();
      }
    });
  }

  /**
   * Reads the text content of the first element matching `selector`.
   *
   * @param {object} page Puppeteer page that has already been navigated.
   * @param {string} selector CSS selector of the element to read.
   * @param {string} url Page address, used only for the error message.
   * @returns {Promise<string>} The element's `textContent`.
   * @throws {Error} If no element matches the selector.
   */
  static async _readText(page, selector, url) {
    const element = await page.$(selector);
    if (!element) {
      // Fail with the selector and URL instead of an opaque in-page TypeError.
      throw new Error(`Element "${selector}" not found on ${url}`);
    }
    return page.evaluate((node) => node.textContent, element);
  }
}
// Script entry point: fetch the best-jokes data and print one line per joke.
const jokes = Jokes;
(async () => {
  try {
    const bestJokers = await jokes.getBestData();
    for (const bestJoker of bestJokers) {
      console.log(`- Author: ${bestJoker.author} published on ${bestJoker.date}`);
    }
  } catch (e) {
    console.error(`Error to parse best jokers on www.piadas.com.br: ${e.message}`);
    // Report the failure through the exit status so shells and CI notice it;
    // previously the process ended with status 0 even when scraping failed.
    process.exitCode = 1;
  }
})();
@leonardorifeli
Copy link
Author

Result (as of now):

Accessing www.piadas.com.br.
Accessing best jokes.
Getting best five authors by single joke url.
Finished.
- Author: anonimo published on 06/04/1999 - 08:30
- Author: anonimo published on 10/05/2007 - 12:31
- Author: residentjill published on 03/28/2010 - 22:22
- Author: anonimo published on 12/26/1999 - 00:00

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment