Skip to content

Instantly share code, notes, and snippets.

@andreyserdjuk
Created November 8, 2016 19:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save andreyserdjuk/f1f23fe351612b471e011df6f5c045c5 to your computer and use it in GitHub Desktop.
Save andreyserdjuk/f1f23fe351612b471e011df6f5c045c5 to your computer and use it in GitHub Desktop.
cheerio parser example
const fetch = require('node-fetch');
const cheerio = require('cheerio');
const fs = require('fs');
// Scrapes pages 1..35 of one book from loveread.ec, extracts the text of each
// page with parseBody, and writes the concatenated result to out.txt.

// Base address of the reader page; the page number is appended to it.
const url = "http://loveread.ec/read_book.php?id=56443&p=";
const requests = [];
const startTime = new Date().getTime();

// Start every page request up front so the downloads run concurrently.
for (let counter = 1; counter < 36; ++counter) {
  requests.push(
    fetch(url + counter)
      .then(res => res.text())
      .then(parseBody)
  );
}

Promise
  .all(requests)
  .then(strings => {
    // Promise.all resolves with results in the order the promises were
    // pushed, so the joined text follows page order regardless of which
    // download finished first.
    fs.writeFileSync('out.txt', strings.join(''));
    // Report the elapsed time only once all pages are fetched and written;
    // logging outside this callback would run before any request completes.
    console.log('Scraped in %s seconds', (new Date().getTime() - startTime) / 1000);
  })
  .catch(err => {
    // Any failed download, parse, or write lands here instead of becoming
    // an unhandled rejection.
    console.error('Scrape failed:', err);
    process.exitCode = 1;
  });
/**
 * Extracts the combined text of all `.MsoNormal` elements from an HTML page
 * and removes the word "Страница" (Russian for "Page") where it appears
 * surrounded by whitespace at the start or at the end of a line.
 * Presumably this is the reader's pagination label — confirm against the site.
 *
 * @param {string} body - Raw HTML of one page.
 * @returns {string} The cleaned text content.
 */
function parseBody(body) {
  const $ = cheerio.load(body);
  const pageText = $('.MsoNormal').text();

  // Leading occurrence: whitespace, the word, whitespace, anchored at a line start.
  const withoutLeadingLabel = pageText.replace(/^\s+Страница\s+/gm, '');

  // Trailing occurrence: whitespace, the word, whitespace, anchored at a line end.
  return withoutLeadingLabel.replace(/\s+Страница\s+$/gm, '');
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment