Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
require('events').EventEmitter.defaultMaxListeners = 100;
const cheerio = require('cheerio');
const request = require('request');
const fs = require('fs');
// Delay in milliseconds between consecutive scheduled requests for links
// found on the same page; link N on a page is requested after N * interval.
const interval = 500;

// Every post scraped so far. The whole array is re-serialized to log.json
// each time a post is added.
const data = [];

// Absolute URLs of posts that have already been scheduled, so a story that
// is linked from several pages is fetched and stored only once.
const seenPosts = new Set();

// Entry page: its nav links are followed and its own posts are scraped too.
const homeUrl = 'https://www.npr.org/';

/**
 * Turn an href taken from a page into an absolute http(s) URL.
 *
 * @param {string|undefined} href - Raw `href` attribute value (may be missing or relative).
 * @param {string} base - URL of the page the link was found on.
 * @returns {string|null} The absolute URL, or null when the href is missing,
 *   unparsable, or not an http(s) link.
 */
function resolveLink(href, base) {
  if (!href) return null;
  try {
    const resolved = new URL(href, base);
    const isHttp = resolved.protocol === 'http:' || resolved.protocol === 'https:';
    return isHttp ? resolved.href : null;
  } catch (err) {
    return null;
  }
}

/**
 * GET a page and hand its parsed document to `onLoaded`.
 * Request errors and non-2xx responses are logged and the page is skipped.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {function} onLoaded - Called with the cheerio instance for the response body.
 */
function fetchPage(url, onLoaded) {
  request({
    method: 'GET',
    url: url
  }, (err, res, body) => {
    if (err) return console.error(err);
    if (res.statusCode < 200 || res.statusCode >= 300) {
      return console.error(`Skipping ${url}: HTTP ${res.statusCode}`);
    }
    onLoaded(cheerio.load(body));
  });
}

/**
 * Write the current contents of `data` to log.json.
 * The write is synchronous so snapshots land in the order posts arrive;
 * overlapping asynchronous writes could finish out of order and leave an
 * older snapshot on disk.
 */
function savePosts() {
  try {
    fs.writeFileSync("log.json", JSON.stringify(data));
    console.log("Anoter post saved!");
  } catch (err) {
    console.error(err);
  }
}

/**
 * Fetch one story page, extract its fields, append them to `data`, and
 * persist the updated array.
 *
 * @param {string} link - Absolute URL of the story.
 */
function scrapePost(link) {
  fetchPage(link, ($) => {
    const post = {
      title: $('h1').text(),
      url: link,
      date: $('.date').text(),
      slug: $('h3.slug > a').text(),
      author: $('.byline__name > a:nth-child(1)').text(),
      img: $('#storytext > div > div > img').attr('src'),
      body: $('#storytext > p').map((i, el) => $(el).text()).get().join(' ')
    };
    data.push(post);
    // Save only after the post has actually been added, so the file always
    // includes the post that triggered the write.
    savePosts();
  });
}

/**
 * Schedule a scrape for every not-yet-seen post linked from a loaded page.
 *
 * @param {function} $ - cheerio instance for the page.
 * @param {string} pageUrl - URL of that page, used to resolve relative links.
 */
function schedulePosts($, pageUrl) {
  $('div.imagewrap > a').each((postIndex, elem) => {
    const link = resolveLink($(elem).attr('href'), pageUrl);
    // Skip unusable links and stories already queued from another page.
    if (link === null || seenPosts.has(link)) return;
    seenPosts.add(link);
    setTimeout(() => scrapePost(link), postIndex * interval);
  });
}

fetchPage(homeUrl, ($) => {
  // Posts shown on the home page itself.
  schedulePosts($, homeUrl);

  // Iterate over the nav links and scrape the posts listed on each of those
  // pages, staggering the page requests by `interval`.
  $('li.submenu__item > a').each((navIndex, elem) => {
    const sectionUrl = resolveLink($(elem).attr('href'), homeUrl);
    if (sectionUrl === null) return;
    setTimeout(() => {
      fetchPage(sectionUrl, (section$) => schedulePosts(section$, sectionUrl));
    }, navIndex * interval);
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment