Skip to content

Instantly share code, notes, and snippets.

@connor11528
Created August 22, 2015 18:43
Show Gist options
  • Save connor11528/a5ecaf56ceff25f4a4bc to your computer and use it in GitHub Desktop.
Save connor11528/a5ecaf56ceff25f4a4bc to your computer and use it in GitHub Desktop.
nutella-scraping-solution
// Used below to download the page at `redditUrl`.
var got = require('got');
// Used below to parse the downloaded HTML and query it with CSS selectors.
var cheerio = require('cheerio');
// Archived (web.archive.org) snapshot of r/science; this is the page the script scrapes.
var redditUrl = 'http://web.archive.org/web/20120216223019/http://www.reddit.com/r/science/';
// A second archived page; not referenced anywhere in the code visible here.
var nextUrl = 'https://web.archive.org/web/20030910064848/http://www.ed.gov/index.jhtml';
// Loaded once up front rather than on every invocation of the response callback.
var formatData = require('format-data');

/**
 * Builds one output row from a single `.link` element of the page.
 *
 * @param {Function} $ - the cheerio instance returned by `cheerio.load`.
 * @param {Object} el - the raw element handed to the `.each` callback.
 * @returns {{score: string, href: (string|undefined), content: string}}
 *   `score` is the text of the `.score.unvoted` descendants, `href` is the
 *   `href` attribute of the first `<a>` descendant, and `content` is the
 *   combined text of all `<a>` descendants.
 */
function toRow($, el) {
  var link = $(el);
  var anchors = link.find('a');
  return {
    score: link.find('.score.unvoted').text(),
    href: anchors.attr('href'),
    content: anchors.text()
  };
}

// Fetch the page, extract one row per `.link` element, and print the rows as CSV.
got(redditUrl, function (err, html) {
  // Without this guard a failed request would fall through to cheerio.load()
  // with no usable HTML.
  if (err) {
    console.error('Failed to fetch ' + redditUrl + ': ' + (err.message || err));
    process.exitCode = 1;
    return;
  }

  var $ = cheerio.load(html);

  // Earlier steps of the exercise, kept for reference:
  //
  // logs out all readable text
  // console.log($('body').text());
  //
  // logs out href of every a tag
  // $('a').each(function (i, el) {
  //   console.log($(el).attr('href'));
  // });
  //
  // Collecting data solution
  // $('.link').each(function (i, el) {
  //   console.log(toRow($, el));
  // });

  // Outputting to CSV
  var writer = formatData('csv');

  // writes to file
  // var fs = require('fs');
  // var file = fs.createWriteStream('output.csv');
  // writer.pipe(file);

  // writes to standard out
  writer.pipe(process.stdout);

  // `.each` instead of `.map`: the callback is run only for its side effect.
  $('.link').each(function (i, el) {
    writer.write(toRow($, el));
  });

  // Signal that no more rows are coming so the stream can flush and finish.
  writer.end();
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment