Created
August 22, 2015 18:43
-
-
Save connor11528/a5ecaf56ceff25f4a4bc to your computer and use it in GitHub Desktop.
nutella-scraping-solution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var got = require('got');
var cheerio = require('cheerio');
var formatData = require('format-data');

// Internet Archive snapshot of the page that gets scraped below.
var redditUrl = 'http://web.archive.org/web/20120216223019/http://www.reddit.com/r/science/';
// A second archived page; it is not referenced by the code shown here.
var nextUrl = 'https://web.archive.org/web/20030910064848/http://www.ed.gov/index.jhtml';

/**
 * Fetch `redditUrl`, parse the response body with cheerio, and write one CSV
 * row ({ score, href, content }) per `.link` element to standard out.
 *
 * The commented-out snippets are the earlier steps of the exercise and are
 * kept as reference for alternative outputs.
 */
got(redditUrl, function (err, html) {
  // Without this guard a failed request would fall through and try to parse
  // a missing body; report the failure and stop instead.
  if (err) {
    console.error('Failed to fetch ' + redditUrl + ':', err);
    process.exitCode = 1;
    return;
  }

  var $ = cheerio.load(html);

  // logs out all readable text
  // console.log($('body').text());

  // logs out href of every a tag
  // $('a').each(function (i, el) {
  //   console.log($(el).attr('href'));
  // });

  // Collecting data solution
  // $('.link').each(function (i, el) {
  //   var link = $(el);
  //   var score = link.find('.score.unvoted');
  //   var a = link.find('a');
  //   var row = {
  //     score: score.text(),
  //     href: a.attr('href'),
  //     content: a.text()
  //   };
  //   console.log(row);
  // });

  // Outputting to CSV
  var writer = formatData('csv');

  // `.each` rather than `.map`: the callback is run only for its side effect
  // (writing a row); no mapped collection is needed.
  $('.link').each(function (i, el) {
    var link = $(el);
    var score = link.find('.score.unvoted');
    // NOTE(review): this selects every <a> inside the entry, so `href` comes
    // from the first match while `content` is the text of all matches joined
    // together -- confirm that is the intended output.
    var a = link.find('a');
    var row = {
      score: score.text(),
      href: a.attr('href'),
      content: a.text()
    };
    writer.write(row);
  });

  // writes to file
  // var fs = require('fs');
  // var file = fs.createWriteStream('output.csv');
  // writer.pipe(file);

  // writes to standard out
  writer.pipe(process.stdout);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment