public
Last active

Gets top three articles from frontpage and newpage of Hacker News. Blog post on scraping: http://blog.dtrejo.com/scraping-made-easy-with-jquery-and-selectorga

  • Download Gist
top3stories.js
JavaScript
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
// Scraping Made Easy with jQuery and SelectorGadget
// (http://blog.dtrejo.com/scraping-made-easy-with-jquery-and-selectorga)
// by David Trejo
//
// Install node.js and npm:
// http://joyeur.com/2010/12/10/installing-node-and-npm/
// Then run
// npm install jsdom jquery http-agent
// node numresults.js
//
var util = require('util')
, url = require('url')
, httpAgent = require('http-agent')
, jsdom = require('jsdom').jsdom;
 
function printTop3(agent) {
var window = jsdom(agent.body).createWindow()
, $ = require('jquery').create(window);
//
// Now you can use jQuery to your heart's content!
//
var titles = $('.title a')
, points = $('.subtext span');
var printme = $.map(points, function(el, i) {
if (i < 3) {
return $(el).text() + '\t' + $(titles[i]).text();
}
});
console.log(printme.join('\n'));
}
 
var urls = ['', 'newest'];
var agent = httpAgent.create('news.ycombinator.com', urls);
console.log('Scraping', urls.length, 'pages from', agent.host);
 
agent.addListener('next', function (err, agent) {
printTop3(agent);
console.log();
agent.next();
});
 
agent.addListener('stop', function (err, agent) {
if (err) console.log(err);
console.log('All done!');
});
 
// Start scraping
agent.start();

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.