Skip to content

Instantly share code, notes, and snippets.

@handloomweaver
Forked from DTrejo/top3stories.js
Created October 27, 2011 15:14
Show Gist options
  • Save handloomweaver/1319833 to your computer and use it in GitHub Desktop.
Save handloomweaver/1319833 to your computer and use it in GitHub Desktop.
Gets top three articles from frontpage and newpage of Hacker News. Blog post on scraping: http://blog.dtrejo.com/scraping-made-easy-with-jquery-and-selectorga
// Scraping Made Easy with jQuery and SelectorGadget
// (http://blog.dtrejo.com/scraping-made-easy-with-jquery-and-selectorga)
// by David Trejo
//
// Install node.js and npm:
// http://joyeur.com/2010/12/10/installing-node-and-npm/
// Then run
// npm install jsdom jquery http-agent
// node top3stories.js
//
var util = require('util')
, url = require('url')
, httpAgent = require('http-agent')
, jsdom = require('jsdom').jsdom;
function printTop3(agent) {
var window = jsdom(agent.body).createWindow()
, $ = require('jquery').create(window);
//
// Now you can use jQuery to your heart's content!
//
var titles = $('.title a')
, points = $('.subtext span');
var printme = $.map(points, function(el, i) {
if (i < 3) {
return $(el).text() + '\t' + $(titles[i]).text();
}
});
console.log(printme.join('\n'));
}
// Paths to request from the host, passed to httpAgent.create below.
var urls = ['', 'newest'];
var agent = httpAgent.create('news.ycombinator.com', urls);
console.log('Scraping', urls.length, 'pages from', agent.host);

// Runs once per requested page. The callback's second argument is named
// `page` rather than `agent` so it does not shadow the outer agent, which is
// still needed to continue when a request fails.
agent.addListener('next', function (err, page) {
  if (err) {
    // A failed request has no body to parse; report it and skip this page
    // instead of crashing inside printTop3.
    console.log(err);
  } else {
    printTop3(page);
    console.log();
  }
  // NOTE(review): assumes the agent handed to this listener is the same
  // object as the outer `agent` - confirm against http-agent.
  agent.next();
});

// Runs when the agent stops; reports any error passed along with the event.
agent.addListener('stop', function (err, agent) {
  if (err) console.log(err);
  console.log('All done!');
});

// Start scraping
agent.start();
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment