Skip to content

Instantly share code, notes, and snippets.

@pbowyer
Created July 17, 2011 20:07
Show Gist options
  • Save pbowyer/1088002 to your computer and use it in GitHub Desktop.
Save pbowyer/1088002 to your computer and use it in GitHub Desktop.
Sample (non-working) node.io scraper
var nodeio = require('node.io');
exports.job = new nodeio.Job({max: 1, retries: 1, auto_retry: false, jsdom: true }, {
input: ["http://www.nuffieldtheatre.co.uk/events/category/C81/"],
run: function (search_page) {
this.getHtml(search_page, function(err, $) {
//Handle any request / parsing errors
if (err) this.exit(err);
console.log($('.listing h2'));
next_link = $('.paginate_links a:contains("Next")')[0].href || false;
if (next_link) {
console.log("Next Page: "+next_link);
this.add(next_link);
}
//Scrape projects on the page and emit
var projects = [];
$('.listing').each(function (i, listing) {
var project = {};
project.date = $('.date', listing).text().trim();
// To make this work with node.io, comment out the line above
// and uncomment the line below
//project.date = $(listing).find('.date').text().trim();
projects.push(project);
});
this.emit(projects);
});
}
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment