Skip to content

Instantly share code, notes, and snippets.

@haveaguess
Created October 4, 2013 21:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save haveaguess/6833379 to your computer and use it in GitHub Desktop.
Save haveaguess/6833379 to your computer and use it in GitHub Desktop.
sdf
var log = require("./lib/debug.js");
var Crawler = require("crawler").Crawler;
// Shared crawler instance used by the rest of this script (see c.queue calls).
var c = new Crawler({
  "maxConnections": 10,
  // Global callback: used for crawled pages whose queued task does not
  // provide its own "callback".
  "callback": function (error, result, $) {
    // Report failures instead of falling through to $(), which would throw
    // a TypeError if the crawler passed no jQuery instance along with the error.
    if (error) {
      log.error("err", error);
      return;
    }
    // NOTE(review): presumably $ can also be missing for responses without a
    // DOM — confirm against the crawler docs. Nothing to list in that case.
    if (typeof $ !== "function") {
      return;
    }
    // $ is a jQuery instance scoped to the server-side DOM of the page.
    // Links are only printed here; re-queueing them via c.queue(a.href) is
    // intentionally left disabled.
    $("a").each(function (index, a) {
      console.log(a.href);
    });
  }
});
// Queue just one URL, with default callback
// c.queue("http://codinginmysleep.com");
// Queue a list of URLs
// c.queue(["http://jamendo.com/","http://tedxparis.com"]);
// Queue URLs with custom callbacks & parameters
/**
 * Queues `uri` on the shared crawler `c`. Each time a fetch completes,
 * `callback` is told about it, and the same `uri` is queued again with the
 * depth counter decremented for as long as the counter is greater than 1.
 *
 * @param {string} uri - address handed to the crawler
 * @param {number} depth - counter passed to `callback`; controls re-queueing
 * @param {function} callback - invoked as callback(depth, error, result)
 */
function queueToDepth(uri, depth, callback) {
  // Completion handler for this task; supplying it means the crawler's
  // global callback is not used for this request.
  function onFetched(error, result) {
    // Report first, then decide whether another round is needed.
    callback(depth, error, result);

    if (!(depth > 1)) {
      return;
    }
    queueToDepth(uri, depth - 1, callback);
  }

  var task = {
    "uri": uri,
    "jQuery": true,
    "callback": onFetched
  };

  c.queue([task]);
}
/**
 * Entry point: queues the start page via queueToDepth and reports each
 * completed fetch — errors through `log.error`, successes as a byte count
 * on the console.
 */
function main() {
  var maxDepth = 1;
  var startUri = "http://www.longshanks-consulting.com/";

  // Receives (depth, error, result) for every completed fetch.
  function report(depth, error, result) {
    if (error) {
      log.error("rer" , result);
      log.error("err" , error);
      log.error("depth", depth);
      log.error("uri", startUri);
    } else {
      // maxDepth - depth counts how many rounds have already completed.
      var label = "Grabbed (depth " + (maxDepth - depth) + ") ";
      console.log(label, result.body.length, "bytes");
    }
  }

  queueToDepth(startUri, maxDepth, report);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment