Skip to content

Instantly share code, notes, and snippets.

@kavanagh
Created October 2, 2012 01:19
Show Gist options
  • Save kavanagh/3815602 to your computer and use it in GitHub Desktop.
Save kavanagh/3815602 to your computer and use it in GitHub Desktop.
A very simple node script to spider JSON web services.
var request = require( 'request' ),
fs = require( 'fs' ),
util = require( 'util' );
// Crawl queue, seeded with every command-line argument that follows the
// script path. Entries are taken from the front with shift() and newly
// discovered URLs are pushed onto the end.
var todo = process.argv.splice( 2 );

// Converter table used to turn string values found in a JSON response into
// URLs. First level: pathname of the request that produced the response.
// Second level: dotted path of the value inside the JSON document. Each
// converter receives the raw string and returns the URL to crawl.
var processor = {};

processor['/rest/path/foo/bar'] = {
    // Sample converter: passes the value through untouched. A real one would
    // usually build an absolute URL, e.g. 'http://domain.com/' + str + '.html'.
    'response.result.cid': function( str ) {
        return str;
    }
};
/**
 * Convert a string found in a JSON response into a URL, using the converter
 * registered in `processor` for the given request pathname and JSON path.
 *
 * @param {string} str - Raw string value taken from the JSON document.
 * @param {string} jsonPath - Dotted path of the value inside the document.
 * @param {Object} uri - Parsed request URI; only `uri.pathname` is read.
 * @returns {*} Whatever the matching converter returns, or `undefined` when
 *   `str`/`uri` is falsy or no converter is registered for this combination.
 */
function convertToUrl( str, jsonPath, uri ) {
    if ( !str || !uri ) {
        return;
    }

    // Only honour keys that were explicitly registered. A plain `obj[key]`
    // lookup would also match inherited members such as `toString` or
    // `constructor`, and JSON keys with those names would then be "converted"
    // by Object.prototype functions.
    var hasOwn = Object.prototype.hasOwnProperty;

    if ( !hasOwn.call( processor, uri.pathname ) ) {
        return;
    }

    var converters = processor[uri.pathname];
    if ( !converters || !hasOwn.call( converters, jsonPath ) ) {
        return;
    }

    if ( typeof converters[jsonPath] !== 'function' ) {
        return;
    }

    // Invoke through the table so `this` inside the converter is the
    // per-pathname table, exactly as with a direct property call.
    return converters[jsonPath]( str );
}
/**
 * Fetch one URL and then continue with the next entry of the `todo` queue.
 *
 * A response whose Content-Type starts with `application/json` is written to
 * a file named after the request host and path (every `/` replaced by `:`),
 * parsed, and handed to `traverse()` so that URLs found in it get queued.
 * Transport errors, non-JSON responses and unparsable bodies are logged and
 * skipped; the crawl continues with the next queued URL in every such case.
 *
 * @param {string} [url] - URL to fetch. A falsy value (for example the result
 *   of `shift()` on an empty queue) makes the function return immediately.
 * @throws {Error} Rethrows a file-write error from inside the write callback.
 */
function visit( url ) {
    if ( !url ) {
        return;
    }

    request( url, function ( error, response, body ) {
        if ( error ) {
            console.error( 'HTTP ERROR: ' + error );
            visit( todo.shift() );
            return;
        }

        // The Content-Type header may be absent altogether; treat that as
        // "not JSON" instead of failing on `undefined.toLowerCase()`.
        var contentType = String( response.headers['content-type'] || '' ).toLowerCase();

        if ( contentType.indexOf( 'application/json' ) !== 0 ) {
            console.log( 'Response is not JSON: ' + response.request.href );
            visit( todo.shift() );
            return;
        }

        console.log( 'Processing JSON response: ' + response.request.href );

        var filename = ( response.request.host + ':' + response.request.path ).replace( /\//g, ':' );

        fs.writeFile( filename, response.body, function ( err ) {
            // Check the write result before doing anything else, so a failed
            // write does not first kick off another request.
            if ( err ) throw err;
            console.log( 'Saved response: ' + filename );

            // A server can label a malformed body as JSON; a parse failure
            // must not abort the whole crawl.
            var parsed;
            var parsedOk = false;
            try {
                parsed = JSON.parse( body );
                parsedOk = true;
            } catch ( parseError ) {
                console.error( 'Invalid JSON in response: ' + response.request.href + ' (' + parseError.message + ')' );
            }

            if ( parsedOk ) {
                traverse( parsed, null, response.request.uri );
            }
            visit( todo.shift() );
        });
    });
}
/**
 * Walk a parsed JSON value depth-first and queue every string that converts
 * to an http(s) URL.
 *
 * Object keys extend the dotted `path` (`a.b.c`); array elements keep the
 * path of the array itself. Each non-empty string leaf is passed through
 * `convertToUrl()`; results that start with `http://` or `https://` are
 * logged and pushed onto the `todo` queue. Falsy values and other leaf types
 * (numbers, booleans) are ignored.
 *
 * @param {*} obj - Value to inspect (object, array or leaf).
 * @param {?string} path - Dotted path of `obj` within the document; falsy for the root.
 * @param {Object} uri - Parsed URI of the request that produced the document,
 *   forwarded unchanged to `convertToUrl()`.
 */
function traverse( obj, path, uri ) {
    if ( !obj ) {
        return;
    }

    if ( Array.isArray( obj ) ) {
        var arrayPath = path ? path : '';
        obj.forEach(function( element ) {
            traverse( element, arrayPath, uri );
        });
        return;
    }

    // Detect objects by their internal tag rather than `obj.constructor`:
    // a JSON document may contain its own "constructor" key, which would hide
    // the real constructor and cause the whole object to be skipped.
    if ( Object.prototype.toString.call( obj ) === '[object Object]' ) {
        var prefix = path ? path + '.' : '';
        Object.keys( obj ).forEach(function( key ) {
            traverse( obj[key], prefix + key, uri );
        });
        return;
    }

    if ( typeof obj === 'string' ) {
        var url = convertToUrl( obj, path, uri );
        if ( url && /^http(s?)\:\/\//.test( url ) ) {
            // Single write producing the same bytes as the former
            // util.print(...) + util.puts(url) pair, both of which have been
            // removed from current Node.js releases.
            console.log( 'Adding path to queue: "' + path + '"' + url );
            todo.push( url );
        }
    }
}
// Start the crawl with the first queued URL. visit() returns immediately when
// given a falsy value, so an empty queue simply ends the script.
visit( todo.shift() );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment