Skip to content

Instantly share code, notes, and snippets.

@apocas
Last active November 26, 2017 12:05
Show Gist options
  • Save apocas/4583334 to your computer and use it in GitHub Desktop.
Save apocas/4583334 to your computer and use it in GitHub Desktop.
Node NPM registry crawler.
var jsdom = require('jsdom'),
request = require('request'),
url = require('url'),
npm = require("npm"),
redis = require("redis");
// Redis connection shared by the whole script; createClient() is called with
// no arguments, so the library's default connection settings apply.
var client = redis.createClient();

// Options object handed to npm.load() before any npm command is issued.
var configObject = {
  dev: false,
  loglevel: 'error'
};

// Start crawling at listing page 0; getData() advances to later pages itself.
getData(0);
/**
 * Crawl one page of the npm "browse all" listing, then move on to the next.
 *
 * Requests `https://npmjs.org/browse/all/<j>`, parses the body with jsdom and
 * jQuery, passes the inner HTML of the link found in every `.row` element to
 * `crawled()`, and finally calls itself with `j + 1`. Crawling stops at the
 * first page that contains one `.row` element or fewer, or at the first
 * request / npm / DOM failure (which is logged).
 *
 * @param {number} j - Page index appended to the listing URL.
 */
function getData(j) {
  request({
    uri: 'https://npmjs.org/browse/all/' + j
  }, function (err, response, body) {
    // `response` is undefined when the request itself failed, so `err` has to
    // be tested first; any failure ends the crawl instead of parsing garbage.
    if (err || !response || response.statusCode !== 200) {
      console.log('Request error.');
      return;
    }

    // crawled() relies on npm.commands, so npm is loaded before rows are read.
    npm.load(configObject, function (loadErr) {
      if (loadErr) {
        console.error('npm load error.', loadErr);
        return;
      }

      jsdom.env({
        html: body,
        scripts: ['http://code.jquery.com/jquery-1.6.min.js']
      }, function (domErr, window) {
        if (domErr || !window) {
          console.error('DOM error.', domErr);
          return;
        }

        var $ = window.jQuery;
        var $rows = $('body').find('.row');
        var hasMore = $rows.length > 1;

        if (hasMore) {
          $rows.each(function (i, item) {
            var name = $(item).find('a').html();
            crawled(name);
          });
        }

        // Only plain strings were handed on, so the window can be released
        // now; otherwise every crawled page keeps its DOM alive.
        if (typeof window.close === 'function') {
          window.close();
        }

        if (hasMore) {
          getData(j + 1);
        }
      });
    });
  });
}
/**
 * Mark a module as visited and, the first time it is seen, index its
 * dependencies in Redis.
 *
 * The module name is added to the Redis set "crawled". Only when that add
 * reports a new member is `npm view` run for the module; for every key of the
 * returned `dependencies` object the line "<dependency> <- <module>" is
 * printed and the module's name is added to a Redis set named after the
 * dependency. Redis and npm failures are logged and end processing for this
 * module.
 *
 * @param {string} module - Package name as read from the registry listing.
 */
function crawled(module) {
  client.sadd("crawled", module, function (err, added) {
    if (err) {
      console.error('Redis error for ' + module + '.', err);
      return;
    }
    // SADD replies with the number of members actually added, so anything
    // other than 1 means this module was already handled.
    if (added !== 1) {
      return;
    }

    npm.commands.view([module], true, function (er, data) {
      if (er) {
        console.error('npm view error for ' + module + '.', er);
        return;
      }
      if (data == null) {
        return;
      }

      // Only the first entry of the object returned by `npm view` is used.
      var info = data[Object.keys(data)[0]];
      if (info == null || info.dependencies == null) {
        return;
      }

      var dependent = info.name;
      // Computed once; the key list does not change while iterating.
      var libs = Object.keys(info.dependencies);

      // NOTE(review): a dependency literally named "crawled" would be written
      // into the visited set above, since both share one key namespace.
      // Left as is because changing the key layout affects existing data.
      for (var i = 0; i < libs.length; i++) {
        console.log(libs[i] + " <- " + dependent);
        client.sadd(libs[i], dependent);
      }
    });
  });
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment