Skip to content

Instantly share code, notes, and snippets.

@jamessouth
Last active May 1, 2020 20:12
Show Gist options
  • Save jamessouth/23bb240dadc09a9c3c6cbce2f11ba90c to your computer and use it in GitHub Desktop.
Save jamessouth/23bb240dadc09a9c3c6cbce2f11ba90c to your computer and use it in GitHub Desktop.
Node script that searches npmjs.com for packages that list every one of the given packages as a dependency, i.e., which packages have x, y, and z as dependencies?
const https = require('https');
const { Transform } = require('stream');
//each package listed as depending on another is in its own <section>
/**
 * Creates a Transform stream that re-chunks incoming HTML so that every
 * piece ending in `</section>` is pushed downstream as its own
 * newline-terminated chunk (the `</section>` tag itself is dropped).
 *
 * Text after the last `</section>` seen so far is held back and prepended
 * to the next incoming chunk; whatever is left when the input ends is pushed
 * by `flush`.
 *
 * @returns {Transform} a new stream with its own private buffering state
 */
const breakIntoSections = () => {
  // Streaming decoder: an incomplete multi-byte UTF-8 sequence at the end of
  // a chunk is kept until the next chunk arrives instead of being decoded
  // to U+FFFD. ignoreBOM keeps a leading BOM in the text, like toString().
  const decoder = new TextDecoder('utf-8', { ignoreBOM: true });
  let partialRow = '';

  return new Transform({
    transform(ch, _, cb) {
      const text = partialRow + decoder.decode(ch, { stream: true });
      const rows = text.split('</section>');
      // The last piece has not been terminated by </section> yet.
      partialRow = rows.pop();
      for (const row of rows) {
        this.push(`${row}\n`);
      }
      cb();
    },
    flush(cb) {
      // decode() with no input emits whatever bytes are still buffered.
      this.push(partialRow + decoder.decode());
      cb();
    },
  });
};
//each <section> has one <h3> with the package name
/**
 * Creates a Transform stream that keeps only the `<h3 … h3>` part of each
 * incoming chunk and drops chunks that contain none.
 *
 * @returns {Transform} a new filtering stream
 */
const getH3s = () => {
  function transform(ch, _, cb) {
    const headings = ch.toString().match(/<h3.+h3>/g);
    if (headings !== null) {
      // The match array is coerced to a string here, so if a chunk ever
      // yields several matches they are emitted comma-separated on one line.
      this.push(`${headings}\n`);
    }
    cb();
  }

  return new Transform({ transform });
};
//get package name from <h3>
/**
 * Creates a Transform stream that extracts the text between the first `>`
 * and the closing `</h3` of each incoming chunk and appends it to `arr`.
 * Nothing is pushed downstream; the collected array is handed to `resolve`
 * once the input has ended.
 *
 * @param {string[]} arr - array the extracted names are appended to
 * @param {(names: string[]) => void} resolve - called with `arr` on flush
 * @returns {Transform} a new collecting stream
 */
const getPackageNames = (arr, resolve) => new Transform({
  transform(ch, _, cb) {
    const found = ch.toString().match(/>.+(?=<\/h3)/);
    // An empty or malformed heading has no text to extract; skip it instead
    // of throwing on a null match.
    if (found !== null) {
      const name = found[0].slice(1); // drop the leading '>'
      // Headings starting with 'Help' are excluded — presumably a non-package
      // heading on the page; confirm against the current markup.
      if (!name.startsWith('Help')) {
        arr.push(name);
      }
    }
    cb();
  },
  flush(cb) {
    resolve(arr);
    cb();
  },
});
//stream web page of packages that depend on the given package
/**
 * Fetches one page of the npmjs.com "depended" listing for `pkg` and pipes
 * it through breakIntoSections -> getH3s -> getPackageNames.
 *
 * @param {number} offset - value of the `offset` query parameter
 * @param {string[]} arr - array the package names of this page are appended to
 * @param {string} pkg - package whose dependents are listed
 * @returns {Promise<string[]>} resolves with `arr` once the page has been
 *   fully processed; rejects on a request/response error or a non-2xx status
 */
function getData(offset, arr, pkg) {
  return new Promise((resolve, reject) => {
    setTimeout(() => {
      https
        .get(`https://www.npmjs.com/browse/depended/${pkg}?offset=${offset}`, (chunks) => {
          const { statusCode } = chunks;
          if (statusCode < 200 || statusCode >= 300) {
            // Discard the body so the connection is released, then report
            // the failure instead of treating it as an empty page.
            chunks.resume();
            reject(new Error(`Request for ${pkg} (offset ${offset}) failed with HTTP status ${statusCode}`));
            return;
          }
          chunks.on('error', reject);
          chunks
            .pipe(breakIntoSections())
            .pipe(getH3s())
            .pipe(getPackageNames(arr, resolve));
        })
        // Without this listener a network failure is an unhandled 'error' event.
        .on('error', reject);
    }, process.argv.slice(2).length * 1000); //if not slowed the calls are fired off as quickly as possible (about 5/second)
    //which seems to exceed the rate limit and the script exits early, plus regular navigation is curtailed. 1 second/package
    //searched is arbitrary but seems to space requests out enough that the server is not bothered
  });
}
//get array of packages that depend on the given package, i.e. what packages use terser as a regular (not dev) dependency?
/**
 * Requests successive pages for `pkg` via getData until a page comes back
 * empty, appending each page (including the final empty one) to `arr`.
 *
 * @param {string[][]} arr - array that receives one entry per page
 * @param {string} pkg - package whose dependents are collected
 * @returns {Promise<string[][]>} resolves with `arr`; rejects if any page
 *   request rejects
 */
async function getDepArray(arr, pkg) {
  let data;
  let offset = 0;
  // Pages are fetched strictly one after another; the loop needs each
  // result to know whether another page should be requested.
  do {
    data = await getData(offset, [], pkg);
    arr.push(data);
    offset += 36; //36 packages are listed per page
  } while (data.length > 0);
  return arr;
}
//kick off search in parallel, returns array of array(each package) of array(each page), which will be flattened to array of array
/**
 * Starts one getDepArray search per package name and waits for all of them.
 *
 * @param {string[]} arr - package names to search for
 * @returns {Promise<string[][][]>} one entry per package, each holding that
 *   package's pages of dependent names
 */
async function main(arr) {
  // Every search gets its own fresh accumulator array.
  const startSearch = (pkg) => getDepArray([], pkg);
  const searches = arr.map(startSearch);
  return Promise.all(searches);
}
//reduce array of array down to one array of package names that share all the given packages as dependencies
/**
 * Computes the names present in every inner array.
 *
 * Order (and any duplicates) follow the first inner array. An empty outer
 * array yields an empty result instead of throwing.
 *
 * @param {string[][]} arr - one array of names per searched package
 * @returns {string[]} names common to all inner arrays
 */
function getSharedDeps(arr) {
  if (arr.length === 0) {
    return [];
  }
  const [first, ...rest] = arr;
  return rest.reduce((shared, names) => {
    // Set lookup avoids rescanning `names` for every candidate.
    const lookup = new Set(names);
    return shared.filter((name) => lookup.has(name));
  }, first);
}
//start
// Entry point: searches for every package named on the command line, flattens
// each package's pages into one list, and prints the names common to all.
main(process.argv.slice(2)) //call with node dep-check.js package1 package2 package3...
  .then((arr) => arr.map((subArr) => subArr.flat())) //array of array of array -> array of array
  .then((results) => console.log('results: ', getSharedDeps(results)))
  .catch((err) => {
    // Report the failure and signal it through the exit status rather than
    // leaving the rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
@jamessouth
Copy link
Author

There doesn't seem to be a public API for this data. I tried libraries.io but they include dev dependencies and there didn't seem to be a way to exclude them.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment