Skip to content

Instantly share code, notes, and snippets.

@nijikokun
Last active January 26, 2016 23:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nijikokun/2f1f16325f8ffe14b1b3 to your computer and use it in GitHub Desktop.
Save nijikokun/2f1f16325f8ffe14b1b3 to your computer and use it in GitHub Desktop.
NPM V3 read-package-tree cache POC

Extremely quick POC caching for NPM v3

Warning

This file is a proof-of-concept, and is not at all a reliable patch. This simply tests whether it would be faster (for HDD / CPU) to cache all package.json lookups in the read-package-tree module versus reloading every time.

In my own testing, on a project with 80 modules, the install time went from 20 minutes down to 3 seconds.

Notes

This creates a file under cache/.<folder-name>.json for the cache, it is extremely large, please refrain from opening in a text editor, you have been warned.

This changes npm install without specifying modules, for example:

$ npm install

Will do an entire re-install instead of installing only the difference (this is something I have not looked into solving).

Ideas

  • Use sqlite or a comparison algorithm, or anything better to be honest; this is a super inefficient way of solving it, but it serves its purpose as a proof-of-concept
  • Properly diff the root package.json against a cached variant to fix the above problem
  • Delete entries that were not looked up
var fs = require('fs')
var rpj = require('read-package-json')
var path = require('path')
var dz = require('dezalgo')
var once = require('once')
var readdir = require('readdir-scoped-modules')
var debug = require('debuglog')('rpt')
// NOTE(review): `subcache` is not referenced anywhere in the visible source —
// confirm it is unused before removing it.
var subcache = {}
/**
 * Shorten a path for debug output by expressing it relative to the current
 * working directory.
 *
 * @param {string} p - Path to shorten. Any falsy value yields ''.
 * @returns {string} '' when `p` is the cwd itself, `p` without the leading
 *   cwd and separator when `p` lies inside the cwd, otherwise `p` unchanged.
 */
function dpath (p) {
  if (!p) return ''
  var cwd = process.cwd()
  if (p === cwd) return ''
  // Match against the cwd *including* a trailing separator so that a sibling
  // such as "/work/app-old" is not mistaken for a child of "/work/app".
  // A cwd that already ends in a separator (e.g. the filesystem root) is used
  // as-is so the first character of the remainder is not chopped off.
  var prefix = cwd.charAt(cwd.length - 1) === path.sep ? cwd : cwd + path.sep
  if (p.indexOf(prefix) === 0) return p.substr(prefix.length)
  return p
}
// Public API: the tree reader itself, with the two node constructors attached
// to it as properties.
module.exports = rpt
rpt.Node = Node
rpt.Link = Link
// Counter incremented by the Node and Link constructors to assign each
// instance its `id`.
var ID = 0
/**
 * A package folder in the tree. Usable with or without `new`.
 *
 * If `cache` already holds an entry for `physical`, that entry is returned
 * instead of a new instance; otherwise the new instance registers itself in
 * `cache` under `physical`.
 *
 * @param {Object|null} pkg - Package data; an empty object is stored when falsy.
 * @param {string} logical - Stored as `path`.
 * @param {string} physical - Stored as `realpath`; also the cache key.
 * @param {*} er - Stored as `error`.
 * @param {Object} cache - Map from physical path to an existing Node/Link.
 */
function Node (pkg, logical, physical, er, cache) {
  var existing = cache[physical]
  if (existing) return existing

  // Support being called as a plain function.
  var constructing = this instanceof Node
  if (!constructing) return new Node(pkg, logical, physical, er, cache)

  cache[physical] = this
  debug(this.constructor.name, dpath(physical), pkg && pkg._id)

  this.id = ID++
  this.package = pkg ? pkg : {}
  this.path = logical
  this.realpath = physical
  this.parent = null
  this.isLink = false
  this.children = []
  this.error = er
}

// Prototype-level defaults for the fields every instance overwrites.
Node.prototype.package = null
Node.prototype.path = ''
Node.prototype.realpath = ''
Node.prototype.children = null
Node.prototype.error = null
/**
 * A symlinked package folder. Usable with or without `new`.
 *
 * Like Node, an existing `cache` entry for `physical` is returned as-is.
 * A new Link registers itself under `physical` and then creates (or reuses)
 * a Node keyed by `realpath` as its `target`; the Link shares the target's
 * `children` array.
 *
 * @param {Object|null} pkg - Package data; an empty object is stored when falsy.
 * @param {string} logical - Stored as `path`.
 * @param {string} physical - Cache key for this link.
 * @param {string} realpath - Stored as `realpath`; cache key of the target.
 * @param {*} er - Stored as `error` and passed on to the target Node.
 * @param {Object} cache - Map from path to an existing Node/Link.
 */
function Link (pkg, logical, physical, realpath, er, cache) {
  var existing = cache[physical]
  if (existing) return existing

  // Support being called as a plain function.
  if (!(this instanceof Link)) {
    return new Link(pkg, logical, physical, realpath, er, cache)
  }

  cache[physical] = this
  debug(this.constructor.name, dpath(physical), pkg && pkg._id)

  this.id = ID++
  this.path = logical
  this.realpath = realpath
  this.package = pkg ? pkg : {}
  this.parent = null
  this.target = new Node(this.package, logical, realpath, er, cache)
  this.isLink = true
  this.children = this.target.children
  this.error = er
}

// Link inherits from Node; `constructor` is restored as a non-enumerable
// property pointing back at Link.
Link.prototype = Object.create(Node.prototype)
Object.defineProperty(Link.prototype, 'constructor', { value: Link })

Link.prototype.target = null
Link.prototype.realpath = ''
/**
 * Resolve `physical` to its real path and produce a Node/Link for it, using
 * `externalCache` to avoid re-reading package.json when it has not changed.
 *
 * Cache entries are keyed by the absolute package.json path and have the
 * shape `{ ctime, lookup, data }`. `ctime` may be a Date (entry created in
 * this process) or whatever JSON round-tripping turned it into (entry loaded
 * from disk), so it is normalised to a timestamp before comparing.
 *
 * @param {string} logical - Logical path of the package folder.
 * @param {string} physical - Path handed to fs.realpath.
 * @param {Object} externalCache - Persistent package.json cache (mutated).
 * @param {Object} internalCache - Per-run Node/Link cache.
 * @param {Function} cb - Called as cb(er) when realpath fails; otherwise
 *   passed on to readNode / handleNodePkg.
 */
function loadNode (logical, physical, externalCache, internalCache, cb) {
  debug('loadNode', dpath(logical))
  fs.realpath(physical, function (er, real) {
    if (er) return cb(er)
    debug('realpath l=%j p=%j real=%j', dpath(logical), dpath(physical), dpath(real))
    var pj = path.resolve(real, 'package.json')
    var ce = externalCache[pj]
    fs.stat(pj, function (er, stat) {
      if (!ce) {
        // First time this package.json is seen: record it, then read it.
        externalCache[pj] = {
          ctime: stat ? stat.ctime : null,
          lookup: true,
          data: null
        }
        return readNode(logical, physical, real, externalCache, internalCache, pj, cb)
      }

      ce.lookup = true
      if (stat) {
        // Compare numeric timestamps: a Date compared against the ISO string
        // that JSON produces coerces the string to NaN and is never "newer".
        var cachedAt = new Date(ce.ctime).getTime()
        var changedAt = new Date(stat.ctime).getTime()
        // An unparsable cached ctime is treated as stale.
        if (isNaN(cachedAt) || changedAt > cachedAt) {
          // Remember the new ctime so the entry is not re-read on every run.
          ce.ctime = stat.ctime
          return readNode(logical, physical, real, externalCache, internalCache, pj, cb)
        }
      }
      // Unchanged (or stat failed): serve the cached package data.
      return handleNodePkg(logical, physical, real, externalCache, internalCache, pj, null, ce.data, cb)
    })
  })
}
/**
 * Read the package.json at `pj` with read-package-json and hand the result
 * (or the read error) to handleNodePkg. A missing package object is
 * normalised to `null`.
 *
 * All other arguments are passed through to handleNodePkg unchanged.
 */
function readNode (logical, physical, real, externalCache, internalCache, pj, cb) {
  rpj(pj, function onPackageRead (readError, manifest) {
    var pkg = manifest ? manifest : null
    return handleNodePkg(logical, physical, real, externalCache, internalCache, pj, readError, pkg, cb)
  })
}
/**
 * Store `pkg` in the external cache entry for `pj`, build the tree node for
 * the folder and pass it to `cb`.
 *
 * A Node is created when `physical` and `real` are the same path, a Link
 * otherwise. The callback always receives `null` as its error; `er` is
 * passed into the node constructor instead.
 *
 * Expects `externalCache[pj]` to exist already.
 */
function handleNodePkg (logical, physical, real, externalCache, internalCache, pj, er, pkg, cb) {
  externalCache[pj].data = pkg

  var isSymlinked = physical !== real
  var node = isSymlinked
    ? new Link(pkg, logical, physical, real, er, internalCache)
    : new Node(pkg, logical, physical, er, internalCache)

  cb(null, node)
}
/**
 * Load the direct children of `node` from its `node_modules` folder.
 *
 * Entries whose name starts with '.' are skipped, as are entries rejected by
 * `filterWith(node, name)` when a filter is given. Each remaining entry is
 * loaded with loadNode, appended to `node.children` and given `node` as its
 * parent. Once the last child has arrived the children are sorted and
 * `cb(null, node)` is called. A failure to list the folder is not an error:
 * the node is returned with whatever children it already had.
 *
 * @param {Object} node - Parent node (reads `path`, `realpath`, `children`).
 * @param {Object} externalCache - Passed through to loadNode.
 * @param {Object} internalCache - Passed through to loadNode.
 * @param {Function|null} filterWith - Optional predicate (node, name).
 * @param {Function} cb - cb(er) on a child load error, else cb(null, node).
 */
function loadChildren (node, externalCache, internalCache, filterWith, cb) {
  debug('loadChildren', dpath(node.path))

  // Several children may fail; only the first outcome is reported.
  var done = once(cb)
  var modulesDir = path.resolve(node.path, 'node_modules')

  function isWanted (name) {
    if (name[0] === '.') return false
    return !filterWith || filterWith(node, name)
  }

  readdir(modulesDir, function (listError, names) {
    // No readable node_modules simply means no children.
    if (listError) return done(null, node)

    var wanted = names.filter(isWanted)
    var pending = wanted.length
    if (pending === 0) return done(null, node)

    function onChild (loadError, child) {
      if (loadError) return done(loadError)
      node.children.push(child)
      child.parent = node
      pending -= 1
      if (pending === 0) {
        sortChildren(node)
        return done(null, node)
      }
    }

    for (var i = 0; i < wanted.length; i++) {
      var name = wanted[i]
      var childPath = path.resolve(modulesDir, name)
      var childRealPath = path.resolve(node.realpath, 'node_modules', name)
      loadNode(childPath, childRealPath, externalCache, internalCache, onChild)
    }
  })
}
/**
 * Sort `node.children` in place by lower-cased package name, falling back to
 * the child's `path` when the package has no name.
 *
 * The comparator returns 0 for equal keys so that it is a consistent
 * ordering: children that share a key keep their existing relative order
 * instead of being shuffled by the sort implementation.
 *
 * @param {Object} node - Node whose `children` array is sorted.
 */
function sortChildren (node) {
  function sortKey (child) {
    return child.package.name ? child.package.name.toLowerCase() : child.path
  }

  node.children = node.children.sort(function (a, b) {
    var keyA = sortKey(a)
    var keyB = sortKey(b)
    if (keyA === keyB) return 0
    return keyA > keyB ? 1 : -1
  })
}
/**
 * Recursively load the children of `node` and of every descendant.
 *
 * `did` records the realpaths that have already been expanded; a node whose
 * realpath is already in it is returned immediately (through dezalgo).
 * Otherwise the node is marked, its children are loaded, and loadTree
 * recurses into each child not yet marked. `cb(null, node)` fires once all of
 * those recursive calls have reported back; the first error wins.
 *
 * @param {Object} node - Node to expand (reads `path`, `realpath`).
 * @param {Object} did - Map of realpath -> true for expanded nodes (mutated).
 * @param {Object} externalCache - Passed through to loadChildren.
 * @param {Object} internalCache - Passed through to loadChildren.
 * @param {Function|null} filterWith - Passed through to loadChildren.
 * @param {Function} cb - cb(er) or cb(null, node).
 */
function loadTree (node, did, externalCache, internalCache, filterWith, cb) {
  debug('loadTree', dpath(node.path), !!internalCache[node.path], !!externalCache[node.path])

  var alreadyExpanded = did[node.realpath]
  if (alreadyExpanded) return dz(cb)(null, node)

  did[node.realpath] = true
  var done = once(cb)

  loadChildren(node, externalCache, internalCache, filterWith, function (loadError, loaded) {
    if (loadError) return done(loadError)

    var unexpanded = loaded.children.filter(function (child) {
      return !did[child.realpath]
    })
    var remaining = unexpanded.length
    if (remaining === 0) return done(null, loaded)

    function onSubtree (subtreeError) {
      if (subtreeError) return done(subtreeError)
      remaining -= 1
      if (remaining === 0) done(null, loaded)
    }

    for (var i = 0; i < unexpanded.length; i++) {
      loadTree(unexpanded[i], did, externalCache, internalCache, filterWith, onSubtree)
    }
  })
}
/**
 * Read the package tree rooted at `root`, backed by an on-disk cache kept in
 * the `cache/` directory (relative to the process working directory).
 *
 * The directory is created first when it does not exist; the actual work is
 * then delegated to rptc exactly once.
 *
 * @param {string} root - Root folder of the tree.
 * @param {Function} [filterWith] - Optional child filter; may be omitted, in
 *   which case the second argument is taken as the callback.
 * @param {Function} cb - Receives cb(er) if the cache directory cannot be
 *   created; otherwise handed on to rptc.
 */
function rpt (root, filterWith, cb) {
  if (!cb) {
    cb = filterWith
    filterWith = null
  }
  fs.stat('cache/', function (er) {
    // Anything other than "missing" is left for rptc to deal with.
    if (!er || er.code !== 'ENOENT') return rptc(root, filterWith, cb)

    fs.mkdir('cache/', function (er) {
      // EEXIST means something else created the directory in the meantime,
      // which is just as good.
      if (er && er.code !== 'EEXIST') return cb(er)
      return rptc(root, filterWith, cb)
    })
  })
}
/**
 * Load the persisted package.json cache for `root` and continue with rpt_.
 *
 * The cache file is `cache/.<basename of root>.json`. A missing file, a file
 * that is not valid JSON, or JSON that is not a plain object all result in an
 * empty cache rather than a failure, since the cache can always be rebuilt.
 *
 * @param {string} root - Root folder of the tree.
 * @param {Function|null} filterWith - Passed through to rpt_.
 * @param {Function} cb - Receives cb(er) when the cache file exists but
 *   cannot be read; otherwise handed on to rpt_.
 */
function rptc (root, filterWith, cb) {
  // path.basename ignores trailing separators, so "proj/" still maps to
  // ".proj.json" instead of "..json".
  var filename = 'cache/.' + path.basename(root) + '.json'

  // Read directly instead of stat-then-read: one less syscall and no window
  // between the existence check and the read.
  fs.readFile(filename, function (er, data) {
    if (er && er.code === 'ENOENT') {
      return rpt_(root, filename, {}, filterWith, cb)
    }
    if (er) return cb(er)

    var externalCache
    try {
      externalCache = JSON.parse(data)
    } catch (parseError) {
      // Corrupt cache file: start over with an empty cache.
      externalCache = null
    }
    if (!externalCache || typeof externalCache !== 'object' || Array.isArray(externalCache)) {
      externalCache = {}
    }
    return rpt_(root, filename, externalCache, filterWith, cb)
  })
}
/**
 * Build the tree for `root` using `externalCache`, then write the (possibly
 * updated) cache back to `externalCacheName` before calling `cb`.
 *
 * The outcome of the cache write is not inspected. The error passed to `cb`
 * is the root-node error unless that is absent or has code 'ENOENT', in which
 * case the loadTree error is used.
 *
 * @param {string} root - Root folder of the tree.
 * @param {string} externalCacheName - File the cache is serialised to.
 * @param {Object} externalCache - Package.json cache, mutated while loading.
 * @param {Function|null} filterWith - Passed through to loadTree.
 * @param {Function} cb - cb(er) when no root node could be produced,
 *   otherwise cb(er, tree).
 */
function rpt_ (root, externalCacheName, externalCache, filterWith, cb) {
  fs.realpath(root, function onRealRoot (realpathError, realRoot) {
    if (realpathError) return cb(realpathError)
    debug('rpt', dpath(realRoot))

    // Prototype-less map so that arbitrary paths are safe as keys.
    var internalCache = Object.create(null)

    loadNode(root, realRoot, externalCache, internalCache, onRootNode)

    function onRootNode (nodeError, node) {
      // if there's an error, it's fine, as long as we got a node
      if (!node) return cb(nodeError)

      loadTree(node, {}, externalCache, internalCache, filterWith, function onTree (treeError, tree) {
        var serialized = JSON.stringify(externalCache)
        fs.writeFile(externalCacheName, serialized, function onCacheWritten () {
          var reported = treeError
          if (nodeError && nodeError.code !== 'ENOENT') reported = nodeError
          cb(reported, tree)
        })
      })
    }
  })
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment