$ for TEST in old.js ts.js np3.js np3-seek.js np3-ignore.js no-parse.js; do echo ""; echo $TEST; for i in {1..5}; do node $TEST test/fixtures.tar >/dev/null; done; done

old.js
24.48
25.71
25.696
26.064
24.163

ts.js
43.783
32.742
42.84
37.62
24.885

np3.js
22.333
21.877
22.392
22.277
26.681

np3-seek.js
13.655
14.247
17.884
15.737
13.07

np3-ignore.js
9.554
10.376
9.671
10.198
9.807

no-parse.js
5.873
5.649
6.187
5.776
5.678
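For easier comparison, the mean of the five runs for each script (computed from the numbers above, in ms):

old.js         25.22
ts.js          36.37
np3.js         23.11
np3-seek.js    14.92
np3-ignore.js   9.92
no-parse.js     5.83

So np3-ignore.js is roughly 2.5x faster than old.js, and the do-nothing no-parse.js baseline is about 4.3x faster.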
no-parse.js
// Read just the 512-byte tar headers, print each entry path, and
// seek past the entry bodies. Not a correct tar lister (no long
// pathnames, no extended headers), just a lower-bound baseline.
const ReadStream = require('fs-readstream-seek')
const input = new ReadStream(process.argv[2])

const parse = chunk => {
  // bytes 0-99 of the header are the NUL-padded entry path
  var path = chunk.slice(0, 100).toString().replace(/\u0000.*/, '')
  if (!path)
    return
  // bytes 124-135 are the entry size; round up to 512-byte blocks
  var size = parseNumeric(chunk.slice(124, 124 + 12))
  var blockSize = Math.ceil(size / 512) * 512
  console.log(path)
  // jump over the body to the next header
  input.seek(input.readPos + blockSize)
}

const parseNumeric = f => {
  // a set high bit on the first byte marks base-256 encoding;
  // otherwise the field is NUL-terminated octal ASCII
  if (f[0] & 0x80) return parse256(f)
  var str = f.toString().replace(/\u0000.*$/, '').trim()
  var res = parseInt(str, 8)
  return isNaN(res) ? null : res
}

const parse256 = buf => {
  // first byte MUST be either 80 or FF
  // 80 for positive, FF for 2's comp
  var positive
  if (buf[0] === 0x80) positive = true
  else if (buf[0] === 0xFF) positive = false
  else return null

  // build up a base-256 tuple from the least sig to the highest.
  // zero starts true: in two's complement, trailing 0x00 bytes stay
  // 0x00, the first nonzero byte becomes 0x100 - byte, and every
  // byte above that becomes 0xFF - byte
  var zero = true
  var tuple = []
  for (var i = buf.length - 1; i > 0; i--) {
    var byte = buf[i]
    if (positive) tuple.push(byte)
    else if (zero && byte === 0) tuple.push(0)
    else if (zero) {
      zero = false
      tuple.push(0x100 - byte)
    } else tuple.push(0xFF - byte)
  }

  for (var sum = 0, i = 0, l = tuple.length; i < l; i++) {
    sum += tuple[i] * Math.pow(256, i)
  }
  return positive ? sum : -1 * sum
}

const read = _ => {
  let chunk
  while (chunk = input.read(512)) {
    parse(chunk)
  }
}
input.on('readable', read)

const start = process.hrtime()
process.on('exit', c => {
  const end = process.hrtime(start)
  // elapsed wall time in milliseconds
  console.error(end[0] * 1e3 + end[1] / 1e6)
})
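For a sense of what parse256 is for: the 11-digit octal size field tops out at 0o77777777777 = 8589934591 bytes (just under 8 GiB), so anything larger has to use the base-256 form. A quick sanity check against the two functions above, with buffer contents chosen purely for illustration:

// 8 GiB = 8589934592 = 0x02_00000000, stored big-endian after the
// 0x80 marker byte in the 12-byte size field
const big = Buffer.from([0x80, 0, 0, 0, 0, 0, 0, 0x02, 0, 0, 0, 0])
console.log(parse256(big)) // 8589934592

// an ordinary NUL-terminated octal field: "0000644" is 0o644 = 420
console.log(parseNumeric(Buffer.from('0000644\u0000'))) // 420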
np3-ignore.js
// Read the whole tarball into memory, then let the parser's filter
// callback see each entry; returning false skips the body entirely.
const Parser = require('./lib/new3.js')
const p = new Parser({
  filter (entry) {
    console.log(entry.path)
    return false
  }
})
const fs = require('fs')
const data = fs.readFileSync(process.argv[2])
console.log('< %j', process.argv[2])
process.on('exit', c => {
  const end = process.hrtime(start)
  console.error(end[0] * 1e3 + end[1] / 1e6)
})
const start = process.hrtime()
p.write(data)
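The same hook could do selective listing instead of ignore-everything. A minimal sketch, assuming only the filter(entry) contract demonstrated above (return false to skip the entry's body):

// hypothetical variant: list only .js entries, still skipping bodies
const Parser = require('./lib/new3.js')
const p = new Parser({
  filter (entry) {
    if (/\.js$/.test(entry.path))
      console.log(entry.path)
    return false
  }
})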
np3-seek.js
// Stream the file with fs-readstream-seek, feed 512-byte chunks to
// the parser, and whenever an entry shows up, seek the input past
// its body instead of reading it.
const Parser = require('./lib/new3.js')
const p = new Parser()
const ReadStream = require('fs-readstream-seek')

p.on('entry', entry => {
  // blockRemain is the 512-padded size of the body still to come;
  // readPos sits just past the header we fed the parser
  const entryBlockSize = entry.blockRemain
  const entryEnd = input.readPos + entryBlockSize
  // tell the parser the body has already been consumed
  entry.remain = 0
  entry.blockRemain = 0
  console.log(entry.path)
  if (entryBlockSize !== 0)
    input.seek(entryEnd)
})

console.log('< %j', process.argv[2])
const input = new ReadStream(process.argv[2])
input.once('end', _ => p.end())

const read = _ => {
  let chunk
  // this is just reading headers; bodies are skipped by the seek
  while (chunk = input.read(512)) {
    if (!p.write(chunk))
      return p.once('drain', read)
  }
}
input.on('readable', read)

const start = process.hrtime()
process.on('exit', c => {
  const end = process.hrtime(start)
  console.error(end[0] * 1e3 + end[1] / 1e6)
})
np3.js
// Simplest use of the new parser: read the whole file, write it in
// one big chunk, and resume() each entry so its body streams through.
const Parser = require('./lib/new3.js')
const p = new Parser()
p.on('entry', entry => {
  entry.resume()
  console.log(entry.path)
})
const fs = require('fs')
const input = fs.readFileSync(process.argv[2])
const start = process.hrtime()
p.write(input)
// the write is processed synchronously here, so measure inline
const end = process.hrtime(start)
console.error(end[0] * 1e3 + end[1] / 1e6)
old.js
// The current node-tar parser, for comparison.
const Parse = require('./').Parse
const p = Parse()
p.on('entry', entry => {
  console.log(entry.path)
})
p.resume()
const fs = require('fs')
const input = fs.readFileSync(process.argv[2])
process.on('exit', c => {
  const end = process.hrtime(start)
  console.error(end[0] * 1e3 + end[1] / 1e6)
})
const start = process.hrtime()
p.write(input)
p.end()
ts.js
// tar-stream's extract() parser, same one-big-buffer treatment.
const ts = require('tar-stream')
const p = ts.extract()
p.on('entry', (entry, stream, callback) => {
  console.log(entry.name)
  stream.resume()
  callback()
})
p.on('data', _ => _)
const fs = require('fs')
const input = fs.readFileSync(process.argv[2])
const start = process.hrtime()
p.write(input)
p.end()
// streaming variant, for reference:
// const fs = require('fs')
// const input = fs.createReadStream(process.argv[2])
// input.pipe(p)
process.on('exit', c => {
  const end = process.hrtime(start)
  console.error(end[0] * 1e3 + end[1] / 1e6)
})
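Fully wired up, the streaming variant sketched in those comments would look something like this (same tar-stream API; timing on the writable's 'finish' event rather than process exit is my choice here, not part of the original):

const ts = require('tar-stream')
const fs = require('fs')
const p = ts.extract()
p.on('entry', (entry, stream, callback) => {
  console.log(entry.name)
  stream.resume() // drain the body
  callback()
})
const start = process.hrtime()
fs.createReadStream(process.argv[2]).pipe(p)
// extract() is a writable stream, so 'finish' fires once the whole
// tarball has been written and parsed
p.on('finish', _ => {
  const end = process.hrtime(start)
  console.error(end[0] * 1e3 + end[1] / 1e6)
})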
5 different approaches to doing tar tf <file> in Node. (All times in ms, measured with process.hrtime.)

no-parse.js is the fastest. It does literally nothing, but could be faster still if it operated on a single big buffer instead of using fs-readstream-seek. It's also not correct, since it doesn't handle long pathnames, extended headers, etc. Just a baseline, really. "Doing almost nothing" is always the fastest program.

old.js is the slowest. It uses the current node-tar parser. The parser in node-tar isn't that bad, but it's doing a lot more junk than it needs to, rather inefficiently, with no way to skip over entry bodies or consume chunks larger than 512 bytes.

ts.js uses tar-stream, which is a very nice tar parser for node. It's usually faster than old.js, but having run this benchmark a lot, I've noticed it occasionally has one very slow run. Not sure what's going on there. old.js is consistently slow, but ts.js is inconsistently fast. /shrug

np3.js is the 3rd iteration of the new parser. It's the simplest one: read the data in one big block, write it to the parser, done.

np3-seek.js uses fs-readstream-seek to jump past entry bodies. This is a good approach if the file is larger than you want to load into memory.

np3-ignore.js uses a filter function to ignore all the files, just dumping their paths as they come in. In this test, it also writes the data in one big chunk, and the parser skips over the body entirely if an entry is ignored. This is the fastest correct implementation so far.