@bjouhier
Created May 10, 2013 12:46
First bench to compare raw callbacks and the 3 streamline modes: callbacks, fibers and generators
var fs = require('fs');

function fib(n) {
  return n <= 1 ? 1 : fib(n - 1) + fib(n - 2);
}

function rawBench(n, loop, modulo, asyncFn, callback) {
  var count = 0;
  var l = loop;
  function fibo(nn, cb) {
    if (modulo && count++ % modulo === 0) asyncFn(function(err) {
      if (err) return cb(err);
      fibo1(nn, cb);
    });
    else fibo1(nn, cb);
  }
  function fibo1(nn, cb) {
    if (nn <= 1) return cb(null, 1);
    fibo(nn - 1, function(err, v1) {
      if (err) return cb(err);
      fibo(nn - 2, function(err, v2) {
        if (err) return cb(err);
        cb(null, v1 + v2);
      });
    });
  }
  var expected = fib(n);
  var t0 = Date.now();
  var loopCb = function(err, got) {
    if (err) return callback(err);
    if (got !== expected) throw new Error("bench failed: got " + got + ', expected ' + expected);
    if (--l > 0) {
      fibo(n, loopCb);
    } else {
      console.log('raw callbacks:\t' + (Date.now() - t0) + "ms");
      callback(null, count);
    }
  };
  fibo(n, loopCb);
}
function bench(prefix, n, loop, modulo, asyncFn, _) {
  var count = 0;
  function fibo(_, nn) {
    if (modulo && count++ % modulo === 0) asyncFn(_);
    if (nn <= 1) return 1;
    return fibo(_, nn - 1) + fibo(_, nn - 2);
  }
  var expected = fib(n);
  var t0 = Date.now();
  for (var i = 0; i < loop; i++) {
    var got = fibo(_, n);
    if (got !== expected) throw new Error("bench failed: got " + got + ', expected ' + expected);
  }
  var tabs = (prefix.length < 7) ? '\t\t' : '\t';
  console.log(prefix + ':' + tabs + (Date.now() - t0) + "ms");
  return count;
}

var syncBench = bench;

function makeBench(mode) {
  var fn;
  var str = "(function() {" + require("streamline/lib/" + mode + "/transform").transform("fn=" + bench.toString()) + "return bench; })()";
  eval(str);
  //console.log(mode + ": " + fn);
  return function(n, loop, modulo, asyncFn, cb) {
    fn(mode, n, loop, modulo, asyncFn, cb);
  };
}

var callbacksBench = makeBench('callbacks');
var fibersBench = makeBench('fibers');
var generatorsBench = makeBench('generators');

function fname(fn) {
  if (fn === setImmediate) return 'setImmediate';
  return fn.name;
}
function pass(n, loop, modulo, asyncFn, cb) {
  console.log('STARTING PASS: n=' + n + ', loop=' + loop + ', modulo=' + modulo + ', fn=' + fname(asyncFn));
  rawBench(n, loop, modulo, asyncFn, function(err, count) {
    if (err) throw err;
    callbacksBench(n, loop, modulo, asyncFn, function(err, c) {
      if (err) throw err;
      if (c !== count) throw new Error("count mismatch: expected " + count + ', got ' + c);
      fibersBench(n, loop, modulo, asyncFn, function(err, c) {
        if (err) throw err;
        if (c !== count) throw new Error("count mismatch: expected " + count + ', got ' + c);
        generatorsBench(n, loop, modulo, asyncFn, function(err, c) {
          if (err) throw err;
          if (c !== count) throw new Error("count mismatch: expected " + count + ', got ' + c);
          cb();
        });
      });
    });
  });
}
function warmUp(cb) {
  console.log("*** WARMING UP ***");
  pass(25, 1, 3, setImmediate, function() {
    pass(25, 1, 3, setImmediate, function() {
      pass(25, 1, 3, setImmediate, function() {
        cb();
      });
    });
  });
}

function test1(cb) {
  console.log("\n*** setImmediate n=25, loop=1 ***");
  pass(25, 1, 3, setImmediate, function() {
    pass(25, 1, 10, setImmediate, function() {
      pass(25, 1, 100, setImmediate, function() {
        pass(25, 1, 1000, setImmediate, function() {
          cb();
        });
      });
    });
  });
}

function test2(cb) {
  console.log("\n*** setImmediate n=1 loop=100000 ***");
  pass(1, 100000, 3, setImmediate, function() {
    pass(1, 100000, 10, setImmediate, function() {
      pass(1, 100000, 100, setImmediate, function() {
        pass(1, 100000, 1000, setImmediate, function() {
          cb();
        });
      });
    });
  });
}

function readMe(cb) {
  fs.readFile(__filename, "utf8", cb);
}

function test3(cb) {
  console.log("\n*** readMe n=1 loop=10000 ***");
  pass(1, 10000, 3, readMe, function() {
    pass(1, 10000, 10, readMe, function() {
      pass(1, 10000, 100, readMe, function() {
        cb();
      });
    });
  });
}

warmUp(function() {
  test1(function() {
    test2(function() {
      test3(function() {
        console.log("BENCH COMPLETE!");
      });
    });
  });
});
@bjouhier (Author)

The bench takes the following parameters:

  • n: the value we pass to the fibo function
  • loop: the number of times we loop on the fibo function
  • modulo: the frequency at which we call the async function when we recurse inside the fibo computation
  • asyncFn: the asynchronous function that the bench calls when recursing inside fibo.

With n=25 and loop=1 we test the speed of a highly recursive function.
With n=1 and loop>1000 we test the speed of a loop.
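
For instance, the two regimes translate into calls like these (just a sketch; the gist drives them through test1 and test2 above):

// deep recursion, a single iteration: stresses the recursive transformation overhead
pass(25, 1, 3, setImmediate, function() {
  // shallow recursion, many iterations: stresses the per-call loop overhead
  pass(1, 100000, 3, setImmediate, function() {
    console.log("done");
  });
});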

The asyncFn parameter lets me play with various async functions. I used setImmediate, which should be the fastest async function for this test (process.nextTick does not work here: it is not truly async and it triggers a stack overflow; why doesn't it trampoline?). I also used a readMe function that reads the current source file and gives a more realistic sense of the overhead on real I/O calls.

The modulo parameter lets me play with the ratio between sync and async processing in the bench. If I set it to 1 the async function is called on every recursion into fibo. If I set it to 100 it is only called once every 100 calls to the low level fibo function.
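
To get a feel for that ratio, here is a rough estimate (a helper of my own, not part of the bench) of how many asyncFn calls one pass triggers: the recursive fibo makes 2*fib(n) - 1 calls per loop iteration, and the count++ % modulo test fires for roughly one call in every modulo.

function estimateAsyncCalls(n, loop, modulo) {
  var totalCalls = loop * (2 * fib(n) - 1); // total fibo invocations across all iterations
  return Math.ceil(totalCalls / modulo);    // roughly one in `modulo` goes through asyncFn
}
// estimateAsyncCalls(25, 1, 3)       -> ~80929 async calls
// estimateAsyncCalls(25, 1, 1000)    -> ~243
// estimateAsyncCalls(1, 100000, 100) -> 1000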

Notes:

  • If I set modulo to a very low value < 3, I get crashes in generators mode (illegal access). This is why I always set a value >= 3 in the bench.
  • I had to make a few changes to fibers to cope with V8 3.19 API changes. You have to build it from my clone (https://github.com/bjouhier/node-fibers).

@bjouhier (Author)

Here is the typical output I get on my MacBook:

*** WARMING UP ***
STARTING PASS: n=25, loop=1, modulo=3, fn=setImmediate
raw callbacks:  290ms
callbacks:  635ms
fibers:     472ms
generators: 631ms
STARTING PASS: n=25, loop=1, modulo=3, fn=setImmediate
raw callbacks:  265ms
callbacks:  625ms
fibers:     498ms
generators: 654ms
STARTING PASS: n=25, loop=1, modulo=3, fn=setImmediate
raw callbacks:  272ms
callbacks:  627ms
fibers:     491ms
generators: 619ms

*** setImmediate n=25, loop=1 ***
STARTING PASS: n=25, loop=1, modulo=3, fn=setImmediate
raw callbacks:  263ms
callbacks:  631ms
fibers:     493ms
generators: 679ms
STARTING PASS: n=25, loop=1, modulo=10, fn=setImmediate
raw callbacks:  109ms
callbacks:  359ms
fibers:     149ms
generators: 368ms
STARTING PASS: n=25, loop=1, modulo=100, fn=setImmediate
raw callbacks:  36ms
callbacks:  217ms
fibers:     18ms
generators: 276ms
STARTING PASS: n=25, loop=1, modulo=1000, fn=setImmediate
raw callbacks:  22ms
callbacks:  191ms
fibers:     5ms
generators: 241ms

*** setImmediate n=1 loop=100000 ***
STARTING PASS: n=1, loop=100000, modulo=3, fn=setImmediate
raw callbacks:  85ms
callbacks:  260ms
fibers:     207ms
generators: 260ms
STARTING PASS: n=1, loop=100000, modulo=10, fn=setImmediate
raw callbacks:  31ms
callbacks:  155ms
fibers:     59ms
generators: 156ms
STARTING PASS: n=1, loop=100000, modulo=100, fn=setImmediate
raw callbacks:  9ms
callbacks:  108ms
fibers:     7ms
generators: 106ms
STARTING PASS: n=1, loop=100000, modulo=1000, fn=setImmediate
raw callbacks:  6ms
callbacks:  103ms
fibers:     2ms
generators: 98ms

*** readMe n=1 loop=10000 ***
STARTING PASS: n=1, loop=10000, modulo=3, fn=readMe
raw callbacks:  243ms
callbacks:  248ms
fibers:     245ms
generators: 258ms
STARTING PASS: n=1, loop=10000, modulo=10, fn=readMe
raw callbacks:  69ms
callbacks:  85ms
fibers:     73ms
generators: 83ms
STARTING PASS: n=1, loop=10000, modulo=100, fn=readMe
raw callbacks:  8ms
callbacks:  20ms
fibers:     8ms
generators: 20ms
BENCH COMPLETE!

@bjouhier (Author)

Quick observations:

  • In tight benches with lots of calls to setImmediate, raw callbacks outperform the others by a factor of 2 to 3.
  • Fibers always outperform streamline callbacks and generators.
  • Fibers leaves everyone else far behind when the sync logic dominates the async calls (when modulo is high). For example, it is about 4 times faster than raw callbacks in the n=25, loop=1, modulo=1000, fn=setImmediate case.
  • Streamline callbacks and generators always come out very close to each other, with a slight advantage to callbacks.
  • The results get much closer when real I/O calls start to dominate. For example, all results fall in the [243, 258] ms range with the simple loop of readMe calls.
  • The raw callbacks bench is less robust. It stack overflows when modulo gets close to 5000. The others don't.
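
That stack overflow is the problem that the "automatic trampolining" mentioned below works around. Roughly the idea, as a sketch of my own rather than streamline's implementation: bounce through setImmediate every so often so that deep synchronous callback chains get a fresh stack.

var depth = 0;
function trampolined(cb) {
  return function(err, val) {
    // defer one callback out of every 1000 completions to unwind the stack
    if (++depth % 1000 === 0) setImmediate(function() { cb(err, val); });
    else cb(err, val);
  };
}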

My comments:

  • The difference between streamline callbacks and raw callbacks is likely due to the comfort features that streamline provides: long stack traces, automatic trampolining (which avoids the stack overflow with high modulo), a TLS-like context, robust exception handling. These aren't free.
  • I expected very good performance from fibers when the sync/async ratio increases. This is because the sync-style logic that sits on top of async calls undergoes very little transformation in fibers mode, so there is almost no overhead in the higher level sync-style code, not even the overhead of a callback (see the sleep sketch after this list). On the other hand, fibers has more overhead than callbacks when the number of async calls is very high, because it has to go through the fibers layer every time.
  • Generators are a bit disappointing but this is not completely surprising. First, they just landed in V8 and they probably aren't optimized yet. But this is also likely due to the "single frame continuation" constraint: when you have to traverse several layers of calls before reaching the async calls, every layer has to create a generator, and you need a run function that interacts with all these generators to make them move forward (see the run function in the lib/generators/runtime.js module, and the minimal sketch after this list). This is a bit like callbacks, where the callbacks impact all the layers that sit on top of async APIs, but not at all like fibers, where the higher layers don't need to be transformed.
  • The fibers and generators benches are based on code which has been transformed by streamline, not on hand-written code. There may be room for improvement with manual code, although I don't expect the gap to be anywhere near the one between raw callbacks and streamline callbacks. The fibers transformation/runtime is actually quite smart (Marcel wrote it). I wrote the generators transform and I think it is pretty efficient, but it would be interesting to bench it against other solutions, for example against libraries that combine promises and generators (I think those will be slower because they need to create more closures, but this is just a guess at this point).
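
To make the fibers point concrete, here is the classic node-fibers "sleep" sketch (mine, not taken from the bench): only the wrapper that touches the async API deals with the fiber; the code calling it stays plain, untransformed JavaScript.

var Fiber = require('fibers');

// only this wrapper knows about fibers; callers just call sleep(ms) as if it were synchronous
function sleep(ms) {
  var fiber = Fiber.current;
  setTimeout(function() { fiber.run(); }, ms);
  Fiber.yield();
}

Fiber(function() {
  sleep(10); // no callback and no transformation in this layer
  console.log('woke up');
}).run();

And here is a minimal sketch of the kind of run driver that generators mode needs (my own illustration, not the actual code of lib/generators/runtime.js): every layer above the async API must itself be a generator, and the driver resumes the waiting generator each time the underlying callback fires. Yielded values are assumed here to be functions that expect a node-style callback.

function run(genFn, args, cb) {
  var gen = genFn.apply(null, args);
  function step(err, val) {
    var r;
    try {
      r = err ? gen.throw(err) : gen.next(val);
    } catch (e) {
      return cb(e);
    }
    if (r.done) return cb(null, r.value);
    r.value(step); // resume the generator when the async call completes
  }
  step();
}

// every intermediate layer has to be a generator too, which is where the extra cost comes from
function* addOneLater(x) {
  yield function(next) { setImmediate(next); };
  return x + 1;
}
run(addOneLater, [41], function(err, v) { console.log(v); }); // prints 42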

@creationix

Great writeup!

@spollack

good stuff, thanks Bruno!
