Skip to content

Instantly share code, notes, and snippets.

@billywhizz

billywhizz/.gitignore

Last active Feb 19, 2021
Embed
What would you like to do?
stdio perf test for just-js
wc
wcb
scratch

testing the overhead of just(js) on a single syscall

Clone the gist

git clone git@gist.github.com:c8b13d66914f84745fbd9977ac72f7d4.git

preparation

install just(js) on linux/x86_64

  • Building requires g++, make and curl
# Get the latest just source
curl -L -o just.tar.gz https://github.com/just-js/just/archive/0.0.21.tar.gz
tar -zxvf just.tar.gz
cd just-0.0.21
make runtime-static
## install to /usr/local/bin
sudo make install
## export JUST_HOME for importing libs and JUST_TARGET for building/bundling
export JUST_HOME=$(pwd)
export JUST_TARGET=$(pwd)
  • Alternatively you can download a precompiled binary that should work on modern x86_64 linux here:
curl -L -o just https://github.com/just-js/just/releases/download/0.0.21/just

build and run with docker

docker build -t stdio-perf-test .
# run docker, mounting current directory in /app in container
docker run -it --rm -v $(pwd):/app stdio-perf-test

running

details in DEMO.md

[{"name":"./wc","data":[{"pid":28676,"name":"count","utime":6,"stime":324,"rss":819200,"time":3330,"status":0,"blocksize":65536,"count":250000,"program":"./wc","args":["65536"],"usage":99.09,"sys":97.29,"user":1.8,"bytes":16384000000,"rate":4692,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28679,"name":"count","utime":64,"stime":267,"rss":745472,"time":3626,"status":0,"blocksize":4096,"count":2000000,"program":"./wc","args":["4096"],"usage":89.33,"sys":72.06,"user":17.27,"bytes":8192000000,"rate":2154,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28682,"name":"count","utime":79,"stime":196,"rss":815104,"time":3470,"status":0,"blocksize":256,"count":2000000,"program":"./wc","args":["256"],"usage":79.25,"sys":56.48,"user":22.76,"bytes":512000000,"rate":140,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc.js","data":[{"pid":28684,"name":"count","utime":13,"stime":324,"rss":18644992,"time":3413,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc.js","65536"],"usage":98.74,"sys":94.93,"user":3.8,"bytes":16384000000,"rate":4578,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28701,"name":"count","utime":96,"stime":213,"rss":18509824,"time":3110,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc.js","4096"],"usage":99.35,"sys":68.48,"user":30.86,"bytes":8192000000,"rate":2512,"stdout":"size 8192000000 reads 2000000 blocksize 
4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28710,"name":"count","utime":112,"stime":159,"rss":17555456,"time":3086,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc.js","256"],"usage":82.79,"sys":48.57,"user":34.21,"bytes":512000000,"rate":158,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc-async.js","data":[{"pid":28720,"name":"count","utime":22,"stime":315,"rss":18894848,"time":3382,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc-async.js","65536"],"usage":98.56,"sys":92.13,"user":6.43,"bytes":16384000000,"rate":4620,"stdout":"size 16384000000 reads 250000 blocksize 65536 rss 18894848","results":{"size":16384000000,"reads":250000,"blocksize":65536,"rss":18894848}},{"pid":28737,"name":"count","utime":167,"stime":221,"rss":18972672,"time":3696,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc-async.js","4096"],"usage":99.66,"sys":56.76,"user":42.89,"bytes":8192000000,"rate":2113,"stdout":"size 8192000000 reads 2000000 blocksize 4096 rss 19120128","results":{"size":8192000000,"reads":2000000,"blocksize":4096,"rss":19120128}},{"pid":28748,"name":"count","utime":167,"stime":149,"rss":18755584,"time":3179,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc-async.js","256"],"usage":99.4,"sys":46.87,"user":52.53,"bytes":512000000,"rate":153,"stdout":"size 512000000 reads 2000000 blocksize 256 rss 17952768","results":{"size":512000000,"reads":2000000,"blocksize":256,"rss":17952768}}],"type":"column"},{"name":"./wcb","data":[{"pid":28758,"name":"count","utime":13,"stime":322,"rss":12816384,"time":3384,"status":0,"blocksize":65536,"count":250000,"program":"./wcb","args":["65536"],"usage":98.99,"sys":95.15,"user":3.84,"bytes":16384000000,"rate":4617,"stdout":"size 16384000000 reads 250000 blocksize 
65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28775,"name":"count","utime":86,"stime":220,"rss":12779520,"time":3091,"status":0,"blocksize":4096,"count":2000000,"program":"./wcb","args":["4096"],"usage":98.77,"sys":71.01,"user":27.75,"bytes":8192000000,"rate":2527,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28784,"name":"count","utime":99,"stime":168,"rss":12460032,"time":3177,"status":0,"blocksize":256,"count":2000000,"program":"./wcb","args":["256"],"usage":84.04,"sys":52.88,"user":31.16,"bytes":512000000,"rate":153,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"node wc-node.js","data":[{"pid":28793,"name":"count","utime":106,"stime":338,"rss":57270272,"time":4233,"status":0,"blocksize":65536,"count":250000,"program":"node","args":["wc-node.js","65536"],"usage":104.34,"sys":79.43,"user":24.91,"bytes":16384000000,"rate":3691,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"time":0}],"type":"column"},{"name":"./wc","data":[{"pid":28810,"name":"count","utime":7,"stime":332,"rss":745472,"time":3401,"status":0,"blocksize":65536,"count":250000,"program":"./wc","args":["65536"],"usage":99.67,"sys":97.61,"user":2.05,"bytes":16384000000,"rate":4594,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28823,"name":"count","utime":65,"stime":264,"rss":749568,"time":3713,"status":0,"blocksize":4096,"count":2000000,"program":"./wc","args":["4096"],"usage":88.6,"sys":71.1,"user":17.5,"bytes":8192000000,"rate":2104,"stdout":"size 8192000000 reads 2000000 blocksize 
4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28825,"name":"count","utime":67,"stime":211,"rss":827392,"time":3596,"status":0,"blocksize":256,"count":2000000,"program":"./wc","args":["256"],"usage":77.3,"sys":58.67,"user":18.63,"bytes":512000000,"rate":135,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc.js","data":[{"pid":28827,"name":"count","utime":13,"stime":325,"rss":19058688,"time":3418,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc.js","65536"],"usage":98.88,"sys":95.08,"user":3.8,"bytes":16384000000,"rate":4571,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28844,"name":"count","utime":86,"stime":223,"rss":18411520,"time":3142,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc.js","4096"],"usage":98.34,"sys":70.97,"user":27.37,"bytes":8192000000,"rate":2486,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28857,"name":"count","utime":108,"stime":157,"rss":17571840,"time":3127,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc.js","256"],"usage":84.74,"sys":50.2,"user":34.53,"bytes":512000000,"rate":156,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc-async.js","data":[{"pid":28867,"name":"count","utime":21,"stime":318,"rss":19091456,"time":3411,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc-async.js","65536"],"usage":99.38,"sys":93.22,"user":6.15,"bytes":16384000000,"rate":4580,"stdout":"size 16384000000 reads 250000 blocksize 65536 rss 
19091456","results":{"size":16384000000,"reads":250000,"blocksize":65536,"rss":19091456}},{"pid":28894,"name":"count","utime":179,"stime":194,"rss":19017728,"time":3736,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc-async.js","4096"],"usage":99.83,"sys":51.92,"user":47.91,"bytes":8192000000,"rate":2091,"stdout":"size 8192000000 reads 2000000 blocksize 4096 rss 19165184","results":{"size":8192000000,"reads":2000000,"blocksize":4096,"rss":19165184}},{"pid":28907,"name":"count","utime":165,"stime":155,"rss":18763776,"time":3209,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc-async.js","256"],"usage":99.71,"sys":48.3,"user":51.41,"bytes":512000000,"rate":152,"stdout":"size 512000000 reads 2000000 blocksize 256 rss 17862656","results":{"size":512000000,"reads":2000000,"blocksize":256,"rss":17862656}}],"type":"column"},{"name":"./wcb","data":[{"pid":28916,"name":"count","utime":11,"stime":326,"rss":12816384,"time":3409,"status":0,"blocksize":65536,"count":250000,"program":"./wcb","args":["65536"],"usage":98.85,"sys":95.62,"user":3.22,"bytes":16384000000,"rate":4583,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28937,"name":"count","utime":82,"stime":225,"rss":12779520,"time":3091,"status":0,"blocksize":4096,"count":2000000,"program":"./wcb","args":["4096"],"usage":99.32,"sys":72.79,"user":26.52,"bytes":8192000000,"rate":2527,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28947,"name":"count","utime":95,"stime":173,"rss":12460032,"time":3250,"status":0,"blocksize":256,"count":2000000,"program":"./wcb","args":["256"],"usage":82.46,"sys":53.23,"user":29.23,"bytes":512000000,"rate":150,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"node 
wc-node.js","data":[{"pid":28957,"name":"count","utime":102,"stime":340,"rss":57532416,"time":4233,"status":0,"blocksize":65536,"count":250000,"program":"node","args":["wc-node.js","65536"],"usage":104.41,"sys":80.32,"user":24.09,"bytes":16384000000,"rate":3691,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"time":0}],"type":"column"},{"name":"./wc","data":[{"pid":28979,"name":"count","utime":9,"stime":330,"rss":778240,"time":3414,"status":0,"blocksize":65536,"count":250000,"program":"./wc","args":["65536"],"usage":99.29,"sys":96.66,"user":2.63,"bytes":16384000000,"rate":4576,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28989,"name":"count","utime":68,"stime":258,"rss":737280,"time":3626,"status":0,"blocksize":4096,"count":2000000,"program":"./wc","args":["4096"],"usage":89.9,"sys":71.15,"user":18.75,"bytes":8192000000,"rate":2154,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28991,"name":"count","utime":77,"stime":203,"rss":741376,"time":3547,"status":0,"blocksize":256,"count":2000000,"program":"./wc","args":["256"],"usage":78.93,"sys":57.23,"user":21.7,"bytes":512000000,"rate":137,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc.js","data":[{"pid":28993,"name":"count","utime":11,"stime":329,"rss":18661376,"time":3426,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc.js","65536"],"usage":99.24,"sys":96.03,"user":3.21,"bytes":16384000000,"rate":4560,"stdout":"size 16384000000 reads 250000 blocksize 
65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":29010,"name":"count","utime":89,"stime":220,"rss":18501632,"time":3149,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc.js","4096"],"usage":98.12,"sys":69.86,"user":28.26,"bytes":8192000000,"rate":2480,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":29020,"name":"count","utime":105,"stime":158,"rss":17915904,"time":3086,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc.js","256"],"usage":85.22,"sys":51.19,"user":34.02,"bytes":512000000,"rate":158,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc-async.js","data":[{"pid":29030,"name":"count","utime":28,"stime":308,"rss":19034112,"time":3382,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc-async.js","65536"],"usage":99.34,"sys":91.07,"user":8.27,"bytes":16384000000,"rate":4620,"stdout":"size 16384000000 reads 250000 blocksize 65536 rss 19034112","results":{"size":16384000000,"reads":250000,"blocksize":65536,"rss":19034112}},{"pid":29048,"name":"count","utime":166,"stime":202,"rss":18862080,"time":3696,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc-async.js","4096"],"usage":99.56,"sys":54.65,"user":44.91,"bytes":8192000000,"rate":2113,"stdout":"size 8192000000 reads 2000000 blocksize 4096 rss 19116032","results":{"size":8192000000,"reads":2000000,"blocksize":4096,"rss":19116032}},{"pid":29057,"name":"count","utime":169,"stime":152,"rss":18743296,"time":3230,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc-async.js","256"],"usage":99.38,"sys":47.05,"user":52.32,"bytes":512000000,"rate":151,"stdout":"size 512000000 reads 2000000 blocksize 256 rss 
17940480","results":{"size":512000000,"reads":2000000,"blocksize":256,"rss":17940480}}],"type":"column"},{"name":"./wcb","data":[{"pid":29067,"name":"count","utime":8,"stime":334,"rss":12812288,"time":3457,"status":0,"blocksize":65536,"count":250000,"program":"./wcb","args":["65536"],"usage":98.92,"sys":96.61,"user":2.31,"bytes":16384000000,"rate":4519,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":29085,"name":"count","utime":90,"stime":217,"rss":12775424,"time":3103,"status":0,"blocksize":4096,"count":2000000,"program":"./wcb","args":["4096"],"usage":98.93,"sys":69.93,"user":29,"bytes":8192000000,"rate":2517,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":29095,"name":"count","utime":98,"stime":172,"rss":12460032,"time":3246,"status":0,"blocksize":256,"count":2000000,"program":"./wcb","args":["256"],"usage":83.17,"sys":52.98,"user":30.19,"bytes":512000000,"rate":150,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"node wc-node.js","data":[{"pid":29113,"name":"count","utime":105,"stime":347,"rss":57204736,"time":4329,"status":0,"blocksize":65536,"count":250000,"program":"node","args":["wc-node.js","65536"],"usage":104.41,"sys":80.15,"user":24.25,"bytes":16384000000,"rate":3609,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"time":0}],"type":"column"}]
const { writeFile } = require('fs')
const { launch, watch } = require('process')
/**
 * Build one Highcharts series per benchmark run by plucking a single
 * field from every data point.
 * @param {Array<{name: string, type: string, data: Object[]}>} results - parsed results.json
 * @param {string} field - property name to extract from each data point (e.g. 'time', 'rate')
 * @returns {Array<{name: string, type: string, data: *[]}>} series objects for the chart config
 */
function getSeries (results, field) {
  const series = []
  for (const { name, type, data } of results) {
    const values = data.map((point) => point[field])
    series.push({ name, type, data: values })
  }
  return series
}
/**
 * Like getSeries, but divides each value by the run's CPU usage
 * (usage is a percentage) to approximate a per-core figure.
 * @param {Array<{name: string, type: string, data: Object[]}>} results - parsed results.json
 * @param {string} field - property name to normalise (e.g. 'rate')
 * @returns {Array<{name: string, type: string, data: number[]}>} per-core series for the chart
 */
function getSeriesPerCore (results, field) {
  return results.map(({ name, type, data }) => ({
    name,
    type,
    // usage of 100 means one full core; scale the raw value accordingly
    data: data.map((point) => Math.floor(point[field] / (point.usage / 100)))
  }))
}
// Build three Highcharts configurations from results.json, embed them in a
// standalone HTML page, write it to disk and open it with xdg-open.
// NOTE(review): this is a just(js) runtime script — ArrayBuffer.fromString,
// just.sys.cwd and the 'process' launch/watch API are just(js) extensions,
// not Node.js APIs.
async function main () {
// NOTE(review): bare 'results.json' (no './') — presumably the just(js) module
// loader resolves this relative to the cwd; confirm against the runtime docs.
const results = require('results.json')
// x-axis labels, one per blocksize tested (taken from the first run's data)
const categories = results[0].data.map(r => `bs = ${r.blocksize}`)
// chart 1: wall-clock time per run
const duration = {
title: { text: 'Time Taken (ms), Lower is Better' },
xAxis: { categories },
yAxis: {
title: { text: 'time' },
tickInterval: 10
},
series: getSeries(results, 'time')
}
// chart 2: raw throughput per run
const throughput = {
title: { text: 'Throughput (MBps), Higher is Better' },
xAxis: { categories },
yAxis: {
title: { text: 'rate' },
tickInterval: 10
},
series: getSeries(results, 'rate')
}
// chart 3: throughput normalised by CPU usage
const throughputPerCore = {
title: { text: 'Throughput Per Core (MBps), Higher is Better' },
xAxis: { categories },
yAxis: {
title: { text: 'rate' },
tickInterval: 10
},
series: getSeriesPerCore(results, 'rate')
}
// Self-contained page: Highcharts from CDN, one div per chart, configs
// inlined as JSON. (No comments may be added inside this template literal —
// they would become part of the generated HTML.)
const html = `
<script src="https://code.highcharts.com/highcharts.js"></script>
<script src="https://code.highcharts.com/modules/exporting.js"></script>
<style>
#container {
height: 50%;
margin: 1em auto;
}
</style>
<div id="throughput"></div>
<div id="throughputPerCore"></div>
<div id="duration"></div>
<script type="text/javascript">
Highcharts.chart('duration', ${JSON.stringify(duration, null, ' ')});
Highcharts.chart('throughput', ${JSON.stringify(throughput, null, ' ')});
Highcharts.chart('throughputPerCore', ${JSON.stringify(throughputPerCore, null, ' ')});
</script>`
// just(js) writeFile takes an ArrayBuffer, not a string
writeFile('./results.html', ArrayBuffer.fromString(html))
// open the report in the default browser and wait for the viewer to exit
await watch(launch('xdg-open', [`file://${just.sys.cwd()}/results.html`]))
}
main().catch(err => console.error(err))
const { writeFile } = require('fs')
const dd = require('dd.js')
// ANSI terminal escape codes used to colour progress output
const AD = '\u001b[0m' // reset / default
const AG = '\u001b[32m' // green
const AM = '\u001b[35m' // magenta
/**
 * Execute one benchmark: pipe `count` blocks of `size` bytes through
 * `command` via dd.run, sanity-check the parsed results and print a
 * coloured progress line.
 * @param {number} size - block size in bytes
 * @param {number} count - number of blocks to pipe
 * @param {string} command - program (plus optional args) to benchmark
 * @returns {Promise<Object>} the process record produced by dd.run
 * @throws {Error} when the child reports a size/blocksize/read count
 *   that does not match what was requested
 */
async function run (size = 65536, count = 500000, command = './wc') {
  const banner = `${command.padEnd(20, ' ')} ${size.toString().padStart(6, ' ')} ${count.toString().padStart(10, ' ')} `
  just.print(banner, false)
  // split "prog arg" into executable + argv; the blocksize is passed through
  const [executable, ...argv] = `${command} ${size}`.split(' ')
  const process = await dd.run(size, count, executable, argv)
  const { results } = process
  const expectedBytes = size * count
  if (results.size !== expectedBytes) throw new Error('bad size in result')
  if (results.blocksize !== size) throw new Error('bad blocksize in result')
  if (results.reads !== count) throw new Error('bad number of reads in result')
  const timeField = process.time.toString().padStart(6, ' ')
  const rateField = process.rate.toString().padStart(6, ' ')
  just.print(`${AG}complete${AD} ${AM}time${AD} ${timeField} ${AM}rate${AD} ${rateField}`)
  return process
}
/**
 * Run the full blocksize sweep for one program and collect the results
 * in chart-ready shape.
 * @param {string} program - command line of the program under test
 * @param {number} factor - multiplier for the per-blocksize iteration counts
 * @returns {Promise<{name: string, data: Object[], type: string}>}
 */
async function runall (program, factor = 1000) {
  const results = { name: program, data: [], type: 'column' }
  results.data.push(await run(65536, 250 * factor, program))
  if (/node\s.+/.test(program)) {
    // node is only benchmarked at 64k; pad with a zero entry so the
    // series lines up with the others
    results.data.push({ time: 0 })
  } else {
    // [blocksize, base count] pairs — smaller blocks get more iterations
    const sweep = [
      [32768, 500],
      [16384, 1000],
      [8192, 1500],
      [4096, 2000],
      [2048, 2000],
      [1024, 2000],
      [512, 2000],
      [256, 2000]
    ]
    for (const [blocksize, base] of sweep) {
      results.data.push(await run(blocksize, base * factor, program))
    }
  }
  return results
}
// Benchmark driver: warm each program up once, then run the full sweep
// `repeat` times, keeping the best (lowest time / highest rate) figure seen
// per program per blocksize. Writes incremental raw results to all.json and
// the best-of summary to results.json.
async function main (factor = 1000, repeat = 10) {
const programs = [
'./wc',
'just wc.js',
'just wc-async.js',
'./wcb',
'node wc-node.js'
]
// best result per program, keyed by program command line
const results = {}
// every raw run, in order, for debugging/auditing
const all = []
just.print('warming up')
// one throwaway pass so caches/page tables are hot before measuring
for (const program of programs) {
await runall(program, factor)
}
just.print('done warming up')
for (let i = 0; i < repeat; i++) {
for (const program of programs) {
const result = await runall(program, factor)
all.push(result)
// persist after every run so a crash loses nothing
writeFile('./all.json', ArrayBuffer.fromString(JSON.stringify(all)))
if (results[program]) {
// merge: keep the best time and rate seen so far for each blocksize
const prev = results[program].data
const curr = result.data
for (let j = 0; j < prev.length; j++) {
if (curr[j].time < prev[j].time) prev[j].time = curr[j].time
if (curr[j].rate > prev[j].rate) prev[j].rate = curr[j].rate
}
} else {
results[program] = result
}
}
}
const output = Object.keys(results).map(k => results[k])
writeFile('./results.json', ArrayBuffer.fromString(JSON.stringify(output)))
// cancel the keep-alive timer (declared at file level) so the process can exit
just.clearTimeout(timer)
}
// TODO: bug where the event loop exits early if nothing is scheduled on it -
// this appears to be a microtask issue. We need to track how many microtasks
// are queued in the event loop scope and keep the loop running until there
// are none. The no-op interval below keeps the loop alive until main()
// clears it.
const timer = just.setInterval(() => {}, 1000)
// CLI: just <script> [factor] [repeat] — forwarded as integers to main()
main(...just.args.slice(2).map(v => parseInt(v, 10))).catch(err => just.error(err.stack))
const { sys } = just.library('sys')
const { net } = just.library('net')
const { epoll } = just.library('epoll')
const { fork, exec, waitpid } = sys
const { pipe, close, dup, read } = net
const { STDIN_FILENO, STDOUT_FILENO } = sys
const { SystemError, exit, setInterval, setTimeout, clearTimeout } = just
const { readStat } = require('monitor.js')
const { EPOLLERR, EPOLLHUP, EPOLLIN } = epoll
// Orchestrate one benchmark: fork a `dd` producer writing /dev/zero blocks
// into a pipe, fork the program under test reading that pipe with its stdout
// captured on a second pipe, sample both children's CPU/RSS every 10ms, and
// resolve with an annotated stats record once the counter child exits.
// NOTE(review): just(js) runtime script — fork/exec/pipe/waitpid come from
// the just 'sys'/'net' libraries, not Node.js.
function run (blocksize = 65536, count = 1000, program = './wc', args = []) {
// pipe1: dd stdout -> counter stdin; pipe2: counter stdout -> us
const pipe1 = []
pipe(pipe1, 0)
const pipe2 = []
pipe(pipe2, 0)
let children = []
// child 1: dd produces `count` blocks of `blocksize` zero bytes
let pid = fork()
if (pid < 0) throw new SystemError('fork')
if (pid === 0) {
// in the child: wire stdout to pipe1's write end, drop unused fds, exec dd
dup(pipe1[1], STDOUT_FILENO)
close(pipe1[0])
close(pipe1[1])
exec('dd', ['if=/dev/zero', `bs=${blocksize}`, 'status=none', `count=${count}`])
// only reached if exec fails
exit(1)
}
children.push({ pid, start: Date.now(), name: 'dd' })
// child 2: the program under test, reading pipe1 and writing pipe2
pid = fork()
if (pid < 0) throw new SystemError('fork')
if (pid === 0) {
dup(pipe1[0], STDIN_FILENO)
dup(pipe2[1], STDOUT_FILENO)
close(pipe1[0])
close(pipe1[1])
close(pipe2[1])
exec(program, args)
exit(1)
}
children.push({ pid, start: Date.now(), name: 'count' })
// parent: close the ends now owned by the children; we keep pipe2[0]
close(pipe1[0])
close(pipe1[1])
close(pipe2[1])
// collect the counter's stdout chunks as they arrive on the event loop
const stdout = []
const buf = new ArrayBuffer(4096)
just.factory.loop.add(pipe2[0], (fd, event) => {
if (event & EPOLLERR || event & EPOLLHUP) {
// writer closed or errored: stop watching and release the fd
just.factory.loop.remove(fd)
close(fd)
return
}
if (event & EPOLLIN) {
stdout.push(buf.readString(read(fd, buf)))
}
})
// sample per-child CPU time and peak RSS from /proc every 10ms
const timer = setInterval(() => {
for (const process of children) {
const { utime, stime, rssPages } = readStat(process.pid)
process.utime = utime
process.stime = stime
// track the high-water mark of resident pages
if (!process.rss || rssPages > process.rss) process.rss = rssPages
}
}, 10)
// Poll for child exits with non-hanging waitpid; reschedules itself every
// 1ms until both children have been reaped. Resolves with the annotated
// 'count' child record; rejects on any non-zero exit status.
function check (resolve, reject) {
for (const process of children) {
const [status, kpid] = waitpid(new Uint32Array(2), process.pid)
if (kpid === process.pid) {
if (status !== 0) {
reject(new Error(`Bad Status for ${process.name} ${status}`))
return
}
if (process.name === 'count') {
// annotate the record with derived metrics before resolving
process.end = Date.now()
process.time = process.end - process.start
delete process.start
delete process.end
process.status = status
process.blocksize = parseInt(blocksize, 10)
process.count = parseInt(count, 10)
process.program = program
process.args = args
// convert sampled pages to bytes
process.rss = process.rss * just.sys.pageSize
// CPU percentages: utime/stime are in clock ticks
// (presumably 100Hz, i.e. 10ms ticks — confirm against readStat)
process.usage = Math.floor((process.stime + process.utime) / (process.time / 10) * 10000) / 100
process.sys = Math.floor((process.stime) / (process.time / 10) * 10000) / 100
process.user = Math.floor((process.utime) / (process.time / 10) * 10000) / 100
process.bytes = parseInt(blocksize, 10) * parseInt(count, 10)
// throughput in MB per second
process.rate = Math.floor((process.bytes / (process.time / 1000)) / (1024 * 1024))
process.stdout = stdout.join('').trim()
process.results = {}
// the child prints alternating "key value" pairs; parse into results
const parts = process.stdout.split(' ')
for (let i = 0; i < parts.length; i += 2) {
process.results[parts[i]] = parseInt(parts[i + 1], 10)
}
resolve(process)
}
// reaped: drop the child from the watch list
children = children.filter(child => child.pid !== kpid)
}
}
if (children.length) {
// some children still running: poll again shortly
setTimeout(() => check(resolve, reject), 1)
return
}
// all children reaped: stop the stats sampler
clearTimeout(timer)
}
return new Promise(check)
}
module.exports = { run }

goals

The goal of this demo is to establish the overhead of making syscalls in just(js) runtime for Javascript. We will write a simple console application which counts the number of bytes piped to it over stdin. The application will call the read syscall repeatedly in 64k chunks, which seems to be the optimal setting.

We will implement the demo in C, in Javascript on just(js) using both sync and async interfaces. We will also write a comparison application on node.js to establish if there is any extra overhead on that platform.

If you want to skip all the detail below you can go straight to the results and assertions here.

prerequisites

See README.md for initial setup. In order to run this test you will need linux strace and time tools. These should be pre-installed if you run in the docker created in README.md.

initial investigation - using wc

First of all, let's see what performance we get from using the wc tool that comes with most if not all unix-y systems. We will pipe 500k 64k chunks from /dev/zero into 'wc -c' command to count the number of bytes.

time dd if=/dev/zero bs=65536 count=500000 | wc -c

Run it a few times to rule out any contention and we can see best performance on my setup of something like this

32768000000 bytes (33 GB, 31 GiB) copied, 8.20788 s, 4.0 GB/s

real    0m8.212s

So it takes 8.2 seconds in total and runs at a rate of 4 GB per second

establishing a baseline - wc.c

We should be able to improve slightly on the system wc if we write an optimised C version, as the wc application does more than just count bytes and also likely does not have any processor-specific optimisations that may help.

Here is our little program. It shouldn't need any explanation.

wc.c

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>

/*
 * Count bytes read from stdin in blocksize chunks and report
 * "size <bytes> reads <calls> blocksize <bytes>" on stdout.
 * Usage: wc [blocksize]   (default 65536)
 *
 * Fix: the original passed atoi(argv[1]) straight into an unsigned int and
 * a VLA. A non-numeric or negative argument yielded blocksize 0 or a huge
 * wrapped value, making `char buf[blocksize]` undefined behavior. The
 * argument is now validated before use. Also uses ssize_t for read()'s
 * return value and returns 0 explicitly on success.
 */
int main(int argc, char *argv[]) {
  unsigned int blocksize = 65536;
  if (argc == 2) {
    int requested = atoi(argv[1]);
    if (requested <= 0) {
      fprintf(stderr, "invalid blocksize: %s\n", argv[1]);
      exit(1);
    }
    blocksize = (unsigned int)requested;
  }
  char buf[blocksize];
  unsigned long size = 0;
  unsigned int reads = 0;
  /* loop until EOF (0) or error (<0) */
  ssize_t n = read(STDIN_FILENO, buf, blocksize);
  while (n > 0) {
    reads++;
    size += (unsigned long)n;
    n = read(STDIN_FILENO, buf, blocksize);
  }
  if (n < 0) {
    fprintf(stderr, "read: %s (%i)\n", strerror(errno), errno);
    exit(1);
  }
  fprintf(stdout, "size %lu reads %u blocksize %u\n", size, reads, blocksize);
  return 0;
}

Let's compile it with all optimizations...

gcc -O3 -o wc wc.c

... and let's see what we get ...

time dd if=/dev/zero bs=65536 count=500000 | ./wc

500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 7.24869 s, 4.5 GB/s
size: 32768000000

real    0m7.253s
user    0m0.424s
sys     0m14.077s

Ok, so that's 7.2 seconds and 4.5 GB/s - a nice 11% improvement over the builtin wc tool.

While we are at it, let's see what syscalls our program makes

dd if=/dev/zero bs=65536 count=500000 | strace -c ./wc

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
100.00    5.447454          11    500002           read
  0.00    0.000013           4         3           brk
  0.00    0.000009           3         3           fstat

So 500k read syscalls sounds about right and there are no other syscalls to speak of. We can compare this to the other programs later. This program is just running read() in a tight loop and doing little else, which is exactly what we want for our test.

just sync performance

We will run with blocking/synchronous syscalls in wc.js and see how close we get to raw C performance.

wc.js

// wc.js — blocking byte counter for the just(js) runtime.
// Reads stdin in blocksize chunks with synchronous read() calls until EOF,
// then prints the totals. Blocksize comes from argv[2] (default 64k).
const { print, error, exit, memoryUsage, sys, net } = just
const { read } = net
const { strerror, errno, STDIN_FILENO } = sys
const blocksize = parseInt(just.args[2] || 65536)
const buf = new ArrayBuffer(blocksize)
let size = 0
let reads = 0
let n
// read() returns bytes read, 0 at EOF, negative on error
while ((n = read(STDIN_FILENO, buf)) > 0) {
  reads++
  size += n
}
if (n < 0) {
  error(`read: ${strerror(errno())} (${errno()})`)
  exit(1)
}
print(`size ${size} reads ${reads} blocksize ${blocksize}`)

It shouldn't need much explanation. We import the internal modules we need from just runtime, create a 64k ArrayBuffer and then do blocking reads on stdin in a loop until we get 0 or an error code back.

Let's see what the results are like...

time dd if=/dev/zero bs=65536 count=500000 | just wc.js

500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 7.19774 s, 4.6 GB/s
size 32768000000 rss 18542592

real    0m7.205s
user    0m0.649s
sys     0m13.740s

So, looks like performance is exactly the same as the C version, if not a little faster. How can this be? Let's look at the syscalls to make sure we are making the same number of read() calls as the C version.

dd if=/dev/zero bs=65536 count=500000 | strace -c just wc.js

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 99.96    5.341601          11    500014           read
  0.02    0.000878           7       133         6 futex
  0.01    0.000495           5       110           mprotect
  0.00    0.000196          33         6           munmap
  0.00    0.000164           5        36           madvise

This looks good. The same 500k syscalls. We can also see some mprotect and futex calls. These are due to locking issues with ArrayBuffers in v8 - something I have looked into before but need to investigate further.

Let's just take a quick look at garbage collection stats and see what is happening. We are only allocating a single buffer outside the loop and on the C++ side we are only creating short lived v8 objects which should get marked as "young generation" and collected by the scavenger which is very quick.

dd if=/dev/zero bs=65536 count=500000 | just --trace-gc wc.js

[8451:0x298000000000]      688 ms: Scavenge 1.3 (1.5) -> 0.9 (2.0) MB, 0.3 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     1201 ms: Scavenge 1.3 (2.0) -> 0.9 (1.8) MB, 0.3 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     1819 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     2441 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     3059 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     3678 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     4304 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     4934 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     5555 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     6175 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
[8451:0x298000000000]     6799 ms: Scavenge 1.4 (1.8) -> 0.9 (1.8) MB, 0.1 / 0.0 ms  (average mu = 1.000, current mu = 1.000) allocation failure 
500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 7.38609 s, 4.4 GB/s
size 32768000000 rss 18587648
[8451:0x298000000000]     7381 ms: Mark-sweep (reduce) 1.3 (1.8) -> 0.1 (1.8) MB, 0.4 / 0.0 ms  (average mu = 1.000, current mu = 1.000) low memory notification GC in old space requested
[8451:0x298000000000]     7382 ms: Mark-sweep (reduce) 0.1 (1.8) -> 0.1 (1.0) MB, 0.4 / 0.0 ms  (average mu = 0.068, current mu = 0.068) low memory notification GC in old space requested

We can see the scavenger working away roughly every 600ms but having negligible impact on performance. The full Mark-sweep collection at the end is because just(js) runtime forces garbage collection and clean up of memory before exiting.

If you want to dive deeper into V8 garbage collection there are two very good articles here and here on the V8 team blog.

So, I think we are ok in asserting that there is practically zero overhead to a simple syscall from JS compared to plain C.

just async performance

Let's do the same thing but using the event loop and only counting the bytes as the event loop tells us data is ready. The code here is slightly more complex as we need to mark the stdin file descriptor as non-blocking and add it to the default event loop of the just runtime.

wc-async.js

const { net, sys, print, error, memoryUsage } = just
const { read, O_NONBLOCK } = net
const { strerror, errno, STDIN_FILENO } = sys
const { EPOLLIN } = just.loop
const { loop } = just.factory
const blocksize = parseInt(just.args[2] || 65536)
const buf = new ArrayBuffer(blocksize)
const flags = sys.fcntl(STDIN_FILENO, sys.F_GETFL, 0) | O_NONBLOCK
sys.fcntl(STDIN_FILENO, sys.F_SETFL, flags)
let size = 0
let reads = 0
function onData (fd) {
  const n = read(fd, buf)
  if (n < 0) {
    error(`read: ${strerror(errno())} (${errno()})`)
    loop.remove(fd)
    just.exit(1)
  }
  if (n === 0) {
    print(`size ${size} reads ${reads} blocksize ${blocksize} rss ${memoryUsage().rss}`)
    loop.remove(fd)
    return
  }
  reads++
  size += n
}
loop.add(STDIN_FILENO, onData, EPOLLIN)

We import the default event loop from the just runtime, use fcntl to set the fd as non blocking, and then we tell the event loop that we are only listening for data coming in (EPOLLIN).

By default we are using level triggered events which means the event will carry on firing if we do not read all the available data on each iteration. If we had added EPOLLET to our mask as follows:

const { EPOLLIN, EPOLLET } = just.loop
...
loop.add(STDIN_FILENO, onData, EPOLLIN | EPOLLET)

... then we would only receive an event when data was detected and would be expected to keep on reading until the read syscall returns EAGAIN to indicate the kernel buffer for that file descriptor is empty. If you want to delve deeper into this, I would suggest this great article from Cindy Sridharan.

So, let's see what the async results look like.

time dd if=/dev/zero bs=65536 count=500000 | just wc-async.js

500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 7.16841 s, 4.6 GB/s
size 32768000000 rss 19324928

real    0m7.175s
user    0m0.796s
sys     0m13.533s

Nice. So it looks like doing async adds little or no overhead to this program. Let's see what syscalls were made.

time dd if=/dev/zero bs=65536 count=500000 | strace -c just wc-async.js

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 76.89    5.228990          10    500013           read
 23.10    1.570899           3    500001           epoll_wait
  0.01    0.000503           6        83         1 futex
  0.00    0.000209           2        93           mprotect

Interesting. So even though we are making twice as many syscalls we get the same performance. What is going on here? I'll have to delve a little deeper into why this is the case but I am guessing it is something to do with the blocking calls in the first example having some additional overhead over the non-blocking reads in the async version.

just(js) static build

Before we finish with just(js), let's have a look at the memory usage. We can run a quick eval to see what default memory usage is on startup

just eval 'just.print(just.memoryUsage().rss)'
13529088

So, about 13.5 MB. In the synchronous example we saw usage after the run was 18.5 MB and 19.3 MB for the async version. So the overhead of loading the various modules and running the code is around 5-6MB. If we are interested in reducing memory usage further we can compile the application into a static binary and see what happens.

First, we make a few changes to take into account our static app will not use any of the standard just(js) runtime created here. Instead, we will compile a bare application that only uses the small number of functions (currently print, error, load, exit, pid, chdir, sleep, builtin, memoryUsage and version) in the just(js) core and the builder will figure out what other modules it needs and compile those in.

wcb.js

// we need to use just.library() to load modules for now as that is how
// the builder picks them up
just.library = (name, lib = name) => just.load(name)
const { sys } = just.library('sys')
const { net } = just.library('net')
const { read } = net
const { strerror, errno, STDIN_FILENO } = sys
// chunk size for each read(); first program argument, default 64KiB
const blocksize = parseInt(just.args[1] || 65536)
const buf = new ArrayBuffer(blocksize)
let size = 0
let reads = 0
let n = 0
// read() returns 0 at EOF and a negative value on error. The condition must
// test n > 0 explicitly: a bare `while ((n = read(...)))` treats -1 as
// truthy and would loop forever on a read error.
while ((n = read(STDIN_FILENO, buf)) > 0) {
  reads++
  size += n
}
if (n < 0) {
  just.error(`read: ${strerror(errno())} (${errno()})`)
  just.exit(1)
}
just.print(`size ${size} reads ${reads} blocksize ${blocksize}`)

Let's run a build with the --dump flag to see what kind of configuration this generates. We will also pass --clean and --static flags to tell it to clean up existing auto generated object files and to build a statically linked binary so we will have no runtime dependencies on system libraries.

just build wcb.js --clean --static --dump

{
  "version": "0.0.21",
  "libs": [
    "lib/fs.js",
    "lib/loop.js",
    "lib/path.js",
    "lib/process.js",
    "lib/build.js",
    "lib/repl.js",
    "lib/configure.js",
    "lib/acorn.js"
  ],
  "modules": [
    {
      "name": "sys",
      "obj": [
        "modules/sys/sys.o"
      ],
      "lib": [
        "rt"
      ]
    },
    {
      "name": "fs",
      "obj": [
        "modules/fs/fs.o"
      ]
    },
    {
      "name": "net",
      "obj": [
        "modules/net/net.o"
      ]
    },
    {
      "name": "vm",
      "obj": [
        "modules/vm/vm.o"
      ]
    },
    {
      "name": "epoll",
      "obj": [
        "modules/epoll/epoll.o"
      ]
    }
  ],
  "capabilities": [],
  "target": "wcb",
  "main": "just.js",
  "v8flags": "--stack-trace-limit=10 --use-strict --disallow-code-generation-from-strings",
  "embeds": [
    "just.js",
    "config.js",
    "wcb.js"
  ],
  "static": true,
  "debug": false,
  "v8flagsFromCommandLine": true,
  "external": {},
  "index": "wcb.js",
  "LIBS": "lib/fs.js lib/loop.js lib/path.js lib/process.js lib/build.js lib/repl.js lib/configure.js lib/acorn.js",
  "EMBEDS": "just.js config.js wcb.js",
  "MODULES": "modules/sys/sys.o modules/fs/fs.o modules/net/net.o modules/vm/vm.o modules/epoll/epoll.o",
  "LIB": "-lrt",
  "justDir": "/home/andrew/.just",
  "build": "main-static",
  "moduleBuild": "module-static"
}

Ok, so that looks like it has a bunch of extra junk in there that we don't need for our mini-app. So, let's create a config for the app to tell the builder we want to use 'wcb.js' as our main script and not 'just.js'.

wcb.config.js

module.exports = { main: 'wcb.js' }

And let's dump the config again and see what comes out this time.

just build wcb.js --clean --static --dump

{
  "main": "wcb.js",
  "external": {},
  "modules": [
    {
      "name": "sys",
      "obj": [
        "modules/sys/sys.o"
      ],
      "lib": [
        "rt"
      ]
    },
    {
      "name": "net",
      "obj": [
        "modules/net/net.o"
      ]
    }
  ],
  "target": "wcb",
  "version": "0.0.21",
  "v8flags": "--stack-trace-limit=10 --use-strict --disallow-code-generation-from-strings",
  "debug": false,
  "capabilities": [],
  "static": true,
  "libs": [],
  "embeds": [
    "wcb.js"
  ],
  "LIBS": "",
  "EMBEDS": "wcb.js",
  "MODULES": "modules/sys/sys.o modules/net/net.o",
  "LIB": "-lrt",
  "justDir": "/home/andrew/.just",
  "build": "main-static",
  "moduleBuild": "module-static"
}

That looks better. Now we are only importing the sys and net C++ modules and none of the JS files and C++ libraries not required for this program. Let's go ahead and build it. In order to build an app, we will need to set the JUST_HOME environment variable to tell the builder where to download and build the various things it needs. You can set this to the directory you compiled the runtime in or if you are using docker it will already be set for you.

just build wcb.js --cleanall --clean --static --silent

clean wcb complete in 0.01 sec
clean modules/sys complete in 0.02 sec
build modules/sys complete in 1.35 sec
clean modules/net complete in 1.36 sec
build modules/net complete in 2.37 sec
build wcb 0.0.21 (wcb.js) complete in 5.99 sec

If you have issues with previous versions of object files that were generated you can pass the --cleanall flag also which will clean the C++ modules and rebuild them. The build system is very much a work in progress so please report any issues here.

We should now have a nice small binary called 'wcb' in the current directory. On my system this comes out around 13MB which is quite a bit smaller than the dynamic build. Bear in mind that a static build will not be able to import any libraries using dlopen at run-time so this option is very much for those cases where the app has all the dependencies it needs bundled up within it.

Let's run the test on our new binary and see what the memory usage is like.

time dd if=/dev/zero bs=65536 count=500000 | ./wcb

500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 7.2249 s, 4.5 GB/s
size 32768000000 rss 12763136

real    0m7.232s
user    0m0.581s
sys     0m13.869s

Nice! we just saved about 6MB of memory over the version running under just(js) runtime.

comparing to node

Ok, we will now do a quick comparison against a typical node.js program which meets our requirement.

wc-node.js

let size = 0
const { stdin } = process
let reads = 0
const blocksize = parseInt(process.argv[2] || 65536, 10)
stdin.on('readable', () => {
  let chunk
  while ((chunk = stdin.read())) {
    reads++
    size += chunk.length
  }
})
stdin.on('end', () => {
  console.log(`size ${size} reads ${reads} blocksize ${blocksize}`)
})
stdin.on('error', err => {
  console.error(err.stack)
})

Let's run it and see how it does. I am running v15.6.0 of node.js on Ubuntu 18.04.

time dd if=/dev/zero bs=65536 count=500000 | node wc-node.js

500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 8.93132 s, 3.7 GB/s
size 32768000000 rss 56827904

real    0m8.941s
user    0m2.432s
sys     0m15.505s

So, we are seeing a run time of 8.9 seconds and a processing rate of 3.7 GB/s. This is about a 23% decrease in throughput. If we look at memory usage we can see the node.js process is consuming 56MB compared to 19MB or 13MB with just(js), a factor of 3-4 higher.

Let's see if we can make the node.js program do a little better than this. We'll write something that calls the C++ bindings directly. There used to be a way of doing this in a blocking fashion in node.js many moons ago but i was unable to figure it out when digging through the node.js source code for this demo.

wc-node-fast.js

const { Pipe } = process.binding('pipe_wrap')
const blocksize = parseInt(process.argv[2] || 65536, 10)
const stdin = new Pipe(0)
stdin.open(0)
let size = 0
let reads = 0
stdin.onread = buf => {
  if (!buf) {
    console.log(`size ${size} reads ${reads} blocksize ${blocksize} rss ${process.memoryUsage().rss}`)
    return
  }
  reads++
  size += buf.byteLength
}
stdin.readStart()

Let's run it and see if we get a better result.

time dd if=/dev/zero bs=65536 count=500000 | node wc-node-fast.js

500000+0 records in
500000+0 records out
32768000000 bytes (33 GB, 31 GiB) copied, 8.85562 s, 3.7 GB/s
size 32768000000 rss 66383872

real    0m8.867s
user    0m1.587s
sys     0m16.226s

Hmmm... this is no faster and now memory usage is 10MB more. Let's take a look at syscalls in the node.js version.

dd if=/dev/zero bs=65536 count=500000 | strace -c node wc-node-fast.js

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 98.06    5.654736          11    500019         5 read
  1.01    0.058474           3     17994      6502 futex
  0.78    0.044935           3     15671           epoll_wait
  0.09    0.005060           3      1965           getpid

and with the first node.js version...

dd if=/dev/zero bs=65536 count=500000 | strace -c node wc-node.js

% time     seconds  usecs/call     calls    errors syscall
------ ----------- ----------- --------- --------- ----------------
 96.62    5.446947          11    500032        15 read
  1.39    0.078514           3     31270     15633 epoll_ctl
  1.21    0.068166           3     21504      6990 futex
  0.68    0.038313           2     15643           epoll_wait
  0.08    0.004764           2      1966           getpid

Ok, we can see a significant number of extra syscalls here. The second program is calling futex and epoll_wait a lot more than the just(js) versions. This needs further investigation and is likely to do with extra garbage collection and a different polling mechanism on the event loop. We can see both node.js programs do the same number of reads so we are reading in 64k chunks across the board.

For now, I will have to leave the experiment with node.js as I cannot figure out a way to make this do better without writing a C++ module. Let's do some runs for different block sizes and see what the overall results look like.

different block sizes

The script 'bench.js' will perform ten runs for each program across all power of 2 block sizes from 256 bytes to 65536 bytes. It saves all results in a file named 'all.json'. It will take the best score across the ten rounds for each program and save them in a file named 'results.json'. If you then run 'analyse.js' it will produce a html page with the results plotted using HighCharts.

# run the benchmark and produce all.json and results.json
just bench.js
# generate a report on the results and display in the browser - note: xdg-open will not work on docker
just analyse.js

throughput/MBps

throughput

This shows the best performance over 10 runs for each program at the different chunk sizes. We can see pretty clearly there is no significant difference between the JS and c versions of the programs. In fact, we see slightly better performance from the JS programs with smaller chunk sizes.

We can see here node.js throughput is approx. 30% lower than the c or just(js) programs.

time taken/ms

duration

And the same picture when we look at time taken.

Unfortunately, i could not find any way of forcing node.js to read from the pipe in explicitly sized chunks for the comparison above. It's so frustrating to have to wade through tons and tons of web pages and stack overflow questions and the source code itself only to find there doesn't seem to be any way to do this. If anyone knows how, please ping me.

assertions

So, that brings the investigations to an end for now. Let's see if we can make some assertions.

  • There is practically zero overhead for JIT optimized JS when calling into C/C++ for the linux read() syscall
  • just(js) performance is equal to optimized C for reading data from stdin
  • just(js) and C performance is approximately 30% faster than node.js for this task. Further investigation is needed to ascertain why.
  • if your program is syscall heavy, there should be negligible overhead in using JS over C/C++

further work

  • investigate overhead for calls with larger and different types of arguments
  • investigate where the overhead is coming from in node.js and if it is possible to eliminate some or all of it
  • compare calls from JS into C++ to an unoptimized C call into another C function to determine overhead without the cost of a syscall.

Please feel free to point out any mistakes/omissions and point out any improvements that could be made.

# Build image for the stdio perf test: compiles just(js) 0.0.21 from source
# and installs a prebuilt node.js v15.8.0 binary for the comparison runs.
FROM debian:buster-slim AS builder
RUN apt update
# toolchain and utilities needed to fetch and build just(js)
RUN apt install -y g++
RUN apt install -y make
RUN apt install -y tar
RUN apt install -y gzip
RUN apt install -y curl
# NOTE(review): presumably required by the just/V8 build scripts (perl FindBin) — confirm
RUN apt install -y libfindbin-libs-perl
# strace is used in the write-up to count syscalls per run
RUN apt install -y strace
RUN curl -L -o 0.0.21.tar.gz -L https://github.com/just-js/just/archive/0.0.21.tar.gz
RUN tar -zxvf 0.0.21.tar.gz
WORKDIR /just-0.0.21
RUN mkdir -p /usr/local/lib/just
# JUST_HOME/JUST_TARGET tell the just builder where its sources and build live
ENV JUST_HOME=/just-0.0.21
ENV JUST_TARGET=/just-0.0.21
# dynamic runtime build: modules are loaded from this path at run time
ENV LD_LIBRARY_PATH=/just-0.0.21/modules
RUN make runtime
RUN cp just /usr/local/bin/just
WORKDIR /tmp
RUN curl -o nodejs.tar.gz https://nodejs.org/dist/v15.8.0/node-v15.8.0-linux-x64.tar.gz
RUN tar -zxvf nodejs.tar.gz
RUN cp node-v15.8.0-linux-x64/bin/node /usr/local/bin/node
# /app is where the gist directory is mounted when the container runs
WORKDIR /app
CMD ["/bin/bash"]
const { fs } = just.library('fs')
const { sys } = just.library('sys')
const { net } = just.library('net')
// Parse /proc/<pid>/stat into an object of named fields (see proc(5) for
// the field layout). Defaults to the calling process's own pid.
// NOTE(review): the content is split on single spaces, so a comm value
// containing spaces would shift every later field — confirm monitored
// processes have space-free names.
function readStat (pid = sys.pid()) {
const buf = new ArrayBuffer(4096)
const path = `/proc/${pid}/stat`
const fd = fs.open(path)
net.seek(fd, 0, net.SEEK_SET)
let bytes = net.read(fd, buf)
const parts = []
// read until EOF; the pseudo-file may need more than one read
while (bytes > 0) {
parts.push(buf.readString(bytes))
bytes = net.read(fd, buf)
}
const fields = parts.join('').split(' ')
const comm = fields[1]
const state = fields[2]
// the remaining fields are numeric, in proc(5) order
const [
ppid,
pgrp,
session,
ttyNr,
tpgid,
flags,
minflt,
cminflt,
majflt,
cmajflt,
utime,
stime,
cutime,
cstime,
priority,
nice,
numThreads,
itrealvalue,
starttime,
vsize,
rssPages,
rsslim,
startcode,
endcode,
startstack,
kstkesp,
kstkeip,
signal,
blocked,
sigignore,
sigcatch,
wchan,
nswap,
cnswap,
exitSignal,
processor,
rtPriority,
policy,
delayacctBlkioTicks,
guestTime,
cguestTime,
startData,
endData,
startBrk,
argStart,
argEnd,
envStart,
envEnd,
exitCode
] = fields.slice(3).map(v => Number(v))
net.close(fd)
// return everything by name so callers can pick the fields they need
return {
pid,
comm,
state,
ppid,
pgrp,
session,
ttyNr,
tpgid,
flags,
minflt,
cminflt,
majflt,
cmajflt,
utime,
stime,
cutime,
cstime,
priority,
nice,
numThreads,
itrealvalue,
starttime,
vsize,
rssPages,
rsslim,
startcode,
endcode,
startstack,
kstkesp,
kstkeip,
signal,
blocked,
sigignore,
sigcatch,
wchan,
nswap,
cnswap,
exitSignal,
processor,
rtPriority,
policy,
delayacctBlkioTicks,
guestTime,
cguestTime,
startData,
endData,
startBrk,
argStart,
argEnd,
envStart,
envEnd,
exitCode
}
}
module.exports = { readStat }
<script src="https://code.highcharts.com/highcharts.js"></script>
<script src="https://code.highcharts.com/modules/exporting.js"></script>
<style>
#container {
height: 50%;
margin: 1em auto;
}
</style>
<div id="throughput"></div>
<div id="throughputPerCore"></div>
<div id="duration"></div>
<script type="text/javascript">
Highcharts.chart('duration', {
"title": {
"text": "Time Taken (ms), Lower is Better"
},
"xAxis": {
"categories": [
"bs = 65536",
"bs = 4096",
"bs = 256"
]
},
"yAxis": {
"title": {
"text": "time"
},
"tickInterval": 10
},
"series": [
{
"name": "./wc",
"type": "column",
"data": [
3330,
3626,
3470
]
},
{
"name": "just wc.js",
"type": "column",
"data": [
3413,
3110,
3086
]
},
{
"name": "just wc-async.js",
"type": "column",
"data": [
3382,
3696,
3179
]
},
{
"name": "./wcb",
"type": "column",
"data": [
3384,
3091,
3177
]
},
{
"name": "node wc-node.js",
"type": "column",
"data": [
4233,
0
]
}
]
});
Highcharts.chart('throughput', {
"title": {
"text": "Throughput (MBps), Higher is Better"
},
"xAxis": {
"categories": [
"bs = 65536",
"bs = 4096",
"bs = 256"
]
},
"yAxis": {
"title": {
"text": "rate"
},
"tickInterval": 10
},
"series": [
{
"name": "./wc",
"type": "column",
"data": [
4692,
2154,
140
]
},
{
"name": "just wc.js",
"type": "column",
"data": [
4578,
2512,
158
]
},
{
"name": "just wc-async.js",
"type": "column",
"data": [
4620,
2113,
153
]
},
{
"name": "./wcb",
"type": "column",
"data": [
4617,
2527,
153
]
},
{
"name": "node wc-node.js",
"type": "column",
"data": [
3691,
null
]
}
]
});
Highcharts.chart('throughputPerCore', {
"title": {
"text": "Throughput Per Core (MBps), Higher is Better"
},
"xAxis": {
"categories": [
"bs = 65536",
"bs = 4096",
"bs = 256"
]
},
"yAxis": {
"title": {
"text": "rate"
},
"tickInterval": 10
},
"series": [
{
"name": "./wc",
"type": "column",
"data": [
4735,
2411,
176
]
},
{
"name": "just wc.js",
"type": "column",
"data": [
4636,
2528,
190
]
},
{
"name": "just wc-async.js",
"type": "column",
"data": [
4687,
2120,
153
]
},
{
"name": "./wcb",
"type": "column",
"data": [
4664,
2558,
182
]
},
{
"name": "node wc-node.js",
"type": "column",
"data": [
3537,
null
]
}
]
});
</script>
[{"name":"./wc","data":[{"pid":28676,"name":"count","utime":6,"stime":324,"rss":819200,"time":3330,"status":0,"blocksize":65536,"count":250000,"program":"./wc","args":["65536"],"usage":99.09,"sys":97.29,"user":1.8,"bytes":16384000000,"rate":4692,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28679,"name":"count","utime":64,"stime":267,"rss":745472,"time":3626,"status":0,"blocksize":4096,"count":2000000,"program":"./wc","args":["4096"],"usage":89.33,"sys":72.06,"user":17.27,"bytes":8192000000,"rate":2154,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28682,"name":"count","utime":79,"stime":196,"rss":815104,"time":3470,"status":0,"blocksize":256,"count":2000000,"program":"./wc","args":["256"],"usage":79.25,"sys":56.48,"user":22.76,"bytes":512000000,"rate":140,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc.js","data":[{"pid":28684,"name":"count","utime":13,"stime":324,"rss":18644992,"time":3413,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc.js","65536"],"usage":98.74,"sys":94.93,"user":3.8,"bytes":16384000000,"rate":4578,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28701,"name":"count","utime":96,"stime":213,"rss":18509824,"time":3110,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc.js","4096"],"usage":99.35,"sys":68.48,"user":30.86,"bytes":8192000000,"rate":2512,"stdout":"size 8192000000 reads 2000000 blocksize 
4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28710,"name":"count","utime":112,"stime":159,"rss":17555456,"time":3086,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc.js","256"],"usage":82.79,"sys":48.57,"user":34.21,"bytes":512000000,"rate":158,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"just wc-async.js","data":[{"pid":28720,"name":"count","utime":22,"stime":315,"rss":18894848,"time":3382,"status":0,"blocksize":65536,"count":250000,"program":"just","args":["wc-async.js","65536"],"usage":98.56,"sys":92.13,"user":6.43,"bytes":16384000000,"rate":4620,"stdout":"size 16384000000 reads 250000 blocksize 65536 rss 18894848","results":{"size":16384000000,"reads":250000,"blocksize":65536,"rss":18894848}},{"pid":28737,"name":"count","utime":167,"stime":221,"rss":18972672,"time":3696,"status":0,"blocksize":4096,"count":2000000,"program":"just","args":["wc-async.js","4096"],"usage":99.66,"sys":56.76,"user":42.89,"bytes":8192000000,"rate":2113,"stdout":"size 8192000000 reads 2000000 blocksize 4096 rss 19120128","results":{"size":8192000000,"reads":2000000,"blocksize":4096,"rss":19120128}},{"pid":28748,"name":"count","utime":167,"stime":149,"rss":18755584,"time":3179,"status":0,"blocksize":256,"count":2000000,"program":"just","args":["wc-async.js","256"],"usage":99.4,"sys":46.87,"user":52.53,"bytes":512000000,"rate":153,"stdout":"size 512000000 reads 2000000 blocksize 256 rss 17952768","results":{"size":512000000,"reads":2000000,"blocksize":256,"rss":17952768}}],"type":"column"},{"name":"./wcb","data":[{"pid":28758,"name":"count","utime":13,"stime":322,"rss":12816384,"time":3384,"status":0,"blocksize":65536,"count":250000,"program":"./wcb","args":["65536"],"usage":98.99,"sys":95.15,"user":3.84,"bytes":16384000000,"rate":4617,"stdout":"size 16384000000 reads 250000 blocksize 
65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"pid":28775,"name":"count","utime":86,"stime":220,"rss":12779520,"time":3091,"status":0,"blocksize":4096,"count":2000000,"program":"./wcb","args":["4096"],"usage":98.77,"sys":71.01,"user":27.75,"bytes":8192000000,"rate":2527,"stdout":"size 8192000000 reads 2000000 blocksize 4096","results":{"size":8192000000,"reads":2000000,"blocksize":4096}},{"pid":28784,"name":"count","utime":99,"stime":168,"rss":12460032,"time":3177,"status":0,"blocksize":256,"count":2000000,"program":"./wcb","args":["256"],"usage":84.04,"sys":52.88,"user":31.16,"bytes":512000000,"rate":153,"stdout":"size 512000000 reads 2000000 blocksize 256","results":{"size":512000000,"reads":2000000,"blocksize":256}}],"type":"column"},{"name":"node wc-node.js","data":[{"pid":28793,"name":"count","utime":106,"stime":338,"rss":57270272,"time":4233,"status":0,"blocksize":65536,"count":250000,"program":"node","args":["wc-node.js","65536"],"usage":104.34,"sys":79.43,"user":24.91,"bytes":16384000000,"rate":3691,"stdout":"size 16384000000 reads 250000 blocksize 65536","results":{"size":16384000000,"reads":250000,"blocksize":65536}},{"time":0}],"type":"column"}]
const { run } = require('lib/dd.js')
// Run the stdin benchmark for each target program at the given block size
// and read count, printing each run's result as pretty JSON.
async function main (blocksize = 65536, count = 1000) {
  const bs = parseInt(blocksize, 10)
  const total = parseInt(count, 10)
  // programs are run strictly one after another, in this order
  const targets = [['./wc'], ['./wcb'], ['node', 'wc-node.js']]
  for (const [program, ...args] of targets) {
    const result = await run(bs, total, program, ...args)
    just.print(JSON.stringify(result, null, ' '))
  }
}
main(...just.args.slice(2)).catch(err => just.error(err.stack))
const { fs } = just.library('fs')
const { sys } = just.library('sys')
const { net } = just.library('net')
const { launch, watch } = require('process')
// Parse /proc/<pid>/stat into an object of named fields (see proc(5) for
// the field layout). Defaults to the calling process's own pid.
// NOTE(review): the content is split on single spaces, so a comm value
// containing spaces would shift every later field — confirm monitored
// processes have space-free names.
function readStat (pid = sys.pid()) {
const buf = new ArrayBuffer(4096)
const path = `/proc/${pid}/stat`
const fd = fs.open(path)
net.seek(fd, 0, net.SEEK_SET)
let bytes = net.read(fd, buf)
const parts = []
// read until EOF; the pseudo-file may need more than one read
while (bytes > 0) {
parts.push(buf.readString(bytes))
bytes = net.read(fd, buf)
}
const fields = parts.join('').split(' ')
const comm = fields[1]
const state = fields[2]
// the remaining fields are numeric, in proc(5) order
const [
ppid,
pgrp,
session,
ttyNr,
tpgid,
flags,
minflt,
cminflt,
majflt,
cmajflt,
utime,
stime,
cutime,
cstime,
priority,
nice,
numThreads,
itrealvalue,
starttime,
vsize,
rssPages,
rsslim,
startcode,
endcode,
startstack,
kstkesp,
kstkeip,
signal,
blocked,
sigignore,
sigcatch,
wchan,
nswap,
cnswap,
exitSignal,
processor,
rtPriority,
policy,
delayacctBlkioTicks,
guestTime,
cguestTime,
startData,
endData,
startBrk,
argStart,
argEnd,
envStart,
envEnd,
exitCode
] = fields.slice(3).map(v => Number(v))
net.close(fd)
// return everything by name so callers can pick the fields they need
return {
pid,
comm,
state,
ppid,
pgrp,
session,
ttyNr,
tpgid,
flags,
minflt,
cminflt,
majflt,
cmajflt,
utime,
stime,
cutime,
cstime,
priority,
nice,
numThreads,
itrealvalue,
starttime,
vsize,
rssPages,
rsslim,
startcode,
endcode,
startstack,
kstkesp,
kstkeip,
signal,
blocked,
sigignore,
sigcatch,
wchan,
nswap,
cnswap,
exitSignal,
processor,
rtPriority,
policy,
delayacctBlkioTicks,
guestTime,
cguestTime,
startData,
endData,
startBrk,
argStart,
argEnd,
envStart,
envEnd,
exitCode
}
}
// ANSI escape codes used to colourise the stats output
const ANSI_DEFAULT = '\u001b[0m'
const ANSI_GREEN = '\u001b[32m'
// Right-align `num` in a field of `len` characters, space-padded.
function format (num, len = 5) {
  return String(num).padStart(len, ' ')
}
// previous cumulative CPU tick counts, used to compute per-interval deltas
const stats = { user: 0, system: 0, cuser: 0, csystem: 0 }
// Print a one-line colourised summary of a process's resource usage since
// the previous call: rss (MiB) plus user/system CPU ticks for the process
// and its reaped children, as deltas against the `stats` snapshot.
function dumpStats (pid) {
const { utime, stime, cutime, cstime, rssPages } = readStat(pid)
// rssPages is measured in pages; convert to MiB
const rss = Math.floor((rssPages * just.sys.pageSize) / (1024 * 1024))
const user = utime - stats.user
const system = stime - stats.system
const cuser = cutime - stats.cuser
const csystem = cstime - stats.csystem
// remember cumulative values so the next call reports deltas
stats.user = utime
stats.system = stime
stats.cuser = cutime
stats.csystem = cstime
just.print(`${ANSI_GREEN}rss${ANSI_DEFAULT} ${format(rss)} ${ANSI_GREEN}usr${ANSI_DEFAULT} ${format(user)} ${ANSI_GREEN}sys${ANSI_DEFAULT} ${format(system)} ${ANSI_GREEN} tot${ANSI_DEFAULT} ${format(system + user)} ${ANSI_GREEN}cusr${ANSI_DEFAULT} ${format(cuser)} ${ANSI_GREEN}csys${ANSI_DEFAULT} ${format(csystem)} ${ANSI_GREEN} ctot${ANSI_DEFAULT} ${format(csystem + cuser)}`)
}
// Launch `program` with `args`, capture its stdio, and print the child's
// resource stats once a second until it exits. Prints the elapsed time in
// ms on success; throws on a non-zero exit status.
async function main (program, ...args) {
  const stdout = []
  const stderr = []
  const start = Date.now()
  const process = launch(program, args)
  const { pid } = process
  // sample the child's /proc stats once a second
  const timer = just.setInterval(() => dumpStats(pid), 1000)
  process.onStdout = (buf, len) => stdout.push(buf.readString(len))
  process.onStderr = (buf, len) => stderr.push(buf.readString(len))
  try {
    const status = await watch(process)
    if (status !== 0) throw new Error(`Bad Status ${status}`)
    const time = Date.now() - start
    just.print(time)
  } finally {
    // always stop the sampler: the original cleared it only on the success
    // path, leaving the interval running after a non-zero exit status
    just.clearTimeout(timer)
  }
}
main(...just.args.slice(2)).catch(err => just.error(err.stack))
// Async stdin byte counter for the just(js) runtime: stdin is switched to
// non-blocking mode and added to the default event loop; bytes are counted
// as the loop reports readability, and totals are printed at EOF.
const { net, sys, print, error, memoryUsage } = just
const { read, O_NONBLOCK } = net
const { strerror, errno, STDIN_FILENO } = sys
const { EPOLLIN } = just.loop
const { loop } = just.factory
// chunk size per read(); second program argument, default 64KiB
const blocksize = parseInt(just.args[2] || 65536)
const buf = new ArrayBuffer(blocksize)
// set stdin non-blocking while preserving its existing flags
const flags = sys.fcntl(STDIN_FILENO, sys.F_GETFL, 0) | O_NONBLOCK
sys.fcntl(STDIN_FILENO, sys.F_SETFL, flags)
let size = 0
let reads = 0
// invoked by the (level-triggered) event loop whenever stdin is readable
function onData (fd) {
const n = read(fd, buf)
if (n < 0) {
// NOTE(review): on a non-blocking fd this also treats EAGAIN as fatal;
// level-triggered epoll makes a spurious wakeup unlikely but not
// impossible — confirm
error(`read: ${strerror(errno())} (${errno()})`)
loop.remove(fd)
just.exit(1)
}
if (n === 0) {
// EOF: report totals and stop watching stdin
print(`size ${size} reads ${reads} blocksize ${blocksize} rss ${memoryUsage().rss}`)
loop.remove(fd)
return
}
reads++
size += n
}
loop.add(STDIN_FILENO, onData, EPOLLIN)
// Count stdin bytes in node.js by driving the internal pipe binding
// directly, bypassing the streams layer.
// NOTE(review): process.binding is a node-internal API (v15 era) and may be
// removed or restricted in newer releases — confirm before reuse.
const { Pipe } = process.binding('pipe_wrap')
// blocksize is only reported in the summary; node chooses its own read size
const blocksize = parseInt(process.argv[2] || 65536, 10)
const stdin = new Pipe(0)
stdin.open(0)
let size = 0
let reads = 0
// onread receives a buffer per chunk, or a falsy value at EOF
stdin.onread = buf => {
if (!buf) {
console.log(`size ${size} reads ${reads} blocksize ${blocksize}`)
return
}
reads++
size += buf.byteLength
}
stdin.readStart()
// Idiomatic node.js stdin byte counter using the readable-stream API:
// counts chunks and bytes until EOF, then prints the totals.
let size = 0
const { stdin } = process
let reads = 0
// blocksize is only reported in the summary; node picks its own chunk size
const blocksize = parseInt(process.argv[2] || 65536, 10)
stdin.on('readable', () => {
let chunk
// drain everything currently buffered; read() returns null when empty
while ((chunk = stdin.read())) {
reads++
size += chunk.length
}
})
stdin.on('end', () => {
// EOF: report totals
console.log(`size ${size} reads ${reads} blocksize ${blocksize}`)
})
stdin.on('error', err => {
console.error(err.stack)
})
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
/*
 * Read stdin to EOF in fixed-size chunks, then report the total byte count,
 * the number of read() calls made and the chunk size used. The chunk size
 * defaults to 65536 bytes and may be overridden via argv[1].
 */
int main(int argc, char *argv[]) {
  unsigned int blocksize = 65536;
  if (argc == 2) blocksize = atoi(argv[1]);
  char buf[blocksize]; /* VLA sized to the requested chunk */
  unsigned long size = 0;
  unsigned int reads = 0;
  int n;
  while ((n = read(STDIN_FILENO, buf, blocksize)) > 0) {
    reads++;
    size += n;
  }
  if (n < 0) {
    fprintf(stderr, "read: %s (%i)\n", strerror(errno), errno);
    exit(1);
  }
  fprintf(stdout, "size %lu reads %u blocksize %u\n", size, reads, blocksize);
}
// Synchronous stdin byte counter for the just(js) runtime: read stdin to
// EOF in blocksize chunks, then report the totals.
const { print, error, exit, memoryUsage, sys, net } = just
const { read } = net
const { strerror, errno, STDIN_FILENO } = sys
// chunk size per read(); second program argument, default 64KiB
const blocksize = parseInt(just.args[2] || 65536)
const buf = new ArrayBuffer(blocksize)
let size = 0
let reads = 0
let n
// read() returns the byte count, 0 at EOF, or a negative value on error
while ((n = read(STDIN_FILENO, buf)) > 0) {
  reads++
  size += n
}
if (n < 0) {
  error(`read: ${strerror(errno())} (${errno()})`)
  exit(1)
}
print(`size ${size} reads ${reads} blocksize ${blocksize}`)
// build config for `just build`: use wcb.js as the main script instead of
// the full just.js runtime bundle
module.exports = { main: 'wcb.js' }
// we need to use just.library() to load modules for now as that is how
// the builder picks them up
just.library = (name, lib = name) => just.load(name)
const { sys } = just.library('sys')
const { net } = just.library('net')
const { read } = net
const { strerror, errno, STDIN_FILENO } = sys
// chunk size for each read(); first program argument, default 64KiB
const blocksize = parseInt(just.args[1] || 65536)
const buf = new ArrayBuffer(blocksize)
let size = 0
let reads = 0
let n = 0
// read() returns 0 at EOF and a negative value on error. The condition must
// test n > 0 explicitly: the previous `while ((n = read(...)))` treated -1
// as truthy, so a read error spun forever and the n < 0 check below was
// unreachable.
while ((n = read(STDIN_FILENO, buf)) > 0) {
  reads++
  size += n
}
if (n < 0) {
  just.error(`read: ${strerror(errno())} (${errno()})`)
  just.exit(1)
}
just.print(`size ${size} reads ${reads} blocksize ${blocksize}`)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment