Skip to content

Instantly share code, notes, and snippets.

@bellbind
Last active July 3, 2017 02:19
Show Gist options
  • Save bellbind/dd1c0cd9cbe422caff8dcdae1010ad37 to your computer and use it in GitHub Desktop.
Save bellbind/dd1c0cd9cbe422caff8dcdae1010ad37 to your computer and use it in GitHub Desktop.
[webassembly]Compare pure ES/wasm/asm.js with Turing Pattern example (refined)
/*
 * asm.js module implementing a wrap-around 2D convolution over float64 cells
 * stored in `heap`.
 *   global: object supplying Math.imul, Float64Array and Int32Array
 *   env:    not used by this module
 *   heap:   ArrayBuffer viewed both as float64 and as int32 values
 * Returns {conv}.
 */
function asm(global, env, heap) {
"use asm";
var imul = global.Math.imul;
// Two typed views over the same heap bytes.
var bufF = new global.Float64Array(heap);
var bufI = new global.Int32Array(heap);
// Read the float64 cell at (x, y) from the w-by-w grid that starts at byte
// offset `offs`. Both coordinates have w added and are then reduced modulo w
// as unsigned integers, so coordinates just outside the grid wrap around.
function get(offs, w, x, y) {
offs = offs | 0;
w = w | 0;
x = x | 0;
y = y | 0;
var index = 0;
// index = wrappedY * w + wrappedX
index = (imul((y + w >>> 0) % (w >>> 0) | 0, w) | 0) +
((x + w >>> 0) % (w >>> 0) | 0) | 0;
// `index << 3` converts the cell index to a byte offset (8 bytes per float64);
// `>> 3` converts the final byte address back to a Float64Array index.
return +bufF[(offs + (index << 3)) >> 3];
}
// For each of `len` cells of the source grid at byte offset `soffs` (row
// width `w`), sum weight * neighbour over `clen` kernel entries starting at
// byte offset `coffs`, and store the sum as a float64 at `doffs + i * 8`.
function conv(coffs, clen, soffs, len, w, doffs) {
coffs = coffs | 0;
clen = clen | 0;
soffs = soffs | 0;
len = len | 0;
w = w | 0;
doffs = doffs | 0;
var i = 0, x = 0, y = 0, ci = 0, ct = 0, cx = 0, cy = 0;
var s = 0.0, cf = 0.0;
for (i = 0; (i | 0) < (len | 0); i = i + 1 | 0) {
// Cell index -> (column, row).
x = (i >>> 0) % (w >>> 0) | 0;
y = (i >>> 0) / (w >>> 0) | 0;
s = 0.0;
for (ci = 0; (ci | 0) < (clen | 0); ci = ci + 1 | 0) {
// Each kernel entry is 16 bytes: int32 at +0 (cx), int32 at +4 (cy),
// float64 at +8 (cf).
ct = coffs + (ci << 4) | 0;
cx = bufI[ct >> 2] | 0;
cy = bufI[(ct + 4) >> 2] | 0;
cf = +bufF[(ct + 8) >> 3];
s = s + cf * +get(soffs, w, x + cx | 0, y + cy | 0);
}
bufF[(doffs + (i << 3)) >> 3] = +s;
}
}
return {conv: conv};
}
;; WebAssembly text module providing a wrap-around 2D convolution over
;; float64 cells held in linear memory. Exports the memory as "heap" and the
;; convolution routine as "conv".
(module
(export "heap" (memory $0))
;; Linear memory: 1 page initially, at most 256 pages.
(memory $0 1 256)
(export "conv" (func $conv))
;; $get: load the f64 cell at ($x, $y) of the $w-wide grid starting at byte
;; offset $offs. $w is added to each coordinate before an unsigned remainder
;; by $w, so coordinates just outside the grid wrap around.
(func $get (param $offs i32) (param $w i32) (param $x i32) (param $y i32) (result f64)
(local $index i32)
;; index = ((y + w) rem_u w) * w + ((x + w) rem_u w)
(set_local $index
(i32.add
(i32.mul
(i32.rem_u
(i32.add
(get_local $y)
(get_local $w)
)
(get_local $w)
)
(get_local $w)
)
(i32.rem_u
(i32.add
(get_local $x)
(get_local $w)
)
(get_local $w)
)
)
)
;; Load from byte address offs + index * 8.
(return
(f64.load
(i32.add
(get_local $offs)
(i32.shl
(get_local $index)
(i32.const 3)
)
)
)
)
)
;; $conv: for each of $len cells of the source grid at $soffs (row width $w),
;; accumulate weight * neighbour over $clen kernel entries starting at $coffs
;; and store the f64 sum at $doffs + i * 8.
(func $conv (param $coffs i32) (param $clen i32) (param $soffs i32) (param $len i32) (param $w i32) (param $doffs i32)
(local $i i32)
(local $x i32)
(local $y i32)
(local $ci i32)
(local $ct i32)
(local $cx i32)
(local $cy i32)
(local $s f64)
(local $cf f64)
(set_local $i
(i32.const 0)
)
;; Outer loop over cells: leave through $for-out once i >= len (signed).
(loop $for-in
(block $for-out
(if
(i32.eqz
(i32.lt_s
(get_local $i)
(get_local $len)
)
)
(br $for-out)
)
(block
;; x = i rem_u w, y = i div_u w
(set_local $x
(i32.rem_u
(get_local $i)
(get_local $w)
)
)
(set_local $y
(i32.div_u
(get_local $i)
(get_local $w)
)
)
(set_local $s
(f64.const 0)
)
(block
(set_local $ci
(i32.const 0)
)
;; Inner loop over kernel entries: leave through $for-out0 once ci >= clen.
(loop $for-in1
(block $for-out0
(if
(i32.eqz
(i32.lt_s
(get_local $ci)
(get_local $clen)
)
)
(br $for-out0)
)
(block
;; ct = coffs + ci * 16: byte address of the current 16-byte kernel entry.
(set_local $ct
(i32.add
(get_local $coffs)
(i32.shl
(get_local $ci)
(i32.const 4)
)
)
)
;; cx: i32 at ct + 0
(set_local $cx
(i32.load
(get_local $ct)
)
)
;; cy: i32 at ct + 4
(set_local $cy
(i32.load
(i32.add
(get_local $ct)
(i32.const 4)
)
)
)
;; cf: f64 at ct + 8
(set_local $cf
(f64.load
(i32.add
(get_local $ct)
(i32.const 8)
)
)
)
;; s = s + cf * get(soffs, w, x + cx, y + cy)
(set_local $s
(f64.add
(get_local $s)
(f64.mul
(get_local $cf)
(call $get
(get_local $soffs)
(get_local $w)
(i32.add
(get_local $x)
(get_local $cx)
)
(i32.add
(get_local $y)
(get_local $cy)
)
)
)
)
)
)
(set_local $ci
(i32.add
(get_local $ci)
(i32.const 1)
)
)
(br $for-in1)
)
)
)
;; Store the accumulated sum at byte address doffs + i * 8.
(f64.store
(i32.add
(get_local $doffs)
(i32.shl
(get_local $i)
(i32.const 3)
)
)
(get_local $s)
)
)
(set_local $i
(i32.add
(get_local $i)
(i32.const 1)
)
)
(br $for-in)
)
)
)
)
<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<script src="script-asm.js" defer="defer"></script>
</head>
<body>
<div>duration: <span id="duration"></span>ms</div>
<div>average: <span id="average"></span>ms</div>
</body>
</html>
<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<script src="script-es6.js" defer="defer"></script>
</head>
<body>
<div>duration: <span id="duration"></span>ms</div>
<div>average: <span id="average"></span>ms</div>
</body>
</html>
<!doctype html>
<html>
<head>
<meta charset="utf-8" />
<script src="script-wasm.js" defer="defer"></script>
</head>
<body>
<div>duration: <span id="duration"></span>ms</div>
<div>average: <span id="average"></span>ms</div>
</body>
</html>
"use strict";
// Turing Pattern Simulator program (asm.js driver)
//
// Settings come from the URL fragment, which is spliced into an object
// literal: e.g. `#w:100,s:4` yields {w: 100, s: 4}. Keys that are absent fall
// back to the defaults below.
//
// SECURITY NOTE: the fragment is executed as JavaScript through Function().
// Anyone who can craft a link to this page can run arbitrary script in it.
// Acceptable for a local benchmark; replace with a strict key/number parser
// before hosting this anywhere that matters.
const {
  w = 200, s = 2, // w: grid width/height in cells, s: pixel size of one cell
  ra = 3, ri = 7, // ra/ri: kernel radius of the activator/inhibitor
} = Function(`return {${location.hash.slice(1)}}`)();
// Build the normalized convolution kernel for radius r.
// Every cell of the (2r+1) x (2r+1) square gets the weight
// max(r + 1/2 - distance, 0) squared; zero-weight cells are dropped and the
// remaining weights are divided by their sum.
// Returns an array of {x, y, f} with x/y relative to the kernel centre.
function force(r) {
  const side = r * 2 + 1;
  const weighted = [];
  for (const i of Array(side * side).keys()) {
    const x = i % side - r;
    const y = (i / side | 0) - r;
    const f = Math.max((r + 1 / 2) - Math.hypot(x, y), 0) ** 2;
    if (f > 0) {
      weighted.push({x, y, f});
    }
  }
  let total = 0;
  for (const {f} of weighted) {
    total = total + f;
  }
  return weighted.map(({x, y, f}) => ({x, y, f: f / total}));
}
// Pack a kernel (array of {x, y, f}) into one buffer of 16-byte records:
// int32 x at byte 0, int32 y at byte 4, float64 f at byte 8.
// Returns the Float64Array view of that buffer (two elements per record).
function force2ta(f) {
  const RECORD_BYTES = 16;
  const storage = new ArrayBuffer(f.length * RECORD_BYTES);
  const doubles = new Float64Array(storage);
  const ints = new Int32Array(storage);
  f.forEach((entry, i) => {
    const {x, y, f: weight} = entry;
    const intBase = i * 4;
    ints[intBase] = x;
    ints[intBase + 1] = y;
    doubles[i * 2 + 1] = weight;
  });
  return doubles;
}
// Packed kernels: convA for the activator radius, convI for the inhibitor radius.
const [convA, convI] = [ra, ri].map(radius => force2ta(force(radius)));
// cell automaton
// Create the initial state: {F} holding w * w independent Math.random() values.
function init() {
  const F = Array(w * w);
  for (let i = 0; i < F.length; i++) {
    F[i] = Math.random();
  }
  return {F};
}
// visualize
// Clear the canvas, then paint every cell of F as an s-by-s grey square whose
// lightness percentage is the cell value times 100.
function render(c2d, {F}) {
  const {width, height} = c2d.canvas;
  c2d.clearRect(0, 0, width, height);
  F.forEach((value, index) => {
    const column = index % w;
    const row = index / w | 0;
    c2d.fillStyle = `hsl(0, 0%, ${value * 100}%)`;
    c2d.fillRect(column * s, row * s, s, s);
  });
}
// Square canvas of w * s pixels per side, appended to the page body.
const canvas = document.createElement("canvas");
const sidePixels = w * s;
canvas.height = sidePixels;
canvas.width = sidePixels;
document.body.appendChild(canvas);
const c2d = canvas.getContext("2d");
// utils
// Limit v to the range [min, max]: raise it to at least min, then cap at max.
function clamp(v, min, max) {
  const raised = Math.max(min, v);
  return Math.min(raised, max);
}
// Load the asm.js module source, link it against a heap large enough for both
// kernels plus three w*w float64 grids, and start the animation.
fetch("./conv.asm.js").then(res => res.text()).
  // NOTE(review): the fetched text is evaluated with Function(); this is only
  // safe while conv.asm.js is served from a trusted location.
  then(src => Function(`return (${src})`)()).
  then(asm => {
    const atleast = (convA.length + convI.length + w * w * 3) << 3;
    // asm.js only links against heaps whose byte length is a power of two
    // (2^12 up to 2^24) or a multiple of 2^24. Doubling from 1 MiB always
    // lands on a valid length; growing in 1 MiB steps could produce e.g.
    // 3 MiB, which fails asm.js validation.
    let size = 0x100000;
    while (size < atleast) size *= 2;
    const heap = {buffer: new ArrayBuffer(size)};
    const {conv} = asm(top, {}, heap.buffer);
    // Heap layout, back to back: activator kernel, inhibitor kernel, source
    // grid, activator result grid, inhibitor result grid.
    let offs = 0;
    const cA = new Float64Array(heap.buffer, offs, convA.length);
    cA.set(convA);
    offs += cA.byteLength;
    const cI = new Float64Array(heap.buffer, offs, convI.length);
    cI.set(convI);
    offs += cI.byteLength;
    const sF = new Float64Array(heap.buffer, offs, w * w);
    offs += sF.byteLength;
    const dA = new Float64Array(heap.buffer, offs, w * w);
    offs += dA.byteLength;
    const dI = new Float64Array(heap.buffer, offs, w * w);
    // Advance one generation: copy F into the heap, run both convolutions
    // (only they are timed), and combine as clamp(v + activator - inhibitor).
    function next({F}) {
      sF.set(F);
      const begin = performance.now();
      // Each kernel entry spans two float64 slots, hence length / 2 entries.
      conv(cA.byteOffset, cA.length / 2,
           sF.byteOffset, sF.length, w, dA.byteOffset);
      conv(cI.byteOffset, cI.length / 2,
           sF.byteOffset, sF.length, w, dI.byteOffset);
      benchmark(begin, performance.now());
      return {F: F.map((v, i) => clamp(v + dA[i] - dI[i], 0, 1))};
    }
    // main loop: draw the current state, compute the next one on the next frame
    function loop(c2d, channel) {
      render(c2d, channel);
      requestAnimationFrame(_ => loop(c2d, next(channel)));
    }
    loop(c2d, init());
  }).
  // Surface load/link failures instead of leaving the rejection unhandled.
  catch(err => console.error("asm.js simulator failed to start:", err));
// benchmark
// Output elements for the latest and the averaged timing, plus a window of the
// last 100 samples (index 0 is the newest). NaN marks slots not yet filled.
const duration = document.getElementById("duration");
const average = document.getElementById("average");
const ds = new Float64Array(100).fill(NaN);
// Record one timing sample (end - begin), show it, and show the mean of the
// sample window once every slot has been filled.
function benchmark(begin, end) {
  const elapsed = end - begin;
  // Shift the window one slot towards the end and put the new sample first.
  const shifted = ds.copyWithin(1, 0);
  shifted[0] = elapsed;
  duration.textContent = elapsed;
  const oldest = ds[ds.length - 1];
  if (isNaN(oldest)) {
    average.textContent = "(wait 100 frames)";
    return;
  }
  let total = 0;
  for (const sample of ds) {
    total = total + sample;
  }
  average.textContent = total / ds.length;
}
"use strict";
// Turing Pattern Simulator program (pure ES driver)
//
// Settings come from the URL fragment, which is spliced into an object
// literal: e.g. `#w:100,s:4` yields {w: 100, s: 4}. Keys that are absent fall
// back to the defaults below.
//
// SECURITY NOTE: the fragment is executed as JavaScript through Function().
// Anyone who can craft a link to this page can run arbitrary script in it.
// Acceptable for a local benchmark; replace with a strict key/number parser
// before hosting this anywhere that matters.
const {
  w = 200, s = 2, // w: grid width/height in cells, s: pixel size of one cell
  ra = 3, ri = 7, // ra/ri: kernel radius of the activator/inhibitor
} = Function(`return {${location.hash.slice(1)}}`)();
// convolution for cell array
// Read cell (x, y) of the flat w-by-w array ch. w is added to each coordinate
// before taking it modulo w, so positions just off an edge wrap around.
// (A clamped-edge variant would index with clamp(y, 0, w - 1) and
// clamp(x, 0, w - 1) instead.)
function get(ch, x, y) {
  const row = (y + w) % w;
  const column = (x + w) % w;
  return ch[row * w + column];
}
// Turn kernel f (array of {x, y, f}) into an Array.prototype.map callback.
// For the cell at index i of ch, the callback returns the sum over kernel
// entries of weight * neighbour, where the neighbour is fetched through get().
function conv(f) {
  return function convolveAt(v, i, ch) {
    const column = i % w;
    const row = i / w | 0;
    let sum = 0;
    for (const entry of f) {
      sum = sum + entry.f * get(ch, column + entry.x, row + entry.y);
    }
    return sum;
  };
}
// Build the normalized convolution kernel for radius r: each cell of the
// (2r+1)-wide square is weighted by max(r + 1/2 - distance, 0) squared,
// zero weights are discarded, and the rest are scaled to sum to 1.
// (A flat box kernel would instead give every cell the weight 1 / (w * w).)
function force(r) {
  const side = r * 2 + 1;
  const kept = [];
  Array(side * side).fill(0).forEach((_, i) => {
    const x = i % side - r;
    const y = (i / side | 0) - r;
    const f = Math.max((r + 1 / 2) - Math.hypot(x, y), 0) ** 2;
    if (f > 0) kept.push({x, y, f});
  });
  let total = 0;
  kept.forEach(({f}) => {
    total = total + f;
  });
  return kept.map(({x, y, f}) => ({x, y, f: f / total}));
}
// Map callbacks for the activator (radius ra) and inhibitor (radius ri) kernels.
const [convA, convI] = [ra, ri].map(radius => conv(force(radius)));
// cell automaton
// Create the initial state: {F} holding w * w independent Math.random() values.
function init() {
  const F = Array(w * w).fill(0).map(() => Math.random());
  return {F};
}
// Advance one generation. Both convolutions are timed and reported through
// benchmark(); the new value of each cell is
// clamp(old + activator - inhibitor, 0, 1).
function next({F}) {
  const startedAt = performance.now();
  const activator = F.map(convA);
  const inhibitor = F.map(convI);
  benchmark(startedAt, performance.now());
  const updated = F.map((v, i) => clamp(v + activator[i] - inhibitor[i], 0, 1));
  return {F: updated};
}
// visualize
// Clear the canvas, then paint every cell of F as an s-by-s grey square whose
// lightness percentage is the cell value times 100.
function render(c2d, {F}) {
  c2d.clearRect(0, 0, c2d.canvas.width, c2d.canvas.height);
  const paintCell = (lightness, cellIndex) => {
    const left = (cellIndex % w) * s;
    const top = (cellIndex / w | 0) * s;
    c2d.fillStyle = `hsl(0, 0%, ${lightness * 100}%)`;
    c2d.fillRect(left, top, s, s);
  };
  F.forEach((v, i) => paintCell(v, i));
}
// Square canvas of w * s pixels per side, appended to the page body; the
// animation starts immediately from a random state.
const canvas = document.createElement("canvas");
const sidePixels = w * s;
canvas.height = sidePixels;
canvas.width = sidePixels;
document.body.appendChild(canvas);
const c2d = canvas.getContext("2d");
loop(c2d, init());
// utils
// Limit v to the range [min, max]: raise it to at least min, then cap at max.
function clamp(v, min, max) {
  const raised = Math.max(min, v);
  return Math.min(raised, max);
}
// main loop
// Draw the given state now, and schedule the next generation to be computed
// and drawn on the following animation frame.
function loop(c2d, channel) {
  render(c2d, channel);
  const onFrame = () => loop(c2d, next(channel));
  requestAnimationFrame(onFrame);
}
// benchmark
// Output elements for the latest and the averaged timing, plus a window of the
// last 100 samples (index 0 is the newest). NaN marks slots not yet filled.
const duration = document.getElementById("duration");
const average = document.getElementById("average");
const ds = new Float64Array(100).fill(NaN);
// Record one timing sample (end - begin), show it, and show the mean of the
// sample window once every slot has been filled.
function benchmark(begin, end) {
  const elapsed = end - begin;
  // Shift the window one slot towards the end and put the new sample first.
  const shifted = ds.copyWithin(1, 0);
  shifted[0] = elapsed;
  duration.textContent = elapsed;
  const windowFull = !isNaN(ds[ds.length - 1]);
  if (windowFull) {
    const total = ds.reduce((sum, sample) => sum + sample, 0);
    average.textContent = total / ds.length;
  } else {
    average.textContent = "(wait 100 frames)";
  }
}
"use strict";
// Turing Pattern Simulator program (WebAssembly driver)
//
// Settings come from the URL fragment, which is spliced into an object
// literal: e.g. `#w:100,s:4` yields {w: 100, s: 4}. Keys that are absent fall
// back to the defaults below.
//
// SECURITY NOTE: the fragment is executed as JavaScript through Function().
// Anyone who can craft a link to this page can run arbitrary script in it.
// Acceptable for a local benchmark; replace with a strict key/number parser
// before hosting this anywhere that matters.
const {
  w = 200, s = 2, // w: grid width/height in cells, s: pixel size of one cell
  ra = 3, ri = 7, // ra/ri: kernel radius of the activator/inhibitor
} = Function(`return {${location.hash.slice(1)}}`)();
// Build the normalized convolution kernel for radius r.
// Cells of the (2r+1) x (2r+1) square are weighted by
// max(r + 1/2 - distance, 0) squared; cells with zero weight are omitted and
// the surviving weights are divided by their sum.
// Returns an array of {x, y, f} with x/y relative to the kernel centre.
function force(r) {
  const side = r * 2 + 1;
  const weightAt = (x, y) => Math.max((r + 1 / 2) - Math.hypot(x, y), 0) ** 2;
  const candidates = Array.from(Array(side * side), (_, i) => {
    const x = i % side - r;
    const y = (i / side | 0) - r;
    return {x, y, f: weightAt(x, y)};
  });
  const positive = candidates.filter(cell => cell.f > 0);
  let total = 0;
  for (const cell of positive) {
    total = total + cell.f;
  }
  return positive.map(({x, y, f}) => ({x, y, f: f / total}));
}
// Pack a kernel (array of {x, y, f}) into one buffer of 16-byte records:
// int32 x at byte 0, int32 y at byte 4, float64 f at byte 8.
// Returns the Float64Array view of that buffer (two elements per record).
function force2ta(f) {
  const storage = new ArrayBuffer(f.length * 16);
  const asDoubles = new Float64Array(storage);
  const asInts = new Int32Array(storage);
  const writeRecord = (record, i) => {
    const {x, y, f: weight} = record;
    asInts[i * 4] = x;
    asInts[i * 4 + 1] = y;
    asDoubles[i * 2 + 1] = weight;
  };
  f.forEach((record, i) => writeRecord(record, i));
  return asDoubles;
}
// Packed kernels: convA for the activator radius, convI for the inhibitor radius.
const [convA, convI] = [ra, ri].map(radius => force2ta(force(radius)));
// cell automaton
// Create the initial state: {F} holding w * w independent Math.random() values.
function init() {
  const F = Array.from(Array(w * w), () => Math.random());
  return {F};
}
// visualize
// Clear the canvas, then paint every cell of F as an s-by-s grey square whose
// lightness percentage is the cell value times 100.
function render(c2d, {F}) {
  const surface = c2d.canvas;
  c2d.clearRect(0, 0, surface.width, surface.height);
  F.forEach((shade, cell) => {
    const px = (cell % w) * s;
    const py = (cell / w | 0) * s;
    c2d.fillStyle = `hsl(0, 0%, ${shade * 100}%)`;
    c2d.fillRect(px, py, s, s);
  });
}
// Square canvas of w * s pixels per side, appended to the page body.
const canvas = document.createElement("canvas");
const sidePixels = w * s;
canvas.height = sidePixels;
canvas.width = sidePixels;
document.body.appendChild(canvas);
const c2d = canvas.getContext("2d");
// utils
// Limit v to the range [min, max]: raise it to at least min, then cap at max.
function clamp(v, min, max) {
  const raised = Math.max(min, v);
  return Math.min(raised, max);
}
// Load and instantiate the WebAssembly module, make its memory large enough
// for both kernels plus three w*w float64 grids, and start the animation.
fetch("./conv.wasm").then(res => res.arrayBuffer()).
  then(buf => WebAssembly.instantiate(buf, {})).
  then(({instance}) => {
    const {conv, heap} = instance.exports;
    const size = (convA.length + convI.length + w * w * 3) << 3;
    // Grow once by the number of missing 64 KiB pages. Every grow() call
    // detaches the previous ArrayBuffer, so growing page by page in a loop
    // repeats that work for no benefit.
    const PAGE_BYTES = 0x10000;
    const missing = size - heap.buffer.byteLength;
    if (missing > 0) heap.grow(Math.ceil(missing / PAGE_BYTES));
    // The views below must be created only after growing, on the final buffer.
    // Memory layout, back to back: activator kernel, inhibitor kernel, source
    // grid, activator result grid, inhibitor result grid.
    let offs = 0;
    const cA = new Float64Array(heap.buffer, offs, convA.length);
    cA.set(convA);
    offs += cA.byteLength;
    const cI = new Float64Array(heap.buffer, offs, convI.length);
    cI.set(convI);
    offs += cI.byteLength;
    const sF = new Float64Array(heap.buffer, offs, w * w);
    offs += sF.byteLength;
    const dA = new Float64Array(heap.buffer, offs, w * w);
    offs += dA.byteLength;
    const dI = new Float64Array(heap.buffer, offs, w * w);
    // Advance one generation: copy F into memory, run both convolutions
    // (only they are timed), and combine as clamp(v + activator - inhibitor).
    function next({F}) {
      sF.set(F);
      const begin = performance.now();
      // Each kernel entry spans two float64 slots, hence length / 2 entries.
      conv(cA.byteOffset, cA.length / 2,
           sF.byteOffset, sF.length, w, dA.byteOffset);
      conv(cI.byteOffset, cI.length / 2,
           sF.byteOffset, sF.length, w, dI.byteOffset);
      benchmark(begin, performance.now());
      return {F: F.map((v, i) => clamp(v + dA[i] - dI[i], 0, 1))};
    }
    // main loop: draw the current state, compute the next one on the next frame
    function loop(c2d, channel) {
      render(c2d, channel);
      requestAnimationFrame(_ => loop(c2d, next(channel)));
    }
    loop(c2d, init());
  }).
  // Surface load/instantiate failures instead of leaving the rejection unhandled.
  catch(err => console.error("wasm simulator failed to start:", err));
// benchmark
// Output elements for the latest and the averaged timing, plus a window of the
// last 100 samples (index 0 is the newest). NaN marks slots not yet filled.
const duration = document.getElementById("duration");
const average = document.getElementById("average");
const ds = new Float64Array(100).fill(NaN);
// Record one timing sample (end - begin), show it, and show the mean of the
// sample window once every slot has been filled.
function benchmark(begin, end) {
  const elapsed = end - begin;
  // Shift the window one slot towards the end and put the new sample first.
  ds.copyWithin(1, 0)[0] = elapsed;
  duration.textContent = elapsed;
  let shown = "(wait 100 frames)";
  if (!isNaN(ds[ds.length - 1])) {
    let total = 0;
    ds.forEach(sample => {
      total = total + sample;
    });
    shown = total / ds.length;
  }
  average.textContent = shown;
}
@bellbind
Copy link
Author

bellbind commented Jul 1, 2017

Refined conv.asm.js to use the same algorithm as script-es6.js, starting from:


The "binaryen" 33 toolset (asm2wasm, wasm-as) was used as follows:

  • asm2wasm conv.asm.js > conv-asm.wast
  • wasm-as conv.wast > conv.wasm

NOTE: conv.wast is based on the output of "asm2wasm conv.asm.js", with the following manual changes:

  • removed all imports, and added an export for the memory
  • removed the generated $i32u-rem func, and replaced each call to it with the i32.rem_u operation in the $get and $conv bodies.
  • removed the generated $i32u-div func, and replaced each call to it with the i32.div_u operation in the $conv body.

@bellbind
Copy link
Author

bellbind commented Jul 1, 2017

Demo links: benchmark printed on Web Console

Benchmark Results:

  • firefox-54.0.1 and chrome-59.0.3071.115 on macOS 10.12.5: MacBook Pro (13-inch, Late 2016, Two Thunderbolt 3 ports)
firefox-54 chrome-59
ES6 70ms 240ms
wasm 80ms 130ms
asm.js 80ms 210ms

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment