Skip to content

Instantly share code, notes, and snippets.

@dy
Created February 3, 2025 03:58
Show Gist options
  • Save dy/367a2c40fb166f828869ecb4766e5eea to your computer and use it in GitHub Desktop.
Save dy/367a2c40fb166f828869ecb4766e5eea to your computer and use it in GitHub Desktop.
WASM vs JS – xyz to rgb transform
// test some performance parts
// wasm is surprisingly not as fast, even in the full version
import t from 'tst'
import watr from 'watr'
// WAT text for a self-contained `$f64pow (param f64 f64) (result f64)` function.
// It is interpolated into the module source passed to watr() below, where
// `$lrgb2rgb3` calls it as pow(base, exponent) in place of the commented-out
// "js"."f64pow" import.
// The line breaks inside the template literal are part of the string.
// NOTE(review): this looks like machine-generated output of a libm-style pow — confirm the origin before editing by hand.
const f64pow = `(func $f64pow (param f64 f64)(result f64)(local f64 i64 i64 i64 f64 f64 f64 f64 f64 f64)(local.set 2(f64.const 0x1p+0))(block(br_if 0(f64.eq(local.get 1)(f64.const 0x0p+0)))(local.set 3(i64.const 0))(block(br_if 0(i64.gt_s(i64.reinterpret_f64(local.get 0))(i64.const -1)))(br_if 0(f64.ne(f64.nearest(local.get 1))(local.get 1)))(local.set 3(i64.shl(i64.extend_i32_u(f64.ne(f64.nearest(local.tee 2(f64.mul(local.get 1)(f64.const 0x1p-1))))(local.get 2)))(i64.const 63)))(local.set 0(f64.neg(local.get 0))))(local.set 2(f64.const 0x1p+0))(block(br_if 0(f64.eq(local.get 0)(f64.const 0x1p+0)))(block(br_if 0(f64.ne(local.get 0)(f64.const 0x0p+0)))(local.set 2(select(f64.const inf)(f64.const 0x0p+0)(i64.lt_s(i64.reinterpret_f64(local.get 1))(i64.const 0))))(br 1))(block(br_if 0(f64.ne(f64.abs(local.get 0))(f64.const inf)))(local.set 2(select(f64.const 0x0p+0)(f64.const inf)(i64.lt_s(i64.reinterpret_f64(local.get 1))(i64.const 0))))(br 1))(block(br_if 0(i64.ge_s(local.tee 4(i64.reinterpret_f64(local.get 0)))(i64.const 0)))(local.set 2(f64.const nan))(br 1))(block(br_if 0(f64.ne(f64.abs(local.get 1))(f64.const inf)))(local.set 2(select(f64.const inf)(f64.const 0x0p+0)(i32.eq(i32.wrap_i64(i64.shr_u(i64.reinterpret_f64(local.get 1))(i64.const 63)))(f64.lt(local.get 0)(f64.const 0x1p+0)))))(br 1))(block(br_if 0(i64.gt_u(local.get 4)(i64.const 4503599627370495)))(local.set 4(i64.sub(i64.shl(local.get 4)(local.tee 5(i64.add(i64.clz(local.get 4))(i64.const -11))))(i64.shl(local.get 5)(i64.const 52)))))(local.set 2(f64.const inf))(br_if 0(f64.gt(local.tee 1(f64.add(local.tee 10(f64.mul(local.tee 6(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(local.get 1))(i64.const -4294967296))))(local.tee 0(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(f64.add(f64.add(local.tee 7(f64.mul(local.tee 0(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(f64.add(local.tee 11(f64.mul(local.tee 0(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(local.tee 9(f64.div(local.tee 
7(f64.add(f64.reinterpret_i64(i64.sub(local.get 4)(i64.and(local.tee 5(i64.add(local.get 4)(i64.const -4604544271217802189)))(i64.const -4503599627370496))))(f64.const -0x1p+0)))(local.tee 8(f64.add(local.get 7)(f64.const 0x1p+1))))))(i64.const -134217728))))(local.tee 0(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(f64.add(f64.add(local.tee 10(f64.mul(local.get 0)(local.get 0)))(local.tee 8(f64.add(f64.mul(local.tee 7(f64.div(f64.sub(f64.sub(local.get 7)(f64.mul(local.get 0)(local.tee 11(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(local.get 8))(i64.const -4294967296))))))(f64.mul(local.get 0)(f64.add(local.get 7)(f64.sub(f64.const 0x1p+1)(local.get 11)))))(local.get 8)))(f64.add(local.get 9)(local.get 0)))(f64.mul(f64.mul(local.tee 0(f64.mul(local.get 9)(local.get 9)))(local.get 0))(f64.add(f64.mul(f64.add(f64.mul(f64.add(f64.mul(f64.add(f64.mul(f64.add(f64.mul(f64.add(f64.mul(local.get 0)(f64.const 0x1.91a4911cbce5ap-3))(f64.const 0x1.97a897f8e6cap-3))(local.get 0))(f64.const 0x1.d8a9d6a7940bp-3))(local.get 0))(f64.const 0x1.1745bc213e72fp-2))(local.get 0))(f64.const 0x1.5555557cccac1p-2))(local.get 0))(f64.const 0x1.b6db6db6b8d5fp-2))(local.get 0))(f64.const 0x1.3333333333385p-1))))))(f64.const 0x1.8p+1)))(i64.const -67108864))))))(local.tee 9(f64.add(f64.mul(local.get 7)(local.get 0))(f64.mul(local.get 9)(f64.add(local.get 8)(f64.add(local.get 10)(f64.sub(f64.const 0x1.8p+1)(local.get 0)))))))))(i64.const -4294967296))))(f64.const 0x1.ec709dc4p-1)))(local.tee 9(f64.add(f64.mul(local.get 0)(f64.const -0x1.7f00a2d80faabp-35))(f64.mul(f64.add(local.get 9)(f64.sub(local.get 11)(local.get 0)))(f64.const 0x1.ec709dc3a03fdp-1)))))(local.tee 8(f64.convert_i64_s(i64.shr_s(local.get 5)(i64.const 52))))))(i64.const -2097152))))))(local.tee 0(f64.add(f64.mul(f64.sub(local.get 1)(local.get 6))(local.get 0))(f64.mul(f64.add(local.get 9)(f64.add(local.get 7)(f64.sub(local.get 8)(local.get 0))))(local.get 1))))))(f64.const 0x1p+10)))(local.set 9(f64.sub(local.get 
1)(local.get 10)))(block(br_if 0(f64.ne(local.get 1)(f64.const 0x1p+10)))(br_if 1(f64.lt(local.get 9)(local.get 0))))(local.set 2(f64.const 0x0p+0))(br_if 0(f64.lt(local.get 1)(f64.const -0x1.0ccp+10)))(block(br_if 0(f64.ne(local.get 1)(f64.const -0x1.0ccp+10)))(br_if 1(f64.gt(local.get 9)(local.get 0))))(local.set 4(i64.reinterpret_f64(f64.add(f64.add(local.tee 8(f64.mul(local.tee 7(f64.reinterpret_i64(i64.and(i64.reinterpret_f64(local.tee 2(f64.sub(local.get 1)(local.tee 9(f64.nearest(local.get 1))))))(i64.const -4294967296))))(f64.const 0x1.62e42ffp-1)))(f64.add(local.tee 2(f64.add(f64.mul(local.get 2)(f64.const -0x1.718432a1b0e26p-35))(f64.mul(f64.add(local.get 0)(f64.sub(local.get 10)(f64.add(local.get 9)(local.get 7))))(f64.const 0x1.62e42ffp-1))))(f64.div(f64.mul(local.tee 0(f64.add(local.get 8)(local.get 2)))(local.tee 2(f64.sub(local.get 0)(f64.mul(local.tee 2(f64.mul(local.get 0)(local.get 0)))(f64.add(f64.mul(local.get 2)(f64.add(f64.mul(local.get 2)(f64.add(f64.mul(local.get 2)(f64.add(f64.mul(local.get 2)(f64.const 0x1.63f2a09c94b4cp-25))(f64.const -0x1.bbd53273e8fb7p-20)))(f64.const 0x1.1566ab5c2ba0dp-14)))(f64.const -0x1.6c16c16c0ac3cp-9)))(f64.const 0x1.5555555555553p-3))))))(f64.sub(f64.const 0x1p+1)(local.get 2)))))(f64.const 0x1p+0))))(block(block(br_if 0(i32.eqz(f64.lt(f64.abs(local.get 9))(f64.const 0x1p+63))))(local.set 5(i64.trunc_f64_s(local.get 9)))(br 1))(local.set 5(i64.const -9223372036854775808)))(local.set 2(select(f64.mul(f64.reinterpret_i64(i64.add(local.tee 4(i64.add(i64.shl(local.get 5)(i64.const 52))(local.get 4)))(i64.const 4593671619917905920)))(f64.const 0x1p-1020))(f64.reinterpret_i64(local.get 4))(f64.lt(local.get 1)(f64.const -0x1.fep+9)))))(local.set 2(f64.reinterpret_i64(i64.or(local.get 3)(i64.reinterpret_f64(local.get 2))))))(local.get 2))`
/**
 * Multiply the vector (x, y, z) by a fixed 3×3 matrix, one row per output channel.
 * Takes three separate scalar arguments (the "args" calling convention in this file).
 * NOTE(review): named as an XYZ → linear-RGB step; the coefficients look like the
 * sRGB matrix — confirm against your reference.
 *
 * @param {number} x
 * @param {number} y
 * @param {number} z
 * @returns {number[]} new three-element array, one value per matrix row
 */
const xyz2lrgb3 = (x, y, z) => {
  // Terms are summed left to right: (x-term + y-term) + z-term.
  const row0 = 3.240969941904521 * x + -1.537383177570093 * y + -0.498610760293 * z
  const row1 = -0.96924363628087 * x + 1.87596750150772 * y + 0.041555057407175 * z
  const row2 = 0.055630079696993 * x + -0.20397695888897 * y + 1.056971514242878 * z
  return [row0, row1, row2]
}
/**
 * Apply a piecewise transfer curve to each of three channels independently:
 * values above 0.0031308 map to 1.055 * v^(1/2.4) - 0.055, all others to v * 12.92.
 * Takes three separate scalar arguments (the "args" calling convention in this file).
 * NOTE(review): these constants look like the sRGB encoding curve — confirm.
 *
 * @param {number} r
 * @param {number} g
 * @param {number} b
 * @returns {number[]} new three-element array of the transformed channels, in r, g, b order
 */
const lrgb2rgb3 = (r, g, b) => {
  const red = r > 0.0031308 ? 1.055 * r ** (1 / 2.4) - 0.055 : 12.92 * r
  const green = g > 0.0031308 ? 1.055 * g ** (1 / 2.4) - 0.055 : 12.92 * g
  const blue = b > 0.0031308 ? 1.055 * b ** (1 / 2.4) - 0.055 : 12.92 * b
  return [red, green, blue]
}
/**
 * Multiply a three-element tuple by a fixed 3×3 matrix, one row per output channel.
 * Takes a single array argument (the "tuple" calling convention in this file).
 * NOTE(review): named as an XYZ → linear-RGB step; the coefficients look like the
 * sRGB matrix — confirm against your reference.
 *
 * @param {number[]} xyz - iterable whose first three items are destructured as x, y, z
 * @returns {number[]} new three-element array, one value per matrix row
 */
const xyz2lrgb = ([x, y, z]) => {
  // Terms are summed left to right: (x-term + y-term) + z-term.
  const row0 = 3.240969941904521 * x + -1.537383177570093 * y + -0.498610760293 * z
  const row1 = -0.96924363628087 * x + 1.87596750150772 * y + 0.041555057407175 * z
  const row2 = 0.055630079696993 * x + -0.20397695888897 * y + 1.056971514242878 * z
  return [row0, row1, row2]
}
/**
 * Apply a piecewise transfer curve to each element of a three-element tuple:
 * values above 0.0031308 map to 1.055 * v^(1/2.4) - 0.055, all others to v * 12.92.
 * Takes a single array argument (the "tuple" calling convention in this file).
 * NOTE(review): these constants look like the sRGB encoding curve — confirm.
 *
 * @param {number[]} rgb - iterable whose first three items are destructured as r, g, b
 * @returns {number[]} new three-element array of the transformed channels, in r, g, b order
 */
const lrgb2rgb = ([r, g, b]) => {
  const red = r > 0.0031308 ? 1.055 * r ** (1 / 2.4) - 0.055 : 12.92 * r
  const green = g > 0.0031308 ? 1.055 * g ** (1 / 2.4) - 0.055 : 12.92 * g
  const blue = b > 0.0031308 ? 1.055 * b ** (1 / 2.4) - 0.055 : 12.92 * b
  return [red, green, blue]
}
// WebAssembly side of the benchmark.
// watr() is given the WAT module text below; its result is passed to
// `new WebAssembly.Module` further down. The module embeds the `f64pow`
// source string and exports four functions:
//   - xyz2lrgb3wasm(x, y, z): same matrix constants as the JS `xyz2lrgb3`, returns three f64 values
//   - lrgb2rgb3wasm(r, g, b): same threshold/curve constants as the JS `lrgb2rgb3`, calls `$f64pow`
//   - xyz2rgb3wasm(x, y, z): feeds the three results of `$xyz2lrgb3` straight into `$lrgb2rgb3`
//   - fullWasm(N): runs the whole loop inside wasm, calling `$xyz2rgb3` with
//     (i/N, i/N, 1 - i/N) for i = 0 while i < N; it declares no result
// Everything between the backticks (including the `;;` WAT comments) is part of
// the runtime string handed to watr.
const buf = watr(`
(module
;; Import Math.pow function
;; (import "js" "f64pow" (func $f64pow (param f64 f64) (result f64)))
${f64pow}
(func $xyz2lrgb3 (export "xyz2lrgb3wasm") (param $x f64) (param $y f64) (param $z f64)
(result f64 f64 f64)
;; Calculate first component
(f64.add
(f64.add
(f64.mul (local.get $x) (f64.const 3.240969941904521))
(f64.mul (local.get $y) (f64.const -1.537383177570093))
)
(f64.mul (local.get $z) (f64.const -0.498610760293))
)
;; Calculate second component
(f64.add
(f64.add
(f64.mul (local.get $x) (f64.const -0.96924363628087))
(f64.mul (local.get $y) (f64.const 1.87596750150772))
)
(f64.mul (local.get $z) (f64.const 0.041555057407175))
)
;; Calculate third component
(f64.add
(f64.add
(f64.mul (local.get $x) (f64.const 0.055630079696993))
(f64.mul (local.get $y) (f64.const -0.20397695888897))
)
(f64.mul (local.get $z) (f64.const 1.056971514242878))
)
)
(func $lrgb2rgb3 (export "lrgb2rgb3wasm") (param $r f64) (param $g f64) (param $b f64)
(result f64 f64 f64)
;; Process R component
(local.get $r)
(f64.const 0.0031308)
(f64.gt)
(if (result f64)
(then
(f64.sub
(f64.mul
(f64.const 1.055)
(call $f64pow
(local.get $r)
(f64.const 0.4166666666666667) ;; 1/2.4
)
)
(f64.const 0.055)
)
)
(else
(f64.mul (local.get $r) (f64.const 12.92))
)
)
;; Process G component
(local.get $g)
(f64.const 0.0031308)
(f64.gt)
(if (result f64)
(then
(f64.sub
(f64.mul
(f64.const 1.055)
(call $f64pow
(local.get $g)
(f64.const 0.4166666666666667)
)
)
(f64.const 0.055)
)
)
(else
(f64.mul (local.get $g) (f64.const 12.92))
)
)
;; Process B component
(local.get $b)
(f64.const 0.0031308)
(f64.gt)
(if (result f64)
(then
(f64.sub
(f64.mul
(f64.const 1.055)
(call $f64pow
(local.get $b)
(f64.const 0.4166666666666667)
)
)
(f64.const 0.055)
)
)
(else
(f64.mul (local.get $b) (f64.const 12.92))
)
)
)
;; joined conversion
(func $xyz2rgb3 (export "xyz2rgb3wasm") (param f64 f64 f64) (result f64 f64 f64) (call $xyz2lrgb3 (local.get 0) (local.get 1) (local.get 2)) (call $lrgb2rgb3 ))
;; Define a function to simulate the loop
(func $fullWasm (export "fullWasm") (param $N f64)
(local $i f64)
;; Initialize i to 0
(local.set $i (f64.const 0))
;; Start the loop
(loop $loop
;; Check if i < N
(if (f64.lt (local.get $i) (local.get $N))
(then
;; Call xyz2rgb3wasm with i/N, i/N, 1 - i/N
(call $xyz2rgb3
(f64.div (local.get $i) (local.get $N))
(f64.div (local.get $i) (local.get $N))
(f64.sub (f64.const 1) (f64.div (local.get $i) (local.get $N)))
)
;; Increment i
(local.set $i (f64.add (local.get $i) (f64.const 1)))
;; Continue the loop
(br $loop)
)
)
)
)
)
`)
// Compile the module from `buf` and instantiate it synchronously.
// An import object with "js"."f64pow" = Math.pow is passed in, but the
// matching import line in the WAT source is commented out, so as written the
// module uses its embedded `$f64pow` instead.
const mod = new WebAssembly.Module(buf)
const {exports: {xyz2lrgb3wasm, lrgb2rgb3wasm, xyz2rgb3wasm, fullWasm}} = new WebAssembly.Instance(mod, {js: {f64pow: Math.pow}})
// Iteration count shared by every timed loop below.
const N = 1e6
// Sanity print: the same sample point through the pure-JS pair, the two-call
// wasm pair, and the joined wasm export, so the three outputs can be compared by eye.
console.log(lrgb2rgb3(...xyz2lrgb3(100/N, 100/N, 1 - 100/N)), lrgb2rgb3wasm(...xyz2lrgb3wasm(100/N, 100/N, 1 - 100/N)), xyz2rgb3wasm(100/N, 100/N, 1 - 100/N))
// Every loop below feeds (i/N, i/N, 1 - i/N) and discards the result; the
// variants differ only in how values are passed between the two stages.
// 'tuple': JS functions taking and returning a single array.
console.time('tuple')
for (let i = 0; i < N; i++) lrgb2rgb(xyz2lrgb([i/N, i/N, 1 - i/N]))
console.timeEnd('tuple')
// 'args': JS functions with separate arguments; intermediate array is spread into the second call.
console.time('args')
for (let i = 0; i < N; i++) lrgb2rgb3(...xyz2lrgb3(i/N, i/N, 1 - i/N))
console.timeEnd('args')
// 'args2': same as 'args' but the intermediate array is indexed instead of spread.
console.time('args2')
for (let i = 0, abc; i < N; i++) abc = xyz2lrgb3(i/N, i/N, 1 - i/N), lrgb2rgb3(abc[0], abc[1], abc[2])
console.timeEnd('args2')
// 'args3': same as 'args' but the intermediate array is destructured into three variables.
console.time('args3')
for (let i = 0, a, b, c; i < N; i++) [a, b, c] = xyz2lrgb3(i/N, i/N, 1 - i/N), lrgb2rgb3(a, b, c)
console.timeEnd('args3')
// 'args wasm': both stages are wasm exports, called separately from JS (two JS↔wasm calls per iteration).
console.time('args wasm')
for (let i = 0, abc; i < N; i++) abc = xyz2lrgb3wasm(i/N, i/N, 1 - i/N), lrgb2rgb3wasm(abc[0], abc[1], abc[2])
console.timeEnd('args wasm')
// 'args half wasm': matrix stage in wasm, transfer-curve stage in JS.
console.time('args half wasm')
for (let i = 0, abc; i < N; i++) abc = xyz2lrgb3wasm(i/N, i/N, 1 - i/N), lrgb2rgb3(abc[0], abc[1], abc[2])
console.timeEnd('args half wasm')
// 'args joined wasm': a single wasm export performs both stages (one JS↔wasm call per iteration).
console.time('args joined wasm')
for (let i = 0; i < N; i++) xyz2rgb3wasm(i/N, i/N, 1 - i/N)
console.timeEnd('args joined wasm')
// 'args full wasm': the loop itself runs inside wasm; one call from JS in total.
console.time('args full wasm')
fullWasm(N)
console.timeEnd('args full wasm')
// 'tuple' is timed a second time with the same label.
// NOTE(review): presumably to compare against the first run once everything is warmed up — confirm intent.
console.time('tuple')
for (let i = 0; i < N; i++) lrgb2rgb(xyz2lrgb([i/N, i/N, 1 - i/N]))
console.timeEnd('tuple')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment