Separate html files to benchmark with Tachometer.
Run a full benchmark across all four sum techniques using:
npx tachometer 1_simple.html 2_chunks.html 3_simple_wasm.html 4_wasm_simd.html
| <!DOCTYPE html> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <title>1. Simple Sum</title> | |
| <script type="module"> | |
| import * as bench from '/bench.js'; | |
/**
 * Adds up every value in `nums` using a plain `for...of` loop.
 *
 * The loop shape is intentionally left as-is: this page measures the
 * simple, single-accumulator technique.
 *
 * @param {Iterable<number>} nums - Values to add together.
 * @returns {number} The total, or 0 when `nums` yields nothing.
 */
function sum(nums) {
  let result = 0;
  // `const`: the loop binding is never reassigned inside the body.
  for (const val of nums) {
    result += val;
  }
  return result;
}
| // Input: 1,000,000 random integers in [0, 99], built before bench.start() | |
| // so that generating the array is not part of the bracketed region. | |
| const randomNumbers = Array.from( | |
| { length: 1_000_000 }, | |
| () => Math.floor(Math.random() * 100)); | |
| // Only the sum() call and the console.log of its result sit between | |
| // start and stop (presumably the region Tachometer measures - confirm in bench.js). | |
| bench.start(); | |
| console.log(sum(randomNumbers)); | |
| bench.stop(); | |
| </script> |
| <!DOCTYPE html> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>2. Sum using four accumulators</title> | |
| <script type="module"> | |
| import * as bench from '/bench.js'; | |
/**
 * Adds up `nums` four elements per iteration, keeping one running total
 * per position within the group and combining the four totals at the end.
 *
 * When the length is not a multiple of four, the reads past the end yield
 * `undefined`; `?? 0` turns those into zero so the last partial group is
 * still summed correctly.
 *
 * @param {number[]} nums - Values to add together.
 * @returns {number} The total, or 0 for an empty array.
 */
function sum(nums) {
  let lane0 = 0;
  let lane1 = 0;
  let lane2 = 0;
  let lane3 = 0;

  let base = 0;
  while (base < nums.length) {
    lane0 += nums[base];
    lane1 += nums[base + 1] ?? 0;
    lane2 += nums[base + 2] ?? 0;
    lane3 += nums[base + 3] ?? 0;
    base += 4;
  }

  return lane0 + lane1 + lane2 + lane3;
}
| // Input: 1,000,000 random integers in [0, 99], built before bench.start() | |
| // so that generating the array is not part of the bracketed region. | |
| const randomNumbers = Array.from( | |
| { length: 1_000_000 }, | |
| () => Math.floor(Math.random() * 100)); | |
| // Only the sum() call and the console.log of its result sit between | |
| // start and stop (presumably the region Tachometer measures - confirm in bench.js). | |
| bench.start(); | |
| console.log(sum(randomNumbers)); | |
| bench.stop(); | |
| </script> |
| <!DOCTYPE html> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>3. Simple WASM sum</title> | |
| The WAT (which was compiled by https://webassembly.github.io/wabt/demo/wat2wasm/): | |
| <code> | |
| <pre> | |
| ;; Scalar sum: adds $len consecutive i32 values starting at byte offset $ptr | |
| ;; of the imported memory. i32.add wraps, so the result is the sum modulo 2^32. | |
| (module | |
| ;; Import linear memory from JavaScript; the page script writes the array into it. | |
| ;; The "1" is only the declared minimum size in 64 KiB pages. | |
| (import "js" "mem" (memory 1)) | |
| ;; Export the sum function | |
| (export "sum" (func $sum)) | |
| ;; Sum function that takes array pointer and length | |
| ;; Parameters: i32 ptr (byte offset in memory), i32 len (number of elements) | |
| ;; Returns: i32 (sum result) | |
| (func $sum (param $ptr i32) (param $len i32) (result i32) | |
| (local $result i32) | |
| (local $i i32) | |
| (local $current_val i32) | |
| ;; Initialize result to 0 | |
| (local.set $result (i32.const 0)) | |
| ;; Initialize counter to 0 | |
| (local.set $i (i32.const 0)) | |
| ;; Loop through array | |
| (block $exit | |
| (loop $continue | |
| ;; Check if we've processed all elements (unsigned compare, so len = 0 exits immediately) | |
| (br_if $exit (i32.ge_u (local.get $i) (local.get $len))) | |
| ;; Load current value from memory | |
| ;; Each i32 is 4 bytes, so multiply index by 4 and add to base pointer | |
| (local.set $current_val | |
| (i32.load | |
| (i32.add | |
| (local.get $ptr) | |
| (i32.mul (local.get $i) (i32.const 4))))) | |
| ;; Add current value to result | |
| (local.set $result | |
| (i32.add (local.get $result) (local.get $current_val))) | |
| ;; Increment counter | |
| (local.set $i (i32.add (local.get $i) (i32.const 1))) | |
| ;; Continue loop | |
| (br $continue) | |
| ) | |
| ) | |
| ;; Return the result | |
| (local.get $result) | |
| ) | |
| ) | |
| </pre> | |
| </code> | |
| <script type="module"> | |
| import * as bench from '/bench.js'; | |
| // 64 WebAssembly pages of 64 KiB each = 4 MiB; 1M int32 values need 4,000,000 bytes. | |
| const memory = new WebAssembly.Memory({ initial: 64 }); | |
/**
 * Decodes the embedded base64 WebAssembly binary and instantiates it,
 * supplying the module-level `memory` as the `js.mem` import.
 *
 * @returns {Promise<WebAssembly.Exports>} The exports of the new instance.
 */
async function loadWasm() {
  const wasmBase64 = 'AGFzbQEAAAABBwFgAn9/AX8CCwECanMDbWVtAgABAwIBAAcHAQNzdW0AAAo6ATgBA39BACECQQAhAwJAA0AgAyABTw0BIAAgA0EEbGooAgAhBCACIARqIQIgA0EBaiEDDAALCyACCwA0BG5hbWUBBgEAA3N1bQIlAQAFAANwdHIBA2xlbgIGcmVzdWx0AwFpBAtjdXJyZW50X3ZhbA==';

  // atob yields one character per byte; copy the char codes into a byte array.
  const binaryString = atob(wasmBase64);
  const wasmBytes = new Uint8Array(binaryString.length);
  for (let offset = 0; offset < binaryString.length; offset++) {
    wasmBytes[offset] = binaryString.charCodeAt(offset);
  }

  const importObject = { js: { mem: memory } };
  const { instance } = await WebAssembly.instantiate(wasmBytes, importObject);
  return instance.exports;
}
/**
 * Copies `arr` into the start of a WebAssembly memory as 32-bit signed
 * integers.
 *
 * @param {ArrayLike<number>} arr - Values to copy; each is stored as an int32.
 * @param {WebAssembly.Memory} memory - Destination linear memory.
 * @returns {number} Byte offset of the first copied element (always 0).
 * @throws {RangeError} If `arr` has more elements than `memory` can hold.
 */
function copyArrayToMemory(arr, memory) {
  const buffer = new Int32Array(memory.buffer);
  // Indexed writes past the end of a typed array are silently ignored, which
  // would truncate the data without any error; fail loudly instead.
  if (arr.length > buffer.length) {
    throw new RangeError(
      `Cannot copy ${arr.length} int32 values into a memory that holds only ${buffer.length}`,
    );
  }
  buffer.set(arr);
  return 0;
}
| // Input: 1,000,000 random integers in [0, 99]. | |
| const randomNumbers = Array.from( | |
| { length: 1_000_000 }, | |
| () => Math.floor(Math.random() * 100)); | |
| // Instantiate the module and copy the input into its memory up front, | |
| // so neither step falls between bench.start() and bench.stop(). | |
| const wasmExports = await loadWasm(); | |
| const arrayPtr = copyArrayToMemory(randomNumbers, memory); | |
| bench.start(); | |
| // sum(ptr, len): byte offset of the data and number of i32 elements. | |
| console.log(wasmExports.sum(arrayPtr, randomNumbers.length)); | |
| bench.stop(); | |
| </script> |
| <!DOCTYPE html> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>4. WASM Sum using SIMD</title> | |
| The WAT (which was compiled by https://webassembly.github.io/wabt/demo/wat2wasm/): | |
| <code> | |
| <pre> | |
| ;; SIMD sum: adds $len consecutive i32 values starting at byte offset $ptr of the | |
| ;; imported memory, four lanes at a time, then finishes the leftover 0-3 elements | |
| ;; with a scalar loop. All additions wrap, so the result is the sum modulo 2^32. | |
| (module | |
| ;; Import linear memory from JavaScript; the page script writes the array into it. | |
| ;; The "1" is only the declared minimum size in 64 KiB pages. | |
| (import "js" "mem" (memory 1)) | |
| ;; Export the sum function | |
| (export "sum" (func $sum)) | |
| ;; Sum function that takes array pointer and length using SIMD | |
| ;; Parameters: i32 ptr (byte offset in memory), i32 len (number of elements) | |
| ;; Returns: i32 (sum result) | |
| (func $sum (param $ptr i32) (param $len i32) (result i32) | |
| (local $result i32) | |
| (local $i i32) | |
| (local $simd_elements i32) | |
| (local $remaining i32) | |
| (local $sum_vec v128) | |
| (local $current_vec v128) | |
| ;; NOTE: $final_sum is declared but never used in this function. | |
| (local $final_sum v128) | |
| ;; Initialize result to 0 | |
| (local.set $result (i32.const 0)) | |
| ;; Initialize counter to 0 | |
| (local.set $i (i32.const 0)) | |
| ;; Initialize SIMD sum vector to zeros | |
| (local.set $sum_vec (v128.const i32x4 0 0 0 0)) | |
| ;; Calculate how many SIMD operations we can do (4 i32s per v128) | |
| ;; (despite its name, $simd_elements counts 4-element chunks, not elements) | |
| (local.set $simd_elements (i32.div_u (local.get $len) (i32.const 4))) | |
| ;; Calculate remaining elements after SIMD processing | |
| ;; NOTE: $remaining is written here but never read; the scalar loop below is bounded by $len. | |
| (local.set $remaining (i32.rem_u (local.get $len) (i32.const 4))) | |
| ;; SIMD loop - process 4 elements at a time ($i counts chunks here) | |
| (block $simd_exit | |
| (loop $simd_continue | |
| ;; Check if we've processed all SIMD chunks | |
| (br_if $simd_exit (i32.ge_u (local.get $i) (local.get $simd_elements))) | |
| ;; Load 4 i32 values as a v128 vector | |
| (local.set $current_vec | |
| (v128.load | |
| (i32.add | |
| (local.get $ptr) | |
| (i32.mul (local.get $i) (i32.const 16))))) ;; 4 * 4 bytes = 16 bytes | |
| ;; Add current vector to sum vector (lane-wise, so each lane keeps its own partial sum) | |
| (local.set $sum_vec | |
| (i32x4.add (local.get $sum_vec) (local.get $current_vec))) | |
| ;; Increment counter | |
| (local.set $i (i32.add (local.get $i) (i32.const 1))) | |
| ;; Continue SIMD loop | |
| (br $simd_continue) | |
| ) | |
| ) | |
| ;; Extract the 4 lanes from the SIMD vector and sum them | |
| (local.set $result | |
| (i32.add | |
| (i32.add | |
| (i32x4.extract_lane 0 (local.get $sum_vec)) | |
| (i32x4.extract_lane 1 (local.get $sum_vec))) | |
| (i32.add | |
| (i32x4.extract_lane 2 (local.get $sum_vec)) | |
| (i32x4.extract_lane 3 (local.get $sum_vec))))) | |
| ;; Process remaining elements (if any) with scalar operations | |
| ;; $i switches from a chunk index to an element index: first element not covered by SIMD | |
| (local.set $i (i32.mul (local.get $simd_elements) (i32.const 4))) | |
| (block $scalar_exit | |
| (loop $scalar_continue | |
| ;; Check if we've processed all remaining elements | |
| (br_if $scalar_exit (i32.ge_u (local.get $i) (local.get $len))) | |
| ;; Add current element to result | |
| (local.set $result | |
| (i32.add | |
| (local.get $result) | |
| (i32.load | |
| (i32.add | |
| (local.get $ptr) | |
| (i32.mul (local.get $i) (i32.const 4)))))) | |
| ;; Increment counter | |
| (local.set $i (i32.add (local.get $i) (i32.const 1))) | |
| ;; Continue scalar loop | |
| (br $scalar_continue) | |
| ) | |
| ) | |
| ;; Return the result | |
| (local.get $result) | |
| ) | |
| ) | |
| </pre> | |
| </code> | |
| <script type="module"> | |
| import * as bench from '/bench.js'; | |
| // 64 WebAssembly pages of 64 KiB each = 4 MiB; 1M int32 values need 4,000,000 bytes. | |
| const memory = new WebAssembly.Memory({ initial: 64 }); | |
/**
 * Decodes the embedded base64 WebAssembly binary (the SIMD build) and
 * instantiates it, supplying the module-level `memory` as the `js.mem` import.
 *
 * @returns {Promise<WebAssembly.Exports>} The exports of the new instance.
 */
async function loadWasm() {
  const wasmBase64 = 'AGFzbQEAAAABBwFgAn9/AX8CCwECanMDbWVtAgABAwIBAAcHAQNzdW0AAAqoAQGlAQIEfwN7QQAhAkEAIQP9DAAAAAAAAAAAAAAAAAAAAAAhBiABQQRuIQQgAUEEcCEFAkADQCADIARPDQEgACADQRBsav0ABAAhByAGIAf9rgEhBiADQQFqIQMMAAsLIAb9GwAgBv0bAWogBv0bAiAG/RsDamohAiAEQQRsIQMCQANAIAMgAU8NASACIAAgA0EEbGooAgBqIQIgA0EBaiEDDAALCyACCwBiBG5hbWUBBgEAA3N1bQJTAQAJAANwdHIBA2xlbgIGcmVzdWx0AwFpBA1zaW1kX2VsZW1lbnRzBQlyZW1haW5pbmcGB3N1bV92ZWMHC2N1cnJlbnRfdmVjCAlmaW5hbF9zdW0=';

  // atob yields one character per byte; copy the char codes into a byte array.
  const binaryString = atob(wasmBase64);
  const wasmBytes = new Uint8Array(binaryString.length);
  for (let offset = 0; offset < binaryString.length; offset++) {
    wasmBytes[offset] = binaryString.charCodeAt(offset);
  }

  const importObject = { js: { mem: memory } };
  const { instance } = await WebAssembly.instantiate(wasmBytes, importObject);
  return instance.exports;
}
/**
 * Copies `arr` into the start of a WebAssembly memory as 32-bit signed
 * integers.
 *
 * @param {ArrayLike<number>} arr - Values to copy; each is stored as an int32.
 * @param {WebAssembly.Memory} memory - Destination linear memory.
 * @returns {number} Byte offset of the first copied element (always 0).
 * @throws {RangeError} If `arr` has more elements than `memory` can hold.
 */
function copyArrayToMemory(arr, memory) {
  const buffer = new Int32Array(memory.buffer);
  // Indexed writes past the end of a typed array are silently ignored, which
  // would truncate the data without any error; fail loudly instead.
  if (arr.length > buffer.length) {
    throw new RangeError(
      `Cannot copy ${arr.length} int32 values into a memory that holds only ${buffer.length}`,
    );
  }
  buffer.set(arr);
  return 0;
}
| // Input: 1,000,000 random integers in [0, 99]. | |
| const randomNumbers = Array.from( | |
| { length: 1_000_000 }, | |
| () => Math.floor(Math.random() * 100)); | |
| // Instantiate the module and copy the input into its memory up front, | |
| // so neither step falls between bench.start() and bench.stop(). | |
| const wasmExports = await loadWasm(); | |
| const arrayPtr = copyArrayToMemory(randomNumbers, memory); | |
| bench.start(); | |
| // sum(ptr, len): byte offset of the data and number of i32 elements. | |
| console.log(wasmExports.sum(arrayPtr, randomNumbers.length)); | |
| bench.stop(); | |
| </script> |
Separate html files to benchmark with Tachometer.
Run a full benchmark across all four sum techniques using:
npx tachometer 1_simple.html 2_chunks.html 3_simple_wasm.html 4_wasm_simd.html