Last active
December 2, 2023 01:34
-
-
Save RoyalIcing/bc37ac2bbae15ab77e20a9ea6dd96860 to your computer and use it in GitHub Desktop.
Lemire’s Parsing 8-bit integers quickly in WebAssembly via Orb. https://lemire.me/blog/2023/11/28/parsing-8-bit-integers-quickly/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule ParseU8 do | |
@moduledoc """ | |
https://lemire.me/blog/2023/11/28/parsing-8-bit-integers-quickly/ | |
""" | |
# Orb lets you write WebAssembly with friendly Elixir syntax: https://github.com/RoyalIcing/Orb | |
use Orb | |
Memory.pages(1) | |
wasm_mode(U32) | |
defw parse_uint8_naive(str: I32.String, len: I32), {I32, I32}, | |
i: I32, | |
r: I32, | |
d: I32, | |
n: I32 do | |
r = len &&& 0x3 | |
loop EachChar do | |
d = str[at!: i] - ?0 | |
if d > 9 do | |
0 | |
0 | |
return() | |
end | |
n = n * 10 + d | |
i = i + 1 | |
EachChar.continue(if: i < r) | |
end | |
n < 256 &&& len !== 0 &&& len < 4 | |
n | |
end | |
defw parse_uint8_fastswar(str: I32.String, len: I32), {I32, I32}, digits: I32, n: I32 do | |
if len === 0 or len > 3 do | |
0 | |
0 | |
return() | |
end | |
digits = | |
Memory.load!(I32, str) | |
# Loads as little-endian | |
|> I32.xor(0x30303030) | |
|> I32.shl((4 - len) * 8) | |
n = | |
digits | |
|> I32.mul(0x640A01) | |
|> I32.shr_u(24) | |
|> I32.band(0x000000FF) | |
# Check are valid digits | |
I32.eqz((digits ||| 0x06060606 + digits) &&& 0xF0F0F0F0) | |
# Check is <= 255 (after converting to big-endian) | |
|> I32.band( | |
I32.rotl(digits |> I32.band(0xFF00FF00), 8) | |
|> I32.or(I32.rotr(digits |> I32.band(0x00FF00FF), 8)) <= 0x020505 | |
) | |
n | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(module $ParseU8 | |
(memory (export "memory") 1) | |
(func $parse_uint8_naive (export "parse_uint8_naive") (param $str i32) (param $len i32) (result i32 i32) | |
(local $i i32) | |
(local $r i32) | |
(local $d i32) | |
(local $n i32) | |
(i32.and (local.get $len) (i32.const 3)) | |
(local.set $r) | |
(loop $EachChar | |
(i32.sub (i32.load8_u (i32.add (local.get $str) (local.get $i))) (i32.const 48)) | |
(local.set $d) | |
(i32.gt_u (local.get $d) (i32.const 9)) | |
(if | |
(then | |
(i32.const 0) | |
(i32.const 0) | |
return | |
) | |
) | |
(i32.add (i32.mul (local.get $n) (i32.const 10)) (local.get $d)) | |
(local.set $n) | |
(i32.add (local.get $i) (i32.const 1)) | |
(local.set $i) | |
(i32.lt_u (local.get $i) (local.get $r)) | |
(br_if $EachChar) | |
) | |
(i32.and (i32.and (i32.lt_u (local.get $n) (i32.const 256)) (i32.ne (local.get $len) (i32.const 0))) (i32.lt_u (local.get $len) (i32.const 4))) | |
(local.get $n) | |
) | |
(func $parse_uint8_fastswar (export "parse_uint8_fastswar") (param $str i32) (param $len i32) (result i32 i32) | |
(local $digits i32) | |
(local $n i32) | |
(i32.or (i32.eqz (local.get $len)) (i32.gt_u (local.get $len) (i32.const 3))) | |
(if | |
(then | |
(i32.const 0) | |
(i32.const 0) | |
return | |
) | |
) | |
(i32.shl (i32.xor (i32.load (local.get $str)) (i32.const 808464432)) (i32.mul (i32.sub (i32.const 4) (local.get $len)) (i32.const 8))) | |
(local.set $digits) | |
(i32.and (i32.shr_u (i32.mul (local.get $digits) (i32.const 6556161)) (i32.const 24)) (i32.const 255)) | |
(local.set $n) | |
(i32.and (i32.eqz (i32.and (i32.or (local.get $digits) (i32.add (i32.const 101058054) (local.get $digits))) (i32.const 4042322160))) (i32.le_u (i32.or (i32.rotl (i32.and (local.get $digits) (i32.const 4278255360)) (i32.const 8)) (i32.rotr (i32.and (local.get $digits) (i32.const 16711935)) (i32.const 8))) (i32.const 132357))) | |
(local.get $n) | |
) | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule ParseU8Test do | |
use ExUnit.Case, async: true | |
alias OrbWasmtime.Instance | |
def naive(str) when is_binary(str) do | |
i = Instance.run(ParseU8) | |
Instance.write_memory(i, 0x100, [0, 0, 0, 0]) | |
Instance.write_memory(i, 0x100, str |> :binary.bin_to_list()) | |
Instance.call(i, :parse_uint8_naive, 0x100, byte_size(str)) | |
end | |
def naive(strings) when is_list(strings) do | |
multi(strings, :parse_uint8_naive) | |
end | |
def fastswar(str) when is_binary(str) do | |
i = Instance.run(ParseU8) | |
Instance.write_memory(i, 0x100, [0, 0, 0, 0]) | |
Instance.write_memory(i, 0x100, str |> :binary.bin_to_list()) | |
Instance.call(i, :parse_uint8_fastswar, 0x100, byte_size(str)) | |
end | |
def fastswar(strings) do | |
multi(strings, :parse_uint8_fastswar) | |
end | |
defp multi(strings, func) | |
when func in ~w(parse_uint8_naive parse_uint8_fastswar)a do | |
i = Instance.run(ParseU8) | |
Stream.map(strings, fn str -> | |
Instance.write_memory(i, 0x100, [0, 0, 0, 0]) | |
Instance.write_memory(i, 0x100, str |> :binary.bin_to_list()) | |
# Instance.write_string_nul_terminated(i, 0x100, str) | |
{str, Instance.call(i, func, 0x100, byte_size(str))} | |
end) | |
end | |
test "naive" do | |
IO.puts(ParseU8.to_wat()) | |
{1, 0} = naive("0") | |
{1, 1} = naive("1") | |
{1, 7} = naive("7") | |
{1, 9} = naive("9") | |
{1, 12} = naive("12") | |
{1, 88} = naive("88") | |
{1, 99} = naive("99") | |
{1, 120} = naive("120") | |
{1, 255} = naive("255") | |
{0, 256} = naive("256") | |
for result <- naive(for i <- 0..255, do: "#{i}") do | |
assert {str, {1, i}} = result | |
assert {i, ""} = Integer.parse(str) | |
end | |
for result <- naive(for i <- 256..9999, do: "#{i}") do | |
assert {_, {0, _}} = result | |
end | |
end | |
defp fuzz_chars() do | |
0..0xFFFFFF | |
|> Stream.map(fn b -> <<b::integer-size(32)>> end) | |
end | |
test "fastswar" do | |
{1, 0} = fastswar("0") | |
{1, 1} = fastswar("1") | |
{1, 7} = fastswar("7") | |
{1, 9} = fastswar("9") | |
{1, 99} = fastswar("99") | |
{1, 120} = fastswar("120") | |
{1, 255} = fastswar("255") | |
{0, _} = fastswar("256") | |
{0, _} = fastswar("257") | |
{0, _} = fastswar("258") | |
{0, _} = fastswar("abc") | |
{0, _} = fastswar("\0") | |
{0, _} = fastswar("\0\0\0\0") | |
for result <- fastswar(for i <- 0..255, do: "#{i}") do | |
assert {str, {1, i}} = result | |
assert {i, ""} = Integer.parse(str) | |
end | |
for result <- fastswar(for i <- 256..9999, do: "#{i}") do | |
assert {_, {0, _}} = result | |
end | |
fuzz_chars() | |
|> Stream.take_every(13..17 |> Enum.random()) | |
|> Stream.chunk_every(1024) | |
|> Stream.each(fn batch -> | |
fastswar(batch) |> Enum.each(fn result -> | |
assert {str, result} = result | |
case Integer.parse(str) do | |
{i, ""} -> | |
assert {1, i} = result | |
:error -> | |
assert {0, _} = result | |
end | |
end) | |
end) | |
|> Stream.run() | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment