Skip to content

Instantly share code, notes, and snippets.

@ryanwinchester
Last active December 8, 2023 18:44
Show Gist options
  • Save ryanwinchester/2176482097224ae3f32c23d53b0c7828 to your computer and use it in GitHub Desktop.
Save ryanwinchester/2176482097224ae3f32c23d53b0c7828 to your computer and use it in GitHub Desktop.
Benchmark Bandit WebSocket Frame Mask
Mix.install([
{:benchee, "~> 1.2"}
])
# ------------------------------------------------------------------------------
# Original code
# ------------------------------------------------------------------------------
defmodule Original do
# Masking is done @mask_size bits at a time until there is less than that number of bits left.
# We then go 32 bits at a time until there is less than 32 bits left. We then go 8 bits at
# a time. This yields some significant perforamnce gains for only marginally more complexity
@mask_size 512
# Note that masking is an involution, so we don't need a separate unmask function
def mask(payload, mask) when bit_size(payload) >= @mask_size do
payload
|> do_mask(String.duplicate(<<mask::32>>, div(@mask_size, 32)), [])
|> Enum.reverse()
|> IO.iodata_to_binary()
end
def mask(payload, mask) do
payload
|> do_mask(<<mask::32>>, [])
|> Enum.reverse()
|> IO.iodata_to_binary()
end
defp do_mask(
<<h::unquote(@mask_size), rest::binary>>,
<<int_mask::unquote(@mask_size)>> = mask,
acc
) do
do_mask(rest, mask, [<<Bitwise.bxor(h, int_mask)::unquote(@mask_size)>> | acc])
end
defp do_mask(<<h::32, rest::binary>>, <<int_mask::32, _mask_rest::binary>> = mask, acc) do
do_mask(rest, mask, [<<Bitwise.bxor(h, int_mask)::32>> | acc])
end
defp do_mask(<<h::8, rest::binary>>, <<current::8, mask::24, _mask_rest::binary>>, acc) do
do_mask(rest, <<mask::24, current::8>>, [<<Bitwise.bxor(h, current)::8>> | acc])
end
defp do_mask(<<>>, _mask, acc), do: acc
end
# ------------------------------------------------------------------------------
# Proposed changes
# ------------------------------------------------------------------------------
defmodule Proposed do
@spec mask(binary(), pos_integer()) :: binary()
def mask(payload, mask) when is_integer(mask) do
payload_size = byte_size(payload)
count_4_bytes = div(payload_size, 4)
mask_repetitions =
if rem(payload_size, 4) == 0, do: count_4_bytes, else: count_4_bytes + 1
fit_mask =
<<mask::32>>
|> :binary.copy(mask_repetitions)
|> :binary.part(0, payload_size)
:crypto.exor(payload, fit_mask)
end
end
# ------------------------------------------------------------------------------
# Inputs
# ------------------------------------------------------------------------------
input_sm = String.duplicate("a", 500)
input_md = String.duplicate("a", 500_000)
input_lg = String.duplicate("a", 16_000_000)
# ------------------------------------------------------------------------------
# Bail if they're not the same.
# ------------------------------------------------------------------------------
masked_1 = Original.mask(input_sm, 1234)
masked_2 = Proposed.mask(input_sm, 1234)
^masked_1 = masked_2
masked_1 = Original.mask(input_md, 1234)
masked_2 = Proposed.mask(input_md, 1234)
^masked_1 = masked_2
masked_1 = Original.mask(input_lg, 1234)
masked_2 = Proposed.mask(input_lg, 1234)
^masked_1 = masked_2
# ------------------------------------------------------------------------------
# Benchmarks
# ------------------------------------------------------------------------------
# You can test the function, or test the function x1000 times.
# Results are the same ratios.
Benchee.run(
%{
"original" => fn input -> Original.mask(input, 1234) end,
"proposed" => fn input -> Proposed.mask(input, 1234) end,
# "original x 1000" => fn input ->
# for _ <- 1..1000, do: Original.mask(input, 1234)
# end,
# "proposed x 1000" => fn input ->
# for _ <- 1..1000, do: Proposed.mask(input, 1234)
# end
},
inputs: %{
"sm" => input_sm,
"md" => input_md,
"lg" => input_lg
},
time: 5,
memory_time: 2
)
@ryanwinchester
Copy link
Author

ryanwinchester commented Dec 8, 2023

Current run (x1000):

$ elixir bandit_mask.exs

Operating System: macOS
CPU Information: Apple M2 Ultra
Number of Available Cores: 24
Available memory: 128 GB
Elixir 1.15.7
Erlang 26.1.2

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 2 s
reduction time: 0 ns
parallel: 1
inputs: lg, md, sm
Estimated total run time: 1.20 min

Benchmarking original with input lg ...
Benchmarking original with input md ...
Benchmarking original with input sm ...
Benchmarking proposed with input lg ...
Benchmarking proposed with input md ...
Benchmarking proposed with input sm ...

##### With input lg #####
Name               ips        average  deviation         median         99th %
proposed        0.0410       0.41 min     ±0.00%       0.41 min       0.41 min
original        0.0157       1.06 min     ±0.00%       1.06 min       1.06 min

Comparison:
proposed        0.0410
original        0.0157 - 2.62x slower +0.66 min

Memory usage statistics:

Name        Memory usage
proposed         0.24 GB
original        68.66 GB - 289.09x memory usage +68.43 GB

**All measurements for memory usage were the same**

##### With input md #####
Name               ips        average  deviation         median         99th %
proposed          1.36         0.74 s     ±0.26%         0.74 s         0.74 s
original          0.81         1.23 s     ±0.48%         1.23 s         1.24 s

Comparison:
proposed          1.36
original          0.81 - 1.67x slower +0.49 s

Memory usage statistics:

Name        Memory usage
proposed      0.00737 GB
original         2.15 GB - 291.07x memory usage +2.14 GB

**All measurements for memory usage were the same**

##### With input sm #####
Name               ips        average  deviation         median         99th %
proposed        1.15 K        0.87 ms     ±4.07%        0.87 ms        0.97 ms
original        0.49 K        2.05 ms     ±8.73%        2.07 ms        2.42 ms

Comparison:
proposed        1.15 K
original        0.49 K - 2.35x slower +1.18 ms

Memory usage statistics:

Name        Memory usage
proposed        0.130 MB
original         3.36 MB - 25.92x memory usage +3.23 MB

**All measurements for memory usage were the same**

@ryanwinchester
Copy link
Author

ryanwinchester commented Dec 8, 2023

Without the x1000

$ elixir bandit_mask.exs

Operating System: macOS
CPU Information: Apple M2 Ultra
Number of Available Cores: 24
Available memory: 128 GB
Elixir 1.15.7
Erlang 26.1.2

Benchmark suite executing with the following configuration:
warmup: 2 s
time: 5 s
memory time: 2 s
reduction time: 0 ns
parallel: 1
inputs: lg, md, sm
Estimated total run time: 54 s

Benchmarking original with input lg ...
Benchmarking original with input md ...
Benchmarking original with input sm ...
Benchmarking proposed with input lg ...
Benchmarking proposed with input md ...
Benchmarking proposed with input sm ...

##### With input lg #####
Name               ips        average  deviation         median         99th %
proposed         44.04       22.71 ms     ±1.24%       22.67 ms       23.68 ms
original         16.02       62.41 ms     ±6.85%       60.32 ms       71.23 ms

Comparison:
proposed         44.04
original         16.02 - 2.75x slower +39.70 ms

Memory usage statistics:

Name        Memory usage
proposed         0.24 MB
original        70.56 MB - 289.15x memory usage +70.32 MB

**All measurements for memory usage were the same**

##### With input md #####
Name               ips        average  deviation         median         99th %
proposed        1.45 K        0.69 ms     ±4.38%        0.68 ms        0.78 ms
original        0.85 K        1.17 ms     ±8.14%        1.17 ms        1.39 ms

Comparison:
proposed        1.45 K
original        0.85 K - 1.70x slower +0.48 ms

Memory usage statistics:

Name        Memory usage
proposed      0.00754 MB
original         2.21 MB - 292.65x memory usage +2.20 MB

**All measurements for memory usage were the same**

##### With input sm #####
Name               ips        average  deviation         median         99th %
proposed        1.02 M        0.98 μs  ±2104.93%        0.79 μs        1.75 μs
original        0.34 M        2.97 μs  ±2272.26%        1.04 μs        2.58 μs

Comparison:
proposed        1.02 M
original        0.34 M - 3.04x slower +1.99 μs

Memory usage statistics:

Name        Memory usage
proposed        0.117 KB
original         3.43 KB - 29.27x memory usage +3.31 KB

**All measurements for memory usage were the same**

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment