-
-
Save zmstone/1d8b71c2eec882dfa20b87269b3f1580 to your computer and use it in GitHub Desktop.
%% Micro-benchmark: compares two ways of detecting that "enough bytes"
%% have been accumulated while repeatedly appending binaries — a binary
%% pattern match on the accumulator (match/3) vs. a size check (sizer/3).
-module(bin_element).
-export([run/1]).
%% Size in bytes of each randomly generated chunk.
-define(CHUNK_SIZE, 1049).
%% Benchmark entry point. Generates Chunks random binaries of
%% ?CHUNK_SIZE bytes each and times match/3 against sizer/3 over them.
%% An extra copy of the first chunk is prepended so the list carries
%% (Chunks + 1) * ?CHUNK_SIZE bytes — strictly more than the
%% TotalBytes target, guaranteeing both scanners terminate.
run(Chunks) ->
    Bins = lists:map(fun(_) -> crypto:strong_rand_bytes(?CHUNK_SIZE) end,
                     lists:seq(1, Chunks)),
    Target = ?CHUNK_SIZE * Chunks,
    compare([hd(Bins) | Bins], Target).
%% Runs both accumulation strategies over the same input and prints the
%% elapsed microseconds of each (timer:tc/1 returns {Microseconds, Result}).
compare(BinL, Bytes) ->
    {MatchUs, _Ignored1} = timer:tc(fun() -> match(BinL, Bytes, <<>>) end),
    {SizerUs, _Ignored2} = timer:tc(fun() -> sizer(BinL, Bytes, <<>>) end),
    io:format(user, "match: ~p\n" "sizer: ~p\n", [MatchUs, SizerUs]).
%% Appends binaries from the list onto Acc and, on every iteration, tests
%% "done" by attempting a binary pattern match of the first N bytes of Acc.
%% Returns those first N bytes once Acc holds at least N.
%%
%% NOTE(review): per the discussion accompanying this gist, matching Acc
%% here uses the accumulator outside the append context, which is believed
%% to defeat the VM's binary-append optimization, making each
%% <<Acc/binary, Bin/binary>> copy Acc (quadratic overall) — this is the
%% deliberately slow variant of the benchmark; do not "optimize" it.
%% Crashes with function_clause if the list is exhausted before N bytes
%% have been accumulated.
match([Bin | Rest], N, Acc) ->
    case Acc of
        <<Head:N/binary, _/binary>> ->
            Head;
        _ ->
            match(Rest, N, <<Acc/binary, Bin/binary>>)
    end.
%% Same accumulation as match/3, but tests "done" with size(Acc) >= N
%% instead of a binary match, so Acc is only ever consumed as the append
%% target until the final extraction. Per the discussion accompanying this
%% gist, this form is believed to let the VM's binary-append optimization
%% apply, keeping accumulation roughly linear — this is the fast variant
%% of the benchmark; its exact shape is the point, do not restructure.
%% Crashes with function_clause if the list is exhausted before N bytes
%% have been accumulated.
sizer([Bin | Rest], N, Acc) ->
    case size(Acc) >= N of
        true ->
            <<Head:N/binary, _/binary>> = Acc,
            Head;
        _ ->
            sizer(Rest, N, <<Acc/binary, Bin/binary>>)
    end.
The Erlang VM has an optimization that, in some circumstances, makes repeated binary concatenation have O(N) time complexity (where N is "size of binary" * "number of binaries") instead of O(N*N), which is what one would expect without knowing about this optimization. So what I think is happening is that for the sizer/3 function, this optimization kicks in but not for the match/3 function, so for sizer/3 the execution time grows linearly with the size of the input, but for match/3 the execution time grows quadratically with the size of the input list.
The optimization that I'm referring to is described here: https://www.erlang.org/doc/efficiency_guide/binaryhandling.html#constructing-binaries
I think the sentence "Matching a binary will also cause it to shrink and the next append operation will copy the binary data:" from the URL above might explain why the append optimization is not used in match/3.
This optimization is not available in older versions of Erlang, and I also think that which scenarios it is used in depends on the Erlang version. Therefore, I think it might be better to write code that does not rely on this optimization. Here is an alternative that seems to have similar performance as sizer/3, but it does not depend on the optimization mentioned above:
%% Accumulates binaries from BinL until at least N bytes have been
%% collected, then returns the first N bytes. The accumulator is kept as
%% a reversed iolist and flattened exactly once at the end, so the
%% function does not rely on the VM's binary-append optimization.
%%
%% Entry point: the third argument is a (possibly empty) binary seed.
delayer(BinL, N, <<>>) ->
    delayer(BinL, N, 0, []);
delayer(BinL, N, Acc) when is_binary(Acc) ->
    %% Seed the running byte count with the pre-existing accumulator's
    %% size. (The original passed 0 here, undercounting a non-empty Acc
    %% and collecting N bytes too many.)
    delayer(BinL, N, byte_size(Acc), [Acc]).

%% Done as soon as the collected size reaches N — even when the input
%% list is already empty. (The original clause head demanded a non-empty
%% list and crashed with function_clause in that case.)
delayer(_BinL, N, AccSize, Acc) when AccSize >= N ->
    Combined = erlang:iolist_to_binary(lists:reverse(Acc)),
    <<Head:N/binary, _/binary>> = Combined,
    Head;
delayer([Bin | Rest], N, AccSize, Acc) ->
    %% byte_size/1 is the binary-specific BIF (clearer than generic size/1).
    delayer(Rest, N, byte_size(Bin) + AccSize, [Bin | Acc]).
While measuring this, I noticed that there is quite a big variation in execution time between different runs, depending on input size and on which of sizer/3 and delayer/3 runs first, so I don't think one can use this simple benchmark to determine which one of sizer/3 and delayer/3 is the fastest. However, intuitively I believe delayer/3 should cause fewer big allocations and therefore be more memory efficient, so I would choose that one if I had to choose one.
Thank you @kjellwinblad
match/3 is significantly slower than sizer/3