Skip to content

Instantly share code, notes, and snippets.

@bjorng
Last active February 25, 2022 21:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bjorng/4089cb0a289ad5518abd8aaf59b0a86b to your computer and use it in GitHub Desktop.
Save bjorng/4089cb0a289ad5518abd8aaf59b0a86b to your computer and use it in GitHub Desktop.
Benchmark comparing sofs and the proposed lists:group_by/2,3 functions
-module(gbench).
-export([run/0]).
%%-compile(export_all).
run() ->
Limit = 2_000_000,
Data = gen_data(0, Limit, []),
UniqKeys = length(sofs_groupby_as_list(Data)),
PercentUniq = round(UniqKeys * 100 / Limit),
io:format("generated ~p two-tuples with ~p% unique keys\n",
[Limit,PercentUniq]),
%% io:format("~P\n", [Data,10]),
Tests = [{"groupby/1",fun t_groupby_1/1},
{"sofs_groupby/1",fun t_sofs_groupby_1/1},
{"groupby_as_list/1",fun groupby_as_list/1},
{"sofs_groupby_as_list/1",fun sofs_groupby_as_list/1},
{"groupby/2",fun t_groupby_2/1},
{"sofs_groupby/2",fun t_sofs_groupby_2/1},
{"groupby/3",fun t_groupby_3/1},
{"sofs_groupby/3",fun t_sofs_groupby_3/1}],
run(Tests, Data).
gen_data(Limit, Limit, Acc) ->
Acc;
gen_data(N, Limit, Acc) ->
%% Item = {N div 100,rand:uniform(Limit)},
Item = {erlang:phash2(N div 100),rand:uniform(Limit)},
gen_data(N+1, Limit, [Item|Acc]).
run([{Name,Test}|Ts], Data) ->
{Pid,Ref} = spawn_monitor(fun() ->
{Time,_Res} = timer:tc(Test, [Data]),
%% io:format("~P\n", [_Res,10]),
exit(Time)
end),
receive
{'DOWN',Ref,process,Pid,Time} ->
io:format("~.20ts: ~p\n", [Name,Time / 1_000_000]),
run(Ts, Data)
end;
run([], _Data) ->
ok.
t_sofs_groupby_1(Data) ->
sofs_groupby(Data).
t_sofs_groupby_2(Data) ->
sofs_groupby(fun({K,_}) -> K end,
Data).
t_sofs_groupby_3(Data) ->
sofs_groupby(fun({K,_}) -> K end,
fun({_,V}) -> V end,
Data).
t_groupby_1(Data) ->
groupby(Data).
t_groupby_2(Data) ->
groupby(fun({K,_}) -> K end,
Data).
t_groupby_3(Data) ->
groupby(fun({K,_}) -> K end,
fun({_,V}) -> V end,
Data).
sofs_groupby_as_list(Data) ->
S0 = sofs:relation(Data),
S1 = sofs:relation_to_family(S0),
sofs:to_external(S1).
sofs_groupby(List) ->
S0 = sofs:relation(List),
S1 = sofs:relation_to_family(S0),
S2 = sofs:to_external(S1),
maps:from_list(S2).
sofs_groupby(Fun, List) ->
Data = [{Fun(E),E} || E <- List],
S0 = sofs:relation(Data),
S1 = sofs:relation_to_family(S0),
S2 = sofs:to_external(S1),
maps:from_list(S2).
sofs_groupby(Fun, ValueFun, List) ->
Data = [{Fun(E),ValueFun(E)} || E <- List],
S0 = sofs:relation(Data),
S1 = sofs:relation_to_family(S0),
S2 = sofs:to_external(S1),
maps:from_list(S2).
groupby_as_list(List) ->
maps:to_list(groupby(List)).
groupby(List) ->
groupby_0(lists:reverse(List), #{}).
groupby_0([{K,V} | Tail], Acc) ->
NewAcc = case Acc of
#{K := Vs} -> Acc#{K := [V | Vs]};
#{} -> Acc#{K => [V]}
end,
groupby_0(Tail, NewAcc);
groupby_0([], Acc) ->
Acc.
groupby(_Fun, []) ->
#{};
groupby(Fun, List) when is_function(Fun, 1) ->
groupby_1(Fun, lists:reverse(List), #{}).
groupby_1(Fun, [H | Tail], Acc) ->
K = Fun(H),
NewAcc = case Acc of
#{K := Vs} -> Acc#{K := [H | Vs]};
#{} -> Acc#{K => [H]}
end,
%% maps:update_with(K, fun(Vs) -> [H | Vs] end, [H], Acc),
groupby_1(Fun, Tail, NewAcc);
groupby_1(_Fun, [], Acc) ->
Acc.
groupby(_Fun, _ValueFun, []) ->
#{};
groupby(Fun, ValueFun, List) when is_function(Fun, 1), is_function(ValueFun, 1) ->
groupby_2(Fun, ValueFun, lists:reverse(List), #{}).
groupby_2(Fun, ValueFun, [H | Tail], Acc) ->
K = Fun(H),
V = ValueFun(H),
NewAcc = case Acc of
#{K := Vs} -> Acc#{K := [V | Vs]};
#{} -> Acc#{K => [V]}
end,
%% NewAcc = maps:update_with(K, fun(Vs) -> [V | Vs] end, [V], Acc),
groupby_2(Fun, ValueFun, Tail, NewAcc);
groupby_2(_Fun, _ValueFun, [], Acc) ->
Acc.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment