Skip to content

Instantly share code, notes, and snippets.

@zhongwencool
Last active July 25, 2016 20:40
Show Gist options
  • Save zhongwencool/d7d4db85b094f74f5edf876e413dca0d to your computer and use it in GitHub Desktop.
Save zhongwencool/d7d4db85b094f74f5edf876e413dca0d to your computer and use it in GitHub Desktop.
Find top n items in a unordered list
-module(test).
-compile(export_all).
%% API
run_all_sorted_sublist(List, Num) ->
lists:sublist(lists:sort(
fun({_,A,_},{_,B,_}) -> A > B end,
List), Num).
run_gb_trees(List, Num) ->
run_gb_trees(List, Num, gb_trees:empty()).
run_gb_trees([], _Num, Acc) ->
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], gb_trees:keys(Acc));
run_gb_trees([{Key1, Key2, Key3}|List], Num, Acc) ->
NewKey = {Key2, Key1, Key3},
NewAcc =
case gb_trees:size(Acc) < Num of
true ->
gb_trees:insert(NewKey, 0, Acc);
false ->
{Smallest,_} = gb_trees:smallest(Acc),
case Smallest < NewKey of
true ->
Acc2 = gb_trees:delete(Smallest, Acc),
gb_trees:insert(NewKey, 0, Acc2);
false ->
Acc
end
end,
run_gb_trees(List, Num, NewAcc).
run_small_to_big_order_list(List, Num) ->
run_small_to_big_order_list(List, Num, 0, []).
run_small_to_big_order_list([], _Num, _, Acc) ->
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], Acc);
run_small_to_big_order_list([{Key1, Key2, Key3}|List], Num, Len, Acc) ->
NewKey = {Key2, Key1, Key3},
{NewLen, NewAcc} =
case Len < Num of
true ->
{Len + 1, lists:sort([NewKey|Acc])};
false ->
[Smallest|Acc2] = Acc,
case Smallest < NewKey of
true ->
{Len, lists:sort([NewKey|Acc2])};
false ->
{Len, Acc}
end
end,
run_small_to_big_order_list(List, Num, NewLen, NewAcc).
run_small_to_big_order_list_v2(List, Num) ->
run_small_to_big_order_list_v2(List, Num, fun({_, X, _}, {_, Y, _}) -> X =< Y end, 0, []).
run_small_to_big_order_list_v2([], _, _, _, Acc) ->
lists:reverse(Acc);
run_small_to_big_order_list_v2([NewKey|List], Num, Fun, Len, Acc) ->
{NewLen, NewAcc} =
if Len < Num -> {Len + 1, insert(NewKey, Acc, Fun)};
true ->
[Smallest|Acc2] = Acc,
case Fun(Smallest,NewKey) of
true -> {Len, insert(NewKey, Acc2, Fun)};
false -> {Len, Acc}
end
end,
run_small_to_big_order_list_v2(List, Num, Fun, NewLen, NewAcc).
insert(K, [], _) -> [K];
insert(K, [H|T], Fun) ->
case Fun(K, H) of
true -> [K, H|T];
false -> [H|insert(K, T, Fun)]
end.
run_big_to_small_order_list(List, Num) ->
run_big_to_small_order_list(List, Num, 0, []).
run_big_to_small_order_list([], _Num, _, Acc) ->
Acc;
run_big_to_small_order_list([Key|List], Num, Len, Acc) ->
{NewLen, NewAcc} =
case Len < Num of
true ->
{Len + 1, lists:sort(fun({_X1, Y1, _Z1},{_X2, Y2, _Z2}) -> Y1 > Y2 end, [Key|Acc])};
false ->
Smallest = lists:last(Acc),
case Smallest < Key of
true ->
Acc2 = lists:sublist(Acc, Len - 1),
{Len, lists:sort(fun({_X1, Y1, _Z1},{_X2, Y2, _Z2}) -> Y1 > Y2 end, [Key|Acc2])};
false ->
{Len, Acc}
end
end,
run_big_to_small_order_list(List, Num, NewLen, NewAcc).
test() ->
List1 = [{5000, 20}, {5000, 30}, {5000, 40}, {5000, 50}, {5000, 60}, {5000, 70}],
List2 = [{10000, 20}, {10000, 30}, {10000, 40}, {10000, 50}, {10000, 60}, {10000, 70}],
List3 = [{15000, 20}, {15000, 30}, {15000, 40}, {15000, 50}, {15000, 60}, {15000, 70}],
List4 = [{20000, 20}, {20000, 30}, {20000, 40}, {20000, 50}, {20000, 60}, {20000, 70}],
List5 = [{25000, 20}, {25000, 30}, {25000, 40}, {25000, 50}, {25000, 60}, {25000, 70}],
Result1 = lists:map(fun({TotalNum1, TopNum1}) -> test(TotalNum1, TopNum1) end, List1),
Result2 = lists:map(fun({TotalNum2, TopNum2}) -> test(TotalNum2, TopNum2) end, List2),
Result3 = lists:map(fun({TotalNum3, TopNum3}) -> test(TotalNum3, TopNum3) end, List3),
Result4 = lists:map(fun({TotalNum4, TopNum4}) -> test(TotalNum4, TopNum4) end, List4),
Result5 = lists:map(fun({TotalNum5, TopNum5}) -> test(TotalNum5, TopNum5) end, List5),
io:format("~p~n", [Result1]),
io:format("~p~n", [Result2]),
io:format("~p~n", [Result3]),
io:format("~p~n", [Result4]),
io:format("~p~n", [Result5]),
ok.
%% Test
test(TotalNum, TopNum) ->
erlang:process_flag(trap_exit, true),
List = prepared_random_list(TotalNum),
%io:format("~p~n", [List]),
Fun1 = fun() -> {Time, Result} = timer:tc(?MODULE, run_all_sorted_sublist, [List, TopNum]), exit({all_sorted_sublist_____, Time, Result}) end,
Fun2 = fun() -> {Time, Result} = timer:tc(?MODULE, run_gb_trees, [List, TopNum]), exit({gb_tree________________, Time, Result}) end,
Fun3 = fun() -> {Time, Result} = timer:tc(?MODULE, run_small_to_big_order_list, [List, TopNum]), exit({small_to_big_order_list, Time, Result}) end,
Fun4 = fun() -> {Time, Result} = timer:tc(?MODULE, run_big_to_small_order_list, [List, TopNum]), exit({big_to_small_order_list, Time, Result}) end,
Fun5 = fun() -> {Time, Result} = timer:tc(?MODULE, run_small_to_big_order_list_v2, [List, TopNum]), exit({small_to_big_order_list_v2, Time, Result}) end,
spawn_link(Fun1),
spawn_link(Fun2),
spawn_link(Fun3),
spawn_link(Fun4),
spawn_link(Fun5),
{Type1, Time1, Res1} = receive {'EXIT', _Pid1, R1} -> R1 end,
{Type2, Time2, Res2} = receive {'EXIT', _Pid2, R2} -> R2 end,
{Type3, Time3, Res3} = receive {'EXIT', _Pid3, R3} -> R3 end,
{Type4, Time4, Res4} = receive {'EXIT', _Pid4, R4} -> R4 end,
{Type5, Time5, Res5} = receive {'EXIT', _Pid5, R5} -> R5 end,
Res1 = Res2 = Res3 = Res4 = Res5,
{{TotalNum, TopNum}, [{Type1, Time1, 100},
{Type2, Time2, Time2/Time1*100},
{Type3, Time3, Time3/Time1*100},
{Type4, Time4, Time4/Time1*100},
{Type5, Time5, Time5/Time1*100}]}.
prepared_random_list(TotalNum) ->
List = lists:seq(1,TotalNum),
List1 = shuffle(List),
lists:map(fun(Num) -> {Num, Num+1, Num+3} end, List1).
% Fisher-Yates shuffle
shuffle(List) -> shuffle(List, []).
shuffle([], Acc) -> Acc;
shuffle(List, Acc) ->
{Leading, [H | T]} = lists:split(random:uniform(length(List)) - 1, List),
shuffle(Leading ++ T, [H | Acc]).
@zhongwencool
Copy link
Author

zhongwencool commented Jul 13, 2016

Erlang/OTP 18 [erts-7.3] [source] [64-bit] [smp:4:4] [async-threads:10] [hipe] [kernel-poll:false] [dtrace]

Eshell V7.3  (abort with ^G)
1> test:test().
[{{5000,20},
  [{small_to_big_order_list_v2,758,100},
   {small_to_big_order_list,1761,232.32189973614777},
   {gb_tree________________,2424,319.78891820580475},
   {big_to_small_order_list,2940,387.8627968337731},
   {all_sorted_sublist_____,5854,772.2955145118734}]},
 {{5000,30},
  [{small_to_big_order_list_v2,1170,100},
   {gb_tree________________,3091,264.1880341880342},
   {small_to_big_order_list,3515,300.4273504273504},
   {big_to_small_order_list,3849,328.97435897435895},
   {all_sorted_sublist_____,6419,548.6324786324786}]},
 {{5000,40},
  [{small_to_big_order_list_v2,1742,100},
   {gb_tree________________,3172,182.08955223880596},
   {small_to_big_order_list,4886,280.4822043628014},
   {big_to_small_order_list,5322,305.5109070034443},
   {all_sorted_sublist_____,7211,413.9494833524684}]},
 {{5000,50},
  [{small_to_big_order_list_v2,1557,100},
   {gb_tree________________,2974,191.00834938985227},
   {small_to_big_order_list,4879,313.35902376364805},
   {all_sorted_sublist_____,7672,492.74245343609505},
   {big_to_small_order_list,7801,501.0276172125883}]},
 {{5000,60},
  [{small_to_big_order_list_v2,2172,100},
   {gb_tree________________,3715,171.04051565377532},
   {all_sorted_sublist_____,6481,298.3885819521179},
   {small_to_big_order_list,7384,339.96316758747696},
   {big_to_small_order_list,9878,454.78821362799266}]},
 {{5000,70},
  [{small_to_big_order_list_v2,2233,100},
   {gb_tree________________,5012,224.45141065830722},
   {all_sorted_sublist_____,6919,309.85221674876846},
   {small_to_big_order_list,7664,343.21540528437083},
   {big_to_small_order_list,13464,602.9556650246305}]}]
[{{10000,20},
  [{small_to_big_order_list_v2,1608,100},
   {small_to_big_order_list,3337,207.52487562189054},
   {gb_tree________________,4395,273.32089552238807},
   {big_to_small_order_list,5116,318.1592039800995},
   {all_sorted_sublist_____,12289,764.2412935323383}]},
 {{10000,30},
  [{small_to_big_order_list_v2,1088,100},
   {gb_tree________________,5734,527.0220588235294},
   {small_to_big_order_list,6112,561.7647058823529},
   {big_to_small_order_list,7733,710.7536764705882},
   {all_sorted_sublist_____,13049,1199.3566176470588}]},
 {{10000,40},
  [{small_to_big_order_list_v2,2551,100},
   {gb_tree________________,5701,223.4809878479028},
   {small_to_big_order_list,6609,259.0748725989808},
   {big_to_small_order_list,8268,324.10819286554295},
   {all_sorted_sublist_____,14159,555.0372402979224}]},
 {{10000,50},
  [{small_to_big_order_list_v2,1286,100},
   {gb_tree________________,4833,375.81648522550546},
   {small_to_big_order_list,8009,622.7838258164852},
   {all_sorted_sublist_____,11928,927.5272161741835},
   {big_to_small_order_list,12485,970.8398133748055}]},
 {{10000,60},
  [{small_to_big_order_list_v2,3054,100},
   {gb_tree________________,6690,219.05697445972496},
   {small_to_big_order_list,10170,333.0058939096267},
   {all_sorted_sublist_____,15475,506.71250818598554},
   {big_to_small_order_list,15560,509.4957432874918}]},
 {{10000,70},
  [{small_to_big_order_list_v2,3692,100},
   {gb_tree________________,6977,188.97616468039},
   {small_to_big_order_list,14226,385.3196099674973},
   {all_sorted_sublist_____,16745,453.5482123510293},
   {big_to_small_order_list,16955,459.2361863488624}]}]
[{{15000,20},
  [{small_to_big_order_list_v2,1763,100},
   {small_to_big_order_list,4362,247.41917186613725},
   {gb_tree________________,7179,407.2036301758367},
   {big_to_small_order_list,7960,451.5031196823596},
   {all_sorted_sublist_____,16597,941.4066931366989}]},
 {{15000,30},
  [{small_to_big_order_list_v2,3800,100},
   {small_to_big_order_list,4698,123.63157894736842},
   {gb_tree________________,5950,156.57894736842107},
   {big_to_small_order_list,8071,212.39473684210526},
   {all_sorted_sublist_____,17902,471.1052631578947}]},
 {{15000,40},
  [{small_to_big_order_list_v2,2043,100},
   {gb_tree________________,6750,330.3964757709251},
   {small_to_big_order_list,7833,383.4067547723935},
   {big_to_small_order_list,12220,598.1399902104748},
   {all_sorted_sublist_____,17744,868.5266764561918}]},
 {{15000,50},
  [{small_to_big_order_list_v2,2883,100},
   {gb_tree________________,8010,277.8355879292404},
   {small_to_big_order_list,9545,331.07873742629204},
   {big_to_small_order_list,16735,580.4717308359347},
   {all_sorted_sublist_____,19682,682.6916406520985}]},
 {{15000,60},
  [{small_to_big_order_list_v2,3357,100},
   {gb_tree________________,8066,240.27405421507297},
   {small_to_big_order_list,13143,391.5102770330652},
   {all_sorted_sublist_____,18146,540.5421507298182},
   {big_to_small_order_list,19475,580.1310694072088}]},
 {{15000,70},
  [{small_to_big_order_list_v2,4992,100},
   {gb_tree________________,9397,188.2411858974359},
   {small_to_big_order_list,16426,329.04647435897436},
   {all_sorted_sublist_____,19218,384.97596153846155},
   {big_to_small_order_list,24281,486.3982371794871}]}]
[{{20000,20},
  [{small_to_big_order_list_v2,4207,100},
   {small_to_big_order_list,4983,118.44544806275255},
   {gb_tree________________,11517,273.75802234371287},
   {big_to_small_order_list,12201,290.0166389351082},
   {all_sorted_sublist_____,29060,690.7535060613264}]},
 {{20000,30},
  [{small_to_big_order_list,4041,100},
   {small_to_big_order_list_v2,3980,98.49047265528334},
   {gb_tree________________,8875,219.6238554813165},
   {big_to_small_order_list,12245,303.0190546894333},
   {all_sorted_sublist_____,27483,680.1039346696363}]},
 {{20000,40},
  [{small_to_big_order_list_v2,4440,100},
   {small_to_big_order_list,6943,156.37387387387386},
   {gb_tree________________,8804,198.28828828828827},
   {big_to_small_order_list,14217,320.2027027027027},
   {all_sorted_sublist_____,29611,666.9144144144144}]},
 {{20000,50},
  [{small_to_big_order_list_v2,5116,100},
   {small_to_big_order_list,8665,169.37060203283815},
   {gb_tree________________,9012,176.1532447224394},
   {big_to_small_order_list,18147,354.71071149335415},
   {all_sorted_sublist_____,28383,554.7888975762314}]},
 {{20000,60},
  [{small_to_big_order_list_v2,4166,100},
   {small_to_big_order_list,6609,158.64138262121938},
   {gb_tree________________,9492,227.84445511281803},
   {big_to_small_order_list,24312,583.5813730196832},
   {all_sorted_sublist_____,26485,635.7417186749881}]},
 {{20000,70},
  [{small_to_big_order_list_v2,6121,100},
   {gb_tree________________,9748,159.2550236889397},
   {small_to_big_order_list,12749,208.2829603006045},
   {big_to_small_order_list,25637,418.8367913739585},
   {all_sorted_sublist_____,29594,483.48309099820295}]}]
[{{25000,20},
  [{small_to_big_order_list_v2,3705,100},
   {small_to_big_order_list,6222,167.93522267206478},
   {big_to_small_order_list,8403,226.80161943319837},
   {gb_tree________________,9653,260.53981106612684},
   {all_sorted_sublist_____,29793,804.1295546558704}]},
 {{25000,30},
  [{small_to_big_order_list_v2,4168,100},
   {small_to_big_order_list,9710,232.9654510556622},
   {gb_tree________________,10825,259.7168905950096},
   {big_to_small_order_list,13461,322.96065259117086},
   {all_sorted_sublist_____,32312,775.2399232245681}]},
 {{25000,40},
  [{small_to_big_order_list_v2,4463,100},
   {small_to_big_order_list,9010,201.8821420569124},
   {gb_tree________________,11527,258.27918440510865},
   {big_to_small_order_list,16557,370.9836432892673},
   {all_sorted_sublist_____,32938,738.023750840242}]},
 {{25000,50},
  [{small_to_big_order_list_v2,4671,100},
   {small_to_big_order_list,10433,223.35688289445517},
   {gb_tree________________,12386,265.1680582316421},
   {big_to_small_order_list,19632,420.29543994861916},
   {all_sorted_sublist_____,34087,729.7580817812031}]},
 {{25000,60},
  [{small_to_big_order_list_v2,5055,100},
   {small_to_big_order_list,13742,271.84965380811076},
   {gb_tree________________,15670,309.9901088031652},
   {big_to_small_order_list,24388,482.45301681503463},
   {all_sorted_sublist_____,35236,697.0524233432245}]},
 {{25000,70},
  [{small_to_big_order_list_v2,6819,100},
   {gb_tree________________,15398,225.81023610500074},
   {small_to_big_order_list,18309,268.4997800263969},
   {big_to_small_order_list,29194,428.12729139169966},
   {all_sorted_sublist_____,37889,555.63865669453}]}]
ok

@pichi
Copy link

pichi commented Jul 25, 2016

The benchmark is flawed in a way that runs test concurrently. It means they interfere with themselves. For example, if some of them use more operations which make more work and use less reduction it will have an unfair advantage over other in this benchmark.

@pichi
Copy link

pichi commented Jul 25, 2016

Pairing heap implementation is significantly faster (up to four times but typically two times). See

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment