Skip to content

Instantly share code, notes, and snippets.

@redink
Forked from pichi/test.erl
Last active July 27, 2016 07:36
Show Gist options
  • Save redink/0367d831b769a5fd79231d11017a675f to your computer and use it in GitHub Desktop.
Save redink/0367d831b769a5fd79231d11017a675f to your computer and use it in GitHub Desktop.
Find top n items in a unordered list
-module(test).
-compile(export_all).
%% API
-compile({inline, [ insert/2
, merge/2
]}).
insert(E, []) -> [E];
insert(E, [E2|_] = H) when E =< E2 -> [E, H];
insert(E, [E2|H]) -> [E2, [E]|H].
merge(H1, []) -> H1;
merge([E1|H1], [E2|_]=H2) when E1 =< E2 -> [E1, H2|H1];
merge(H1, [E2|H2]) -> [E2, H1|H2].
merge_pairs([]) -> [];
merge_pairs([H]) -> H;
merge_pairs([A, B|T]) -> merge(merge(A, B), merge_pairs(T)).
run_pheap(List, Num) ->
run_pheap(List, Num, []).
run_pheap(List, 0, Heap) ->
pheap_full(List, Heap);
run_pheap([], 0, Heap) ->
finish(Heap, []);
run_pheap([{Y, X, _} = H|T], N, Heap) ->
run_pheap(T, N-1, insert({{X, Y}, H}, Heap)).
pheap_full([], Heap) ->
finish(Heap, []);
pheap_full([{Y, X, _} = H|T], [{K, _}|HeapT] = Heap) ->
case {X, Y} of
N when N > K ->
pheap_full(T, insert({N, H}, merge_pairs(HeapT)));
_ ->
pheap_full(T, Heap)
end.
finish([], Acc) -> Acc;
finish([{_, H}|T], Acc) ->
finish(merge_pairs(T), [H|Acc]).
run_all_sorted_sublist(List, Num) ->
L = [ {{X, Y}, Z} || {Y, X, _} = Z <- List],
[ X || {_, X} <- lists:sublist(lists:reverse(lists:sort(L)), Num) ].
run_gb_trees(List, Num) ->
run_gb_trees(List, Num, gb_trees:empty()).
run_gb_trees([], _Num, Acc) ->
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], gb_trees:keys(Acc));
run_gb_trees([{Key1, Key2, Key3}|List], Num, Acc) ->
NewKey = {Key2, Key1, Key3},
NewAcc =
case gb_trees:size(Acc) < Num of
true ->
gb_trees:insert(NewKey, 0, Acc);
false ->
{Smallest,_} = gb_trees:smallest(Acc),
case Smallest < NewKey of
true ->
Acc2 = gb_trees:delete(Smallest, Acc),
gb_trees:insert(NewKey, 0, Acc2);
false ->
Acc
end
end,
run_gb_trees(List, Num, NewAcc).
run_ordered_set_ets(List, Num) ->
run_ordered_set_ets(List, Num,
{0, ets:new(t, [ordered_set, public, {keypos, 2}])}).
run_ordered_set_ets([], _Num, {_, Table}) ->
R = lists:reverse(ets:tab2list(Table)),
ets:delete(Table),
R;
run_ordered_set_ets([{_, K, _} = H | T], Num, {I, Table}) ->
case I < Num of
true ->
ets:insert(Table, H);
false ->
SmallestK = ets:first(Table),
case K > SmallestK of
true ->
ets:delete(Table, SmallestK),
ets:insert(Table, H);
false ->
ok
end
end,
run_ordered_set_ets(T, Num, {I + 1, Table}).
run_small_to_big_order_list(List, Num) ->
run_small_to_big_order_list(List, Num, 0, []).
run_small_to_big_order_list([], _Num, _, Acc) ->
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], Acc);
run_small_to_big_order_list([{Key1, Key2, Key3}|List], Num, Len, Acc) ->
NewKey = {Key2, Key1, Key3},
{NewLen, NewAcc} =
case Len < Num of
true ->
{Len + 1, lists:sort([NewKey|Acc])};
false ->
[Smallest|Acc2] = Acc,
case Smallest < NewKey of
true ->
{Len, lists:sort([NewKey|Acc2])};
false ->
{Len, Acc}
end
end,
run_small_to_big_order_list(List, Num, NewLen, NewAcc).
run_small_to_big_order_list_v2(List, Num) ->
run_small_to_big_order_list_v2(List, Num, fun({X2, X1, _}, {Y2, Y1, _})
-> {X1, X2} =< {Y1, Y2} end, 0, []).
run_small_to_big_order_list_v2([], _, _, _, Acc) ->
lists:reverse(Acc);
run_small_to_big_order_list_v2([NewKey|List], Num, Fun, Len, Acc) ->
{NewLen, NewAcc} =
if Len < Num -> {Len + 1, insert(NewKey, Acc, Fun)};
true ->
[Smallest|Acc2] = Acc,
case Fun(Smallest,NewKey) of
true -> {Len, insert(NewKey, Acc2, Fun)};
false -> {Len, Acc}
end
end,
run_small_to_big_order_list_v2(List, Num, Fun, NewLen, NewAcc).
insert(K, [], _) -> [K];
insert(K, [H|T], Fun) ->
case Fun(K, H) of
true -> [K, H|T];
false -> [H|insert(K, T, Fun)]
end.
run_big_to_small_order_list(List, Num) ->
run_big_to_small_order_list(List, Num, 0, []).
run_big_to_small_order_list([], _Num, _, Acc) ->
Acc;
run_big_to_small_order_list([Key|List], Num, Len, Acc) ->
{NewLen, NewAcc} =
case Len < Num of
true ->
{Len + 1, lists:sort(fun({X2, X1, _}, {Y2, Y1, _}) -> {X1, X2} > {Y1, Y2} end, [Key|Acc])};
false ->
Smallest = lists:last(Acc),
case Smallest < Key of
true ->
Acc2 = lists:sublist(Acc, Len - 1),
{Len, lists:sort(fun({X2, X1, _}, {Y2, Y1, _}) -> {X1, X2} > {Y1, Y2} end, [Key|Acc2])};
false ->
{Len, Acc}
end
end,
run_big_to_small_order_list(List, Num, NewLen, NewAcc).
test() ->
[ test(TotalNum, TopNum)
|| TotalNum <- [5000, 10000, 15000, 20000, 25000],
TopNum <- [20, 30, 40, 50, 60, 70]].
test_big() ->
[ test(TotalNum, TopNum)
|| TotalNum <- [25000],
TopNum <- [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000, 1100, 1200]].
%% Test
test(TotalNum, TopNum) ->
List = prepared_random_list(TotalNum),
%io:format("~p~n", [List]),
Self = self(),
L = lists:keysort(2,
[ receive {Pid, X} -> X end
|| {F, Type} <-
[{run_all_sorted_sublist, all_sorted_sublist________},
{run_gb_trees, gb_tree___________________},
{run_small_to_big_order_list, small_to_big_order_list___},
{run_big_to_small_order_list, big_to_small_order_list___},
{run_small_to_big_order_list_v2, small_to_big_order_list_v2},
{run_pheap, pheap_____________________},
{run_ordered_set_ets, ordered_set_ets___________}],
Pid <- [spawn_link(fun() ->
{Time, Result} = timer:tc(?MODULE, F, [List, TopNum]),
Self ! {self(), {Type, Time, Result}}
end)]
]),
[{_, Time1, _}|_] = L,
[_] = lists:ukeysort(3, L),
{{TotalNum, TopNum}, [{Type, Time, Time/Time1*100}
|| {Type, Time, _} <- L]}.
prepared_random_list(TotalNum) ->
[ {X, X+1, X+2}
|| {_, X} <- lists:sort(
[ {rand:uniform(), X}
|| X <- lists:seq(1, TotalNum) ])
].
@redink
Copy link
Author

redink commented Jul 27, 2016

Suppose a scene:
TotalNum is large and TopNum is large too, just like function test_big/0. The test result:

27> rp(test:test_big()).
[{{25000,100},
  [{pheap_____________________,2063,100.0},
   {ordered_set_ets___________,5446,263.9844886088221},
   {gb_tree___________________,7200,349.006301502666},
   {small_to_big_order_list_v2,7698,373.1459040232671},
   {small_to_big_order_list___,15189,736.2578768783325},
   {all_sorted_sublist________,39296,1904.7988366456618},
   {big_to_small_order_list___,46993,2277.896267571498}]},
 {{25000,200},
  [{pheap_____________________,2921,100.0},
   {ordered_set_ets___________,4571,156.4875042793564},
   {gb_tree___________________,6375,218.24717562478605},
   {small_to_big_order_list_v2,13832,473.5364601163985},
   {small_to_big_order_list___,47367,1621.602191030469},
   {all_sorted_sublist________,47619,1630.2293735022254},
   {big_to_small_order_list___,154432,5286.95652173913}]},
 {{25000,300},
  [{ordered_set_ets___________,5163,100.0},
   {pheap_____________________,5639,109.21944605849312},
   {gb_tree___________________,6698,129.7307766802247},
   {small_to_big_order_list_v2,23949,463.8582219639744},
   {all_sorted_sublist________,38126,738.4466395506489},
   {small_to_big_order_list___,108659,2104.5709858609334},
   {big_to_small_order_list___,321678,6230.447414294015}]},
 {{25000,400},
  [{pheap_____________________,4754,100.0},
   {ordered_set_ets___________,6992,147.076146403029},
   {gb_tree___________________,7471,157.15187210769878},
   {all_sorted_sublist________,35830,753.6811106436685},
   {small_to_big_order_list_v2,38805,816.2599915860328},
   {small_to_big_order_list___,171789,3613.5675220866638},
   {big_to_small_order_list___,549906,11567.227597812367}]},
 {{25000,500},
  [{pheap_____________________,6147,100.0},
   {ordered_set_ets___________,6160,100.21148527737107},
   {gb_tree___________________,8410,136.81470636082642},
   {all_sorted_sublist________,40522,659.2158776639011},
   {small_to_big_order_list_v2,50199,816.6422645192777},
   {small_to_big_order_list___,263741,4290.564503009598},
   {big_to_small_order_list___,742793,12083.829510330243}]},
 {{25000,600},
  [{pheap_____________________,5695,100.0},
   {ordered_set_ets___________,6819,119.73661106233537},
   {gb_tree___________________,9436,165.68920105355576},
   {all_sorted_sublist________,38244,671.5364354697103},
   {small_to_big_order_list_v2,69636,1222.7568042142232},
   {small_to_big_order_list___,359873,6319.104477611941},
   {big_to_small_order_list___,1039934,18260.474100087795}]},
 {{25000,700},
  [{pheap_____________________,6299,100.0},
   {ordered_set_ets___________,7729,122.70201619304653},
   {gb_tree___________________,9976,158.37434513414829},
   {all_sorted_sublist________,33651,534.2276551833625},
   {small_to_big_order_list_v2,83654,1328.0520717574218},
   {small_to_big_order_list___,449669,7138.736307350374},
   {big_to_small_order_list___,1403739,22285.108747420225}]},
 {{25000,800},
  [{ordered_set_ets___________,6377,100.0},
   {pheap_____________________,6587,103.29308452250274},
   {gb_tree___________________,11397,178.72040144268465},
   {all_sorted_sublist________,34933,547.796769640897},
   {small_to_big_order_list_v2,112025,1756.7037792065234},
   {small_to_big_order_list___,624920,9799.592284773405},
   {big_to_small_order_list___,1843722,28912.05896189431}]},
 {{25000,900},
  [{ordered_set_ets___________,5893,100.0},
   {pheap_____________________,8009,135.90700831494996},
   {gb_tree___________________,13033,221.16069913456644},
   {all_sorted_sublist________,36387,617.4613948752757},
   {small_to_big_order_list_v2,137859,2339.3687425759376},
   {small_to_big_order_list___,835282,14174.138808756152},
   {big_to_small_order_list___,2215559,37596.453419311045}]},
 {{25000,1000},
  [{ordered_set_ets___________,6317,100.0},
   {pheap_____________________,10963,173.54757004907393},
   {gb_tree___________________,13504,213.77236029760965},
   {all_sorted_sublist________,34961,553.4430900744023},
   {small_to_big_order_list_v2,169699,2686.385942694317},
   {small_to_big_order_list___,972908,15401.42472692734},
   {big_to_small_order_list___,3056883,48391.37248694}]},
 {{25000,1100},
  [{ordered_set_ets___________,6400,100.0},
   {pheap_____________________,9042,141.28125},
   {gb_tree___________________,12045,188.203125},
   {all_sorted_sublist________,32891,513.921875},
   {small_to_big_order_list_v2,198818,3106.53125},
   {small_to_big_order_list___,1124822,17575.34375},
   {big_to_small_order_list___,3102033,48469.265625}]},
 {{25000,1200},
  [{ordered_set_ets___________,7285,100.0},
   {pheap_____________________,7996,109.75978037062457},
   {gb_tree___________________,12231,167.89293067947838},
   {all_sorted_sublist________,35474,486.945778997941},
   {small_to_big_order_list_v2,224545,3082.2923816060397},
   {small_to_big_order_list___,1299779,17841.85312285518},
   {big_to_small_order_list___,3710683,50935.93685655457}]}]
ok

Most of scenes we need not too large TopNum, just for fun. ^^

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment