Skip to content

Instantly share code, notes, and snippets.

@zhongwencool
Last active July 25, 2016 20:40
Show Gist options
  • Save zhongwencool/d7d4db85b094f74f5edf876e413dca0d to your computer and use it in GitHub Desktop.
Save zhongwencool/d7d4db85b094f74f5edf876e413dca0d to your computer and use it in GitHub Desktop.
Find top n items in a unordered list
-module(test).
-compile(export_all).
%% API
run_all_sorted_sublist(List, Num) ->
lists:sublist(lists:sort(
fun({_,A,_},{_,B,_}) -> A > B end,
List), Num).
run_gb_trees(List, Num) ->
run_gb_trees(List, Num, gb_trees:empty()).
run_gb_trees([], _Num, Acc) ->
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], gb_trees:keys(Acc));
run_gb_trees([{Key1, Key2, Key3}|List], Num, Acc) ->
NewKey = {Key2, Key1, Key3},
NewAcc =
case gb_trees:size(Acc) < Num of
true ->
gb_trees:insert(NewKey, 0, Acc);
false ->
{Smallest,_} = gb_trees:smallest(Acc),
case Smallest < NewKey of
true ->
Acc2 = gb_trees:delete(Smallest, Acc),
gb_trees:insert(NewKey, 0, Acc2);
false ->
Acc
end
end,
run_gb_trees(List, Num, NewAcc).
run_small_to_big_order_list(List, Num) ->
run_small_to_big_order_list(List, Num, 0, []).
run_small_to_big_order_list([], _Num, _, Acc) ->
lists:foldl(fun({Key1, Key2, Key3}, Sum) -> [{Key2, Key1, Key3}|Sum] end, [], Acc);
run_small_to_big_order_list([{Key1, Key2, Key3}|List], Num, Len, Acc) ->
NewKey = {Key2, Key1, Key3},
{NewLen, NewAcc} =
case Len < Num of
true ->
{Len + 1, lists:sort([NewKey|Acc])};
false ->
[Smallest|Acc2] = Acc,
case Smallest < NewKey of
true ->
{Len, lists:sort([NewKey|Acc2])};
false ->
{Len, Acc}
end
end,
run_small_to_big_order_list(List, Num, NewLen, NewAcc).
run_small_to_big_order_list_v2(List, Num) ->
run_small_to_big_order_list_v2(List, Num, fun({_, X, _}, {_, Y, _}) -> X =< Y end, 0, []).
run_small_to_big_order_list_v2([], _, _, _, Acc) ->
lists:reverse(Acc);
run_small_to_big_order_list_v2([NewKey|List], Num, Fun, Len, Acc) ->
{NewLen, NewAcc} =
if Len < Num -> {Len + 1, insert(NewKey, Acc, Fun)};
true ->
[Smallest|Acc2] = Acc,
case Fun(Smallest,NewKey) of
true -> {Len, insert(NewKey, Acc2, Fun)};
false -> {Len, Acc}
end
end,
run_small_to_big_order_list_v2(List, Num, Fun, NewLen, NewAcc).
insert(K, [], _) -> [K];
insert(K, [H|T], Fun) ->
case Fun(K, H) of
true -> [K, H|T];
false -> [H|insert(K, T, Fun)]
end.
run_big_to_small_order_list(List, Num) ->
run_big_to_small_order_list(List, Num, 0, []).
run_big_to_small_order_list([], _Num, _, Acc) ->
Acc;
run_big_to_small_order_list([Key|List], Num, Len, Acc) ->
{NewLen, NewAcc} =
case Len < Num of
true ->
{Len + 1, lists:sort(fun({_X1, Y1, _Z1},{_X2, Y2, _Z2}) -> Y1 > Y2 end, [Key|Acc])};
false ->
Smallest = lists:last(Acc),
case Smallest < Key of
true ->
Acc2 = lists:sublist(Acc, Len - 1),
{Len, lists:sort(fun({_X1, Y1, _Z1},{_X2, Y2, _Z2}) -> Y1 > Y2 end, [Key|Acc2])};
false ->
{Len, Acc}
end
end,
run_big_to_small_order_list(List, Num, NewLen, NewAcc).
test() ->
List1 = [{5000, 20}, {5000, 30}, {5000, 40}, {5000, 50}, {5000, 60}, {5000, 70}],
List2 = [{10000, 20}, {10000, 30}, {10000, 40}, {10000, 50}, {10000, 60}, {10000, 70}],
List3 = [{15000, 20}, {15000, 30}, {15000, 40}, {15000, 50}, {15000, 60}, {15000, 70}],
List4 = [{20000, 20}, {20000, 30}, {20000, 40}, {20000, 50}, {20000, 60}, {20000, 70}],
List5 = [{25000, 20}, {25000, 30}, {25000, 40}, {25000, 50}, {25000, 60}, {25000, 70}],
Result1 = lists:map(fun({TotalNum1, TopNum1}) -> test(TotalNum1, TopNum1) end, List1),
Result2 = lists:map(fun({TotalNum2, TopNum2}) -> test(TotalNum2, TopNum2) end, List2),
Result3 = lists:map(fun({TotalNum3, TopNum3}) -> test(TotalNum3, TopNum3) end, List3),
Result4 = lists:map(fun({TotalNum4, TopNum4}) -> test(TotalNum4, TopNum4) end, List4),
Result5 = lists:map(fun({TotalNum5, TopNum5}) -> test(TotalNum5, TopNum5) end, List5),
io:format("~p~n", [Result1]),
io:format("~p~n", [Result2]),
io:format("~p~n", [Result3]),
io:format("~p~n", [Result4]),
io:format("~p~n", [Result5]),
ok.
%% Test
test(TotalNum, TopNum) ->
erlang:process_flag(trap_exit, true),
List = prepared_random_list(TotalNum),
%io:format("~p~n", [List]),
Fun1 = fun() -> {Time, Result} = timer:tc(?MODULE, run_all_sorted_sublist, [List, TopNum]), exit({all_sorted_sublist_____, Time, Result}) end,
Fun2 = fun() -> {Time, Result} = timer:tc(?MODULE, run_gb_trees, [List, TopNum]), exit({gb_tree________________, Time, Result}) end,
Fun3 = fun() -> {Time, Result} = timer:tc(?MODULE, run_small_to_big_order_list, [List, TopNum]), exit({small_to_big_order_list, Time, Result}) end,
Fun4 = fun() -> {Time, Result} = timer:tc(?MODULE, run_big_to_small_order_list, [List, TopNum]), exit({big_to_small_order_list, Time, Result}) end,
Fun5 = fun() -> {Time, Result} = timer:tc(?MODULE, run_small_to_big_order_list_v2, [List, TopNum]), exit({small_to_big_order_list_v2, Time, Result}) end,
spawn_link(Fun1),
spawn_link(Fun2),
spawn_link(Fun3),
spawn_link(Fun4),
spawn_link(Fun5),
{Type1, Time1, Res1} = receive {'EXIT', _Pid1, R1} -> R1 end,
{Type2, Time2, Res2} = receive {'EXIT', _Pid2, R2} -> R2 end,
{Type3, Time3, Res3} = receive {'EXIT', _Pid3, R3} -> R3 end,
{Type4, Time4, Res4} = receive {'EXIT', _Pid4, R4} -> R4 end,
{Type5, Time5, Res5} = receive {'EXIT', _Pid5, R5} -> R5 end,
Res1 = Res2 = Res3 = Res4 = Res5,
{{TotalNum, TopNum}, [{Type1, Time1, 100},
{Type2, Time2, Time2/Time1*100},
{Type3, Time3, Time3/Time1*100},
{Type4, Time4, Time4/Time1*100},
{Type5, Time5, Time5/Time1*100}]}.
prepared_random_list(TotalNum) ->
List = lists:seq(1,TotalNum),
List1 = shuffle(List),
lists:map(fun(Num) -> {Num, Num+1, Num+3} end, List1).
% Fisher-Yates shuffle
shuffle(List) -> shuffle(List, []).
shuffle([], Acc) -> Acc;
shuffle(List, Acc) ->
{Leading, [H | T]} = lists:split(random:uniform(length(List)) - 1, List),
shuffle(Leading ++ T, [H | Acc]).
@pichi
Copy link

pichi commented Jul 25, 2016

Pairing heap implementation is significantly faster (up to four times but typically two times). See

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment