Created
July 24, 2020 16:03
-
-
Save rnewson/09678a3eb43866690823346436035351 to your computer and use it in GitHub Desktop.
ebtree.erl with improved collation wrapper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Licensed under the Apache License, Version 2.0 (the "License"); you may not | |
% use this file except in compliance with the License. You may obtain a copy of | |
% the License at | |
% | |
% http://www.apache.org/licenses/LICENSE-2.0 | |
% | |
% Unless required by applicable law or agreed to in writing, software | |
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | |
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | |
% License for the specific language governing permissions and limitations under | |
% the License. | |
-module(ebtree). | |
-export([ | |
open/3, | |
open/4, | |
min/0, | |
max/0, | |
insert/4, | |
delete/3, | |
lookup/3, | |
range/6, | |
reverse_range/6, | |
fold/4, | |
fold/5, | |
reduce/4, | |
full_reduce/2, | |
group_reduce/7, | |
group_reduce/8, | |
validate_tree/2 | |
]). | |
-record(node, { | |
id, | |
level = 0, | |
prev, | |
next, | |
members = [] %% [{Key0, Value0} | {FirstKey0, LastKey0, Pointer0, Reduction0}, ...] | |
}). | |
-record(tree, { | |
prefix, | |
min, | |
max, | |
collate_fun, | |
reduce_fun, | |
encode_fun | |
}). | |
-define(META, 0). | |
-define(META_ORDER, 0). | |
-define(META_NEXT_ID, 1). | |
-define(NODE, 1). | |
-define(NODE_ROOT_ID, 0). | |
-define(underflow(Tree, Node), Tree#tree.min > length(Node#node.members)). | |
-define(at_min(Tree, Node), Tree#tree.min == length(Node#node.members)). | |
-define(is_full(Tree, Node), Tree#tree.max == length(Node#node.members)). | |
%% two special 1-bit bitstrings that cannot appear in valid keys. | |
-define(MIN, <<0:1>>). | |
-define(MAX, <<1:1>>). | |
%% @equiv open(Db, Prefix, Order, []) | |
-spec open(term(), binary(), pos_integer()) -> #tree{}. | |
open(Db, Prefix, Order) -> | |
open(Db, Prefix, Order, []). | |
%% @doc Open a new ebtree, initialising it if doesn't already exist. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Prefix The key prefix applied to all ebtree keys. | |
%% @param Order The maximum number of items allowed in an ebtree node (must be an even number). Ignored | |
%% if ebtree is already initialised. | |
%% @param Options Supported options are {reduce_fun, Fun} and {collate_fun, Fun}. | |
%% @returns A data structure representing the ebtree, to be passed to all other functions. | |
-spec open(term(), binary(), pos_integer(), list()) -> #tree{}. | |
open(Db, Prefix, Order, Options) when is_binary(Prefix), is_integer(Order), Order > 2, Order rem 2 == 0 -> | |
ReduceFun = proplists:get_value(reduce_fun, Options, fun reduce_noop/2), | |
CollateFun = proplists:get_value(collate_fun, Options, fun collate_raw/2), | |
EncodeFun = proplists:get_value(encode_fun, Options, fun encode_erlang/3), | |
Tree = #tree{ | |
prefix = Prefix, | |
reduce_fun = ReduceFun, | |
collate_fun = CollateFun, | |
encode_fun = EncodeFun | |
}, | |
erlfdb:transactional(Db, fun(Tx) -> | |
case get_meta(Tx, Tree, ?META_ORDER) of | |
not_found -> | |
erlfdb:clear_range_startswith(Tx, Prefix), | |
set_meta(Tx, Tree, ?META_ORDER, Order), | |
set_meta(Tx, Tree, ?META_NEXT_ID, 1), | |
set_node(Tx, Tree, #node{id = ?NODE_ROOT_ID}), | |
init_order(Tree, Order); | |
ActualOrder when is_integer(ActualOrder) -> | |
init_order(Tree, ActualOrder) | |
end | |
end). | |
%% @doc a special value guaranteed to be smaller than any value in an ebtree. | |
min() -> | |
?MIN. | |
%% @doc a special value guaranteed to be larger than any value in an ebtree. | |
max() -> | |
?MAX. | |
%% @doc Lookup a specific key in the ebtree. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree the ebtree. | |
%% @param Key the key to lookup | |
%% @returns A key-value tuple if found, false if not present in the ebtree. | |
-spec lookup(Db :: term(), Tree :: #tree{}, Key :: term()) -> | |
{Key :: term(), Value :: term()} | false. | |
lookup(Db, #tree{} = Tree, Key) -> | |
Fun = fun | |
({visit, K, V}, _Acc) when K =:= Key -> | |
{stop, {K, V}}; | |
({visit, K, _V}, Acc) -> | |
case greater_than(Tree, K, Key) of | |
true -> | |
{stop, Acc}; | |
false -> | |
{ok, Acc} | |
end; | |
({traverse, F, L, _R}, Acc) -> | |
case {greater_than(Tree, F, Key), less_than_or_equal(Tree, Key, L)} of | |
{true, _} -> | |
{stop, Acc}; | |
{false, true} -> | |
{ok, Acc}; | |
{false, false} -> | |
{skip, Acc} | |
end | |
end, | |
fold(Db, Tree, Fun, false, []). | |
%% @equiv fold(Db, Tree, Fun, Acc, []) | |
fold(Db, #tree{} = Tree, Fun, Acc) -> | |
fold(Db, Tree, Fun, Acc, []). | |
%% @doc Custom traversal of the ebtree. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree the ebtree. | |
%% @param Fun A callback function as nodes are loaded that directs the traversal. | |
%% @param Acc The initial accumulator. | |
%% @param Options Options that control how the fold is executed. | |
%% @returns the final accumulator. | |
-type fold_args() :: | |
{visit, Key :: term(), Value :: term()} | | |
{traverse, First :: term(), Last :: term(), Reduction :: term()}. | |
-type fold_option() :: [{dir, fwd | rev}]. | |
-spec fold(Db, Tree, Fun, Acc0, Options) -> Acc1 when | |
Db :: term(), | |
Tree :: #tree{}, | |
Fun :: fun((fold_args(), Acc0) -> {ok | skip | stop, Acc1}), | |
Acc0 :: term(), | |
Options :: [fold_option()], | |
Acc1 :: term(). | |
fold(Db, #tree{} = Tree, Fun, Acc, Options) -> | |
{_, Reduce} = erlfdb:transactional(Db, fun(Tx) -> | |
Root = get_node(Tx, Tree, ?NODE_ROOT_ID), | |
fold(Db, Tree, Root, Fun, Acc, Options) | |
end), | |
Reduce. | |
fold(Db, #tree{} = Tree, #node{} = Node, Fun, Acc, Options) -> | |
Dir = proplists:get_value(dir, Options, fwd), | |
Members = case Dir of | |
fwd -> Node#node.members; | |
rev -> lists:reverse(Node#node.members) | |
end, | |
fold(Db, #tree{} = Tree, Members, Fun, Acc, Options); | |
fold(_Db, #tree{} = _Tree, [], _Fun, Acc, _Options) -> | |
{ok, Acc}; | |
fold(Db, #tree{} = Tree, [{K, V} | Rest], Fun, Acc0, Options) -> | |
case Fun({visit, K, V}, Acc0) of | |
{ok, Acc1} -> | |
fold(Db, Tree, Rest, Fun, Acc1, Options); | |
{stop, Acc1} -> | |
{stop, Acc1} | |
end; | |
fold(Db, #tree{} = Tree, [{F, L, P, R} | Rest], Fun, Acc0, Options) -> | |
case Fun({traverse, F, L, R}, Acc0) of | |
{ok, Acc1} -> | |
Node = get_node(Db, Tree, P), | |
case fold(Db, Tree, Node, Fun, Acc1, Options) of | |
{ok, Acc2} -> | |
fold(Db, Tree, Rest, Fun, Acc2, Options); | |
{stop, Acc2} -> | |
{stop, Acc2} | |
end; | |
{skip, Acc1} -> | |
fold(Db, Tree, Rest, Fun, Acc1, Options); | |
{stop, Acc1} -> | |
{stop, Acc1} | |
end. | |
%% @doc Calculate the final reduce value for the whole ebtree. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree the ebtree. | |
%% @returns the final reduce value | |
-spec full_reduce(Db :: term(), Tree :: #tree{}) -> term(). | |
full_reduce(Db, #tree{} = Tree) -> | |
Fun = fun | |
({visit, K, V}, {MapAcc, ReduceAcc}) -> | |
{ok, {[{K, V} | MapAcc], ReduceAcc}}; | |
({traverse, _F, _L, R}, {MapAcc, ReduceAcc}) -> | |
{skip, {MapAcc, [R | ReduceAcc]}} | |
end, | |
{MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}, []), | |
do_reduce(Tree, MapValues, ReduceValues). | |
%% @doc Calculate the reduce value for all keys in the specified range. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree The ebtree. | |
%% @param StartKey The beginning of the range | |
%% @param EndKey The end of the range | |
%% @returns the reduce value for the specified range | |
-spec reduce(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term()) -> term(). | |
reduce(Db, #tree{} = Tree, StartKey, EndKey) -> | |
Fun = fun | |
({visit, Key, Value}, {MapAcc, ReduceAcc}) -> | |
AfterEnd = greater_than(Tree, Key, EndKey), | |
InRange = greater_than_or_equal(Tree, Key, StartKey) andalso less_than_or_equal(Tree, Key, EndKey), | |
if | |
AfterEnd -> | |
{stop, {MapAcc, ReduceAcc}}; | |
InRange -> | |
{ok, {[{Key, Value} | MapAcc], ReduceAcc}}; | |
true -> | |
{ok, {MapAcc, ReduceAcc}} | |
end; | |
({traverse, FirstKey, LastKey, Reduction}, {MapAcc, ReduceAcc}) -> | |
BeforeStart = less_than(Tree, LastKey, StartKey), | |
AfterEnd = greater_than(Tree, FirstKey, EndKey), | |
Whole = greater_than_or_equal(Tree, FirstKey, StartKey) andalso less_than_or_equal(Tree, LastKey, EndKey), | |
if | |
BeforeStart -> | |
{skip, {MapAcc, ReduceAcc}}; | |
AfterEnd -> | |
{stop, {MapAcc, ReduceAcc}}; | |
Whole -> | |
{skip, {MapAcc, [Reduction | ReduceAcc]}}; | |
true -> | |
{ok, {MapAcc, ReduceAcc}} | |
end | |
end, | |
{MapValues, ReduceValues} = fold(Db, Tree, Fun, {[], []}, []), | |
do_reduce(Tree, MapValues, ReduceValues). | |
do_reduce(#tree{} = Tree, [], ReduceValues) when is_list(ReduceValues) -> | |
reduce_values(Tree, ReduceValues, true); | |
do_reduce(#tree{} = Tree, MapValues, ReduceValues) when is_list(MapValues), is_list(ReduceValues) -> | |
do_reduce(Tree, [], [reduce_values(Tree, MapValues, false) | ReduceValues]). | |
%% @equiv group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []) | |
-spec group_reduce( | |
Db :: term(), | |
Tree :: #tree{}, | |
StartKey :: term(), | |
EndKey :: term(), | |
GroupKeyFun :: fun((term()) -> group_key()), | |
UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), | |
UserAcc0 :: term()) -> Acc1 :: term(). | |
group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0) -> | |
group_reduce(Db, Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, []). | |
%% @doc Calculate the reduce value for all groups in the specified range. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree The ebtree. | |
%% @param StartKey The beginning of the range | |
%% @param EndKey The end of the range | |
%% @param GroupKeyFun A function that takes a key as a parameter and returns the group key. | |
%% @param UserAccFun A function called when a new group reduction is calculated and returns an acc. | |
%% @param UserAcc0 The initial accumulator. | |
%% @param Options Currently supported options are [{dir, fwd}] and [{dir, rev}] | |
%% @returns the final accumulator. | |
-type group_key() :: term(). | |
-spec group_reduce( | |
Db :: term(), | |
Tree :: #tree{}, | |
StartKey :: term(), | |
EndKey :: term(), | |
GroupKeyFun :: fun((term()) -> group_key()), | |
UserAccFun :: fun(({group_key(), GroupValue :: term()}, Acc0 :: term()) -> Acc1 :: term()), | |
UserAcc0 :: term(), | |
Options :: [fold_option()]) -> Acc1 :: term(). | |
group_reduce(Db, #tree{} = Tree, StartKey, EndKey, GroupKeyFun, UserAccFun, UserAcc0, Options) -> | |
Dir = proplists:get_value(dir, Options, fwd), | |
NoGroupYet = ?MIN, | |
Fun = fun | |
({visit, Key, Value}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> | |
BeforeStart = less_than(Tree, Key, StartKey), | |
AfterEnd = greater_than(Tree, Key, EndKey), | |
InRange = in_range(Tree, StartKey, Key, EndKey), | |
KeyGroup = GroupKeyFun(Key), | |
SameGroup = CurrentGroup =:= KeyGroup, | |
if | |
Dir == fwd andalso AfterEnd -> | |
{stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; | |
Dir == rev andalso BeforeStart -> | |
{stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; | |
SameGroup -> | |
{ok, {CurrentGroup, UserAcc, [{Key, Value} | MapAcc], ReduceAcc}}; | |
InRange andalso CurrentGroup =:= NoGroupYet -> | |
{ok, {KeyGroup, UserAcc, [{Key, Value}], []}}; | |
InRange -> | |
%% implicit end of current group and start of a new one | |
GroupValue = do_reduce(Tree, MapAcc, ReduceAcc), | |
{ok, {KeyGroup, UserAccFun({CurrentGroup, GroupValue}, UserAcc), [{Key, Value}], []}}; | |
true -> | |
{ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} | |
end; | |
({traverse, FirstKey, LastKey, Reduction}, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}) -> | |
BeforeStart = less_than(Tree, LastKey, StartKey), | |
AfterEnd = greater_than(Tree, FirstKey, EndKey), | |
Whole = CurrentGroup =:= GroupKeyFun(FirstKey) andalso CurrentGroup =:= GroupKeyFun(LastKey), | |
FirstInRange = in_range(Tree, StartKey, FirstKey, EndKey), | |
LastInRange = in_range(Tree, StartKey, LastKey, EndKey), | |
if | |
Dir == fwd andalso BeforeStart -> | |
{skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; | |
Dir == rev andalso AfterEnd -> | |
{skip, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; | |
Dir == fwd andalso AfterEnd -> | |
{stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; | |
Dir == rev andalso BeforeStart -> | |
{stop, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}}; | |
Whole andalso FirstInRange andalso LastInRange -> | |
{skip, {CurrentGroup, UserAcc, MapAcc, [Reduction | ReduceAcc]}}; | |
true -> | |
{ok, {CurrentGroup, UserAcc, MapAcc, ReduceAcc}} | |
end | |
end, | |
{CurrentGroup, UserAcc1, MapValues, ReduceValues} = fold(Db, Tree, Fun, {NoGroupYet, UserAcc0, [], []}, Options), | |
if | |
MapValues /= [] orelse ReduceValues /= [] -> | |
FinalGroup = do_reduce(Tree, MapValues, ReduceValues), | |
UserAccFun({CurrentGroup, FinalGroup}, UserAcc1); | |
true -> | |
UserAcc1 | |
end. | |
%% @doc Finds all key-value pairs for the specified range in forward order. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree The ebtree. | |
%% @param StartKey The beginning of the range | |
%% @param EndKey The end of the range | |
%% @param AccFun A function that is called when a key-value pair is found, returning an accumulator. | |
%% @param Acc0 The initial accumulator | |
%% @returns the final accumulator | |
-spec range(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), | |
AccFun :: fun(), Acc0 :: term()) -> term(). | |
range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> | |
erlfdb:transactional(Db, fun(Tx) -> | |
range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) | |
end). | |
range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> | |
InRange = [{K, V} || {K, V} <- Node#node.members, | |
less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], | |
Acc1 = AccFun(InRange, Acc0), | |
LastKey = last_key(Node), | |
case Node#node.next /= undefined andalso less_than_or_equal(Tree, LastKey, EndKey) of | |
true -> | |
range(Tx, Tree, get_node(Tx, Tree, Node#node.next), StartKey, EndKey, AccFun, Acc1); | |
false -> | |
Acc1 | |
end; | |
range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> | |
ChildId = find_child_id(Tree, Node, StartKey), | |
range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). | |
%% @doc Finds all key-value pairs for the specified range in reverse order. | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree The ebtree. | |
%% @param StartKey The beginning of the range | |
%% @param EndKey The end of the range | |
%% @param AccFun A function that is called when a key-value pair is found, returning an accumulator. | |
%% @param Acc0 The initial accumulator | |
%% @returns the final accumulator | |
-spec reverse_range(Db :: term(), Tree :: #tree{}, StartKey :: term(), EndKey :: term(), | |
AccFun :: fun(), Acc0 :: term()) -> term(). | |
reverse_range(Db, #tree{} = Tree, StartKey, EndKey, AccFun, Acc0) -> | |
erlfdb:transactional(Db, fun(Tx) -> | |
reverse_range(Tx, Tree, get_node(Tx, Tree, ?NODE_ROOT_ID), StartKey, EndKey, AccFun, Acc0) | |
end). | |
reverse_range(Tx, #tree{} = Tree, #node{level = 0} = Node, StartKey, EndKey, AccFun, Acc0) -> | |
InRange = [{K, V} || {K, V} <- Node#node.members, | |
less_than_or_equal(Tree, StartKey, K), less_than_or_equal(Tree, K, EndKey)], | |
Acc1 = AccFun(lists:reverse(InRange), Acc0), | |
FirstKey = first_key(Node), | |
case Node#node.prev /= undefined andalso less_than_or_equal(Tree, StartKey, FirstKey) of | |
true -> | |
reverse_range(Tx, Tree, get_node(Tx, Tree, Node#node.prev), StartKey, EndKey, AccFun, Acc1); | |
false -> | |
Acc1 | |
end; | |
reverse_range(Tx, #tree{} = Tree, #node{} = Node, StartKey, EndKey, AccFun, Acc) -> | |
ChildId = find_child_id(Tree, Node, EndKey), | |
reverse_range(Tx, Tree, get_node(Tx, Tree, ChildId), StartKey, EndKey, AccFun, Acc). | |
%% @doc Inserts or updates a value in the ebtree | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree The ebtree. | |
%% @param Key The key to store the value under. | |
%% @param Value The value to store. | |
%% @returns the tree. | |
-spec insert(Db :: term(), Tree :: #tree{}, Key :: term(), Value :: term()) -> #tree{}. | |
insert(_Db, #tree{} = _Tree, ?MIN, _Value) -> | |
erlang:error(min_not_allowed); | |
insert(_Db, #tree{} = _Tree, ?MAX, _Value) -> | |
erlang:error(max_not_allowed); | |
insert(Db, #tree{} = Tree, Key, Value) -> | |
erlfdb:transactional(Db, fun(Tx) -> | |
Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), | |
case ?is_full(Tree, Root0) of | |
true -> | |
OldRoot = Root0#node{id = new_node_id(Tx, Tree)}, | |
FirstKey = first_key(OldRoot), | |
LastKey = last_key(OldRoot), | |
Root1 = #node{ | |
id = ?NODE_ROOT_ID, | |
level = Root0#node.level + 1, | |
members = [{FirstKey, LastKey, OldRoot#node.id, []}]}, | |
{Root2, _, _} = split_child(Tx, Tree, Root1, OldRoot), | |
insert_nonfull(Tx, Tree, Root2, Key, Value); | |
false -> | |
insert_nonfull(Tx, Tree, Root0, Key, Value) | |
end | |
end), | |
Tree. | |
split_child(Tx, #tree{} = Tree, #node{} = Parent0, #node{} = Child) -> | |
{LeftMembers, RightMembers} = lists:split(Tree#tree.min, Child#node.members), | |
LeftId = new_node_id(Tx, Tree), | |
RightId = new_node_id(Tx, Tree), | |
LeftChild = remove_pointers_if_not_leaf(#node{ | |
id = LeftId, | |
level = Child#node.level, | |
prev = Child#node.prev, | |
next = RightId, | |
members = LeftMembers | |
}), | |
RightChild = remove_pointers_if_not_leaf(#node{ | |
id = RightId, | |
level = Child#node.level, | |
prev = LeftId, | |
next = Child#node.next, | |
members = RightMembers | |
}), | |
update_prev_neighbour(Tx, Tree, LeftChild), | |
update_next_neighbour(Tx, Tree, RightChild), | |
%% adjust parent members | |
FirstLeftKey = first_key(LeftMembers), | |
LastLeftKey = last_key(LeftMembers), | |
FirstRightKey = first_key(RightMembers), | |
LastRightKey = last_key(RightMembers), | |
%% adjust parent reductions | |
LeftReduction = reduce_node(Tree, LeftChild), | |
RightReduction = reduce_node(Tree, RightChild), | |
Parent1 = Parent0#node{ | |
members = | |
umerge(Tree, [{FirstLeftKey, LastLeftKey, LeftId, LeftReduction}], | |
umerge(Tree, [{FirstRightKey, LastRightKey, RightId, RightReduction}], | |
lists:keydelete(Child#node.id, 3, Parent0#node.members))) | |
}, | |
clear_node(Tx, Tree, Child), | |
set_nodes(Tx, Tree, [LeftChild, RightChild, Parent1]), | |
{Parent1, LeftChild, RightChild}. | |
update_prev_neighbour(_Tx, #tree{} = _Tree, #node{prev = undefined} = _Node) -> | |
ok; | |
update_prev_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> | |
Left = get_node(Tx, Tree, Node#node.prev), | |
set_node(Tx, Tree, Left#node{next = Node#node.id}). | |
update_next_neighbour(_Tx, #tree{} = _Tree, #node{next = undefined} = _Node) -> | |
ok; | |
update_next_neighbour(Tx, #tree{} = Tree, #node{} = Node) -> | |
Left = get_node(Tx, Tree, Node#node.next), | |
set_node(Tx, Tree, Left#node{prev = Node#node.id}). | |
insert_nonfull(Tx, #tree{} = Tree, #node{level = 0} = Node0, Key, Value) -> | |
Node1 = Node0#node{ | |
members = umerge(Tree, [{Key, Value}], Node0#node.members) | |
}, | |
set_node(Tx, Tree, Node0, Node1), | |
reduce_node(Tree, Node1); | |
insert_nonfull(Tx, #tree{} = Tree, #node{} = Node0, Key, Value) -> | |
ChildId0 = find_child_id(Tree, Node0, Key), | |
Child0 = get_node(Tx, Tree, ChildId0), | |
{Node1, Child1} = case ?is_full(Tree, Child0) of | |
true -> | |
{Parent, LeftChild, RightChild} = split_child(Tx, Tree, Node0, Child0), | |
ChildId = find_child_id(Tree, Parent, Key), | |
Child = if | |
ChildId =:= LeftChild#node.id -> | |
LeftChild; | |
ChildId =:= RightChild#node.id -> | |
RightChild | |
end, | |
{Parent, Child}; | |
false -> | |
{Node0, Child0} | |
end, | |
ChildId1 = Child1#node.id, | |
NewReduction = insert_nonfull(Tx, Tree, Child1, Key, Value), | |
{CurrentFirstKey, CurrentLastKey, ChildId1, _OldReduction} = lists:keyfind(ChildId1, 3, Node1#node.members), | |
[NewFirstKey, _] = sort(Tree, [Key, CurrentFirstKey]), | |
[_, NewLastKey] = sort(Tree, [Key, CurrentLastKey]), | |
Node2 = Node1#node{ | |
members = lists:keyreplace(ChildId1, 3, Node1#node.members, | |
{NewFirstKey, NewLastKey, ChildId1, NewReduction}) | |
}, | |
set_node(Tx, Tree, Node0, Node2), | |
reduce_node(Tree, Node2). | |
%% @doc Deletes an entry from the ebtree | |
%% @param Db An erlfdb database or transaction. | |
%% @param Tree The ebtree. | |
%% @param Key The key of the entry to delete. | |
%% @returns the tree. | |
-spec delete(Db :: term(), Tree :: #tree{}, Key :: term()) -> #tree{}. | |
delete(Db, #tree{} = Tree, Key) -> | |
erlfdb:transactional(Db, fun(Tx) -> | |
Root0 = get_node(Tx, Tree, ?NODE_ROOT_ID), | |
case delete(Tx, Tree, Root0, Key) of | |
% if only one child, make it the new root. | |
#node{level = L, members = [_]} = Root1 when L > 0 -> | |
[{_, _, ChildId, _}] = Root1#node.members, | |
Root2 = get_node(Tx, Tree, ChildId), | |
clear_node(Tx, Tree, Root2), | |
set_node(Tx, Tree, Root2#node{id = ?NODE_ROOT_ID}); | |
Root1 -> | |
set_node(Tx, Tree, Root0, Root1) | |
end | |
end), | |
Tree. | |
delete(_Tx, #tree{} = _Tree, #node{level = 0} = Node, Key) -> | |
Node#node{ | |
members = lists:keydelete(Key, 1, Node#node.members) | |
}; | |
delete(Tx, #tree{} = Tree, #node{} = Parent0, Key) -> | |
ChildId0 = find_child_id(Tree, Parent0, Key), | |
Child0 = get_node(Tx, Tree, ChildId0), | |
Child1 = delete(Tx, Tree, Child0, Key), | |
case ?underflow(Tree, Child1) of | |
true -> | |
SiblingId = find_sibling_id(Tree, Parent0, ChildId0, Key), | |
Sibling = get_node(Tx, Tree, SiblingId), | |
NewNodes = case ?at_min(Tree, Sibling) of | |
true -> | |
Merged = merge(Tx, Tree, Child1, Sibling), | |
update_prev_neighbour(Tx, Tree, Merged), | |
update_next_neighbour(Tx, Tree, Merged), | |
[Merged]; | |
false -> | |
{Left, Right} = rebalance(Tx, Tree, Child1, Sibling), | |
update_prev_neighbour(Tx, Tree, Left), | |
update_next_neighbour(Tx, Tree, Right), | |
[Left, Right] | |
end, | |
%% remove old members and insert new members | |
Members0 = Parent0#node.members, | |
Members1 = lists:keydelete(ChildId0, 3, Members0), | |
Members2 = lists:keydelete(Sibling#node.id, 3, Members1), | |
Members3 = lists:foldl(fun(N, Acc) -> | |
umerge(Tree, [{first_key(N), last_key(N), N#node.id, reduce_node(Tree, N)}], Acc) | |
end, Members2, NewNodes), | |
Parent1 = Parent0#node{ | |
%% TODO change id | |
members = Members3 | |
}, | |
clear_nodes(Tx, Tree, [Child0, Sibling]), | |
set_nodes(Tx, Tree, NewNodes), | |
Parent1; | |
false -> | |
set_node(Tx, Tree, Child0, Child1), | |
{_OldFirstKey, _OldLastKey, ChildId0, _OldReduction} = lists:keyfind(ChildId0, 3, Parent0#node.members), | |
Parent0#node{ | |
members = lists:keyreplace(ChildId0, 3, Parent0#node.members, | |
{first_key(Child1), last_key(Child1), Child1#node.id, reduce_node(Tree, Child1)}) | |
} | |
end. | |
merge(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> | |
[Left, Right] = sort_nodes(Tree, [Node1, Node2]), | |
#node{ | |
id = new_node_id(Tx, Tree), | |
level = Level, | |
prev = Left#node.prev, | |
next = Right#node.next, | |
members = lists:append(Left#node.members, Right#node.members) | |
}. | |
rebalance(Tx, #tree{} = Tree, #node{level = Level} = Node1, #node{level = Level} = Node2) -> | |
[Left0, Right0] = sort_nodes(Tree, [Node1, Node2]), | |
Members = lists:append(Left0#node.members, Right0#node.members), | |
{LeftMembers, RightMembers} = lists:split(length(Members) div 2, Members), | |
Left1Id = new_node_id(Tx, Tree), | |
Right1Id = new_node_id(Tx, Tree), | |
Left1 = remove_pointers_if_not_leaf(Left0#node{ | |
id = Left1Id, | |
next = Right1Id, | |
members = LeftMembers | |
}), | |
Right1 = remove_pointers_if_not_leaf(Right0#node{ | |
id = Right1Id, | |
prev = Left1Id, | |
members = RightMembers | |
}), | |
{Left1, Right1}. | |
%% lookup functions | |
find_child_id(#tree{} = Tree, #node{} = Node, Key) -> | |
element(3, find_child(Tree, Node, Key)). | |
find_sibling_id(#tree{} = Tree, #node{level = L} = Node0, Id, Key) when L > 0 -> | |
Node1 = Node0#node{members = lists:keydelete(Id, 3, Node0#node.members)}, | |
find_child_id(Tree, Node1, Key). | |
find_child(#tree{} = Tree, #node{level = L} = Node, Key) when L > 0 -> | |
find_child_int(Tree, Node#node.members, Key). | |
find_child_int(#tree{} = _Tree, [Child], _Key) -> | |
Child; | |
find_child_int(#tree{} = Tree, [{_F, L, _P, _R} = Child| Rest], Key) -> | |
case less_than_or_equal(Tree, Key, L) of | |
true -> | |
Child; | |
false -> | |
find_child_int(Tree, Rest, Key) | |
end. | |
%% metadata functions | |
get_meta(Tx, #tree{} = Tree, MetaKey) -> | |
#tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, | |
Key = meta_key(Prefix, MetaKey), | |
Future = erlfdb:get(Tx, Key), | |
case erlfdb:wait(Future) of | |
not_found -> | |
not_found; | |
Bin when is_binary(Bin) -> | |
EncodeFun(decode, Key, Bin) | |
end. | |
set_meta(Tx, #tree{} = Tree, MetaKey, MetaValue) -> | |
#tree{prefix = Prefix, encode_fun = EncodeFun} = Tree, | |
Key = meta_key(Prefix, MetaKey), | |
erlfdb:set( | |
Tx, | |
Key, | |
EncodeFun(encode, Key, MetaValue) | |
). | |
meta_key(Prefix, MetaKey) when is_binary(Prefix) -> | |
erlfdb_tuple:pack({?META, MetaKey}, Prefix). | |
%% node persistence functions | |
get_node(Tx, #tree{} = Tree, Id) -> | |
Key = node_key(Tree#tree.prefix, Id), | |
Future = erlfdb:get(Tx, Key), | |
Value = erlfdb:wait(Future), | |
decode_node(Tree, Id, Key, Value). | |
clear_nodes(Tx, #tree{} = Tree, Nodes) -> | |
lists:foreach(fun(Node) -> | |
clear_node(Tx, Tree, Node) | |
end, Nodes). | |
clear_node(Tx, #tree{} = Tree, #node{} = Node) -> | |
Key = node_key(Tree#tree.prefix, Node#node.id), | |
erlfdb:clear(Tx, Key). | |
set_nodes(Tx, #tree{} = Tree, Nodes) -> | |
lists:foreach(fun(Node) -> | |
set_node(Tx, Tree, Node) | |
end, Nodes). | |
set_node(_Tx, #tree{} = _Tree, #node{} = Same, #node{} = Same) -> | |
ok; | |
set_node(Tx, #tree{} = Tree, #node{} = _From, #node{} = To) -> | |
set_node(Tx, Tree, To). | |
set_node(Tx, #tree{} = Tree, #node{} = Node) -> | |
validate_node(Tree, Node), | |
Key = node_key(Tree#tree.prefix, Node#node.id), | |
Value = encode_node(Tree, Key, Node), | |
erlfdb:set(Tx, Key, Value). | |
node_key(Prefix, Id) when is_binary(Prefix), is_integer(Id) -> | |
erlfdb_tuple:pack({?NODE, Id}, Prefix). | |
%% @doc Walks the whole tree and checks it for consistency. | |
%% It also prints it to screen. | |
validate_tree(Db, #tree{} = Tree) -> | |
erlfdb:transactional(Db, fun(Tx) -> | |
Root = get_node(Db, Tree, ?NODE_ROOT_ID), | |
validate_tree(Tx, Tree, Root) | |
end). | |
validate_tree(_Tx, #tree{} = Tree, #node{level = 0} = Node) -> | |
print_node(Node), | |
validate_node(Tree, Node); | |
validate_tree(Tx, #tree{} = Tree, #node{} = Node) -> | |
print_node(Node), | |
validate_node(Tree, Node), | |
validate_tree(Tx, Tree, Node#node.members); | |
validate_tree(_Tx, #tree{} = _Tree, []) -> | |
ok; | |
validate_tree(Tx, #tree{} = Tree, [{_F, _L, P, _R} | Rest]) -> | |
Node = get_node(Tx, Tree, P), | |
validate_tree(Tx, Tree, Node), | |
validate_tree(Tx, Tree, Rest). | |
validate_node(#tree{} = Tree, #node{} = Node) -> | |
NumKeys = length(Node#node.members), | |
IsLeaf = Node#node.level =:= 0, | |
IsRoot = ?NODE_ROOT_ID == Node#node.id, | |
OutOfOrder = Node#node.members /= sort_nodes(Tree, Node#node.members), | |
Duplicates = Node#node.members /= usort_nodes(Tree, Node#node.members), | |
if | |
Node#node.id == undefined -> | |
erlang:error({node_without_id, Node}); | |
not IsRoot andalso NumKeys < Tree#tree.min -> | |
erlang:error({too_few_keys, Node}); | |
NumKeys > Tree#tree.max -> | |
erlang:error({too_many_keys, Node}); | |
not IsLeaf andalso Node#node.prev /= undefined -> | |
erlang:error({non_leaf_with_prev, Node}); | |
not IsLeaf andalso Node#node.next /= undefined -> | |
erlang:error({non_leaf_with_next, Node}); | |
OutOfOrder -> | |
erlang:error({out_of_order, Node}); | |
Duplicates -> | |
erlang:error({duplicates, Node}); | |
true -> | |
ok | |
end. | |
%% data marshalling functions (encodes unnecesary fields as a NIL_REF) | |
encode_node(#tree{} = Tree, Key, #node{prev = undefined} = Node) -> | |
encode_node(Tree, Key, Node#node{prev = []}); | |
encode_node(#tree{} = Tree, Key, #node{next = undefined} = Node) -> | |
encode_node(Tree, Key, Node#node{next = []}); | |
encode_node(#tree{} = Tree, Key, #node{} = Node) -> | |
#tree{encode_fun = EncodeFun} = Tree, | |
EncodeFun(encode, Key, Node#node{id = []}). | |
decode_node(#tree{} = Tree, Id, Key, Value) when is_binary(Value) -> | |
#tree{encode_fun = EncodeFun} = Tree, | |
Term = EncodeFun(decode, Key, Value), | |
decode_node(Id, Term). | |
decode_node(Id, #node{prev = []} = Node) -> | |
decode_node(Id, Node#node{prev = undefined}); | |
decode_node(Id, #node{next = []} = Node) -> | |
decode_node(Id, Node#node{next = undefined}); | |
decode_node(Id, #node{} = Node) -> | |
Node#node{id = Id}. | |
%% built-in reduce functions. | |
reduce_noop(_KVs, _Rereduce) -> | |
[]. | |
reduce_node(#tree{} = Tree, #node{level = 0} = Node) -> | |
reduce_values(Tree, Node#node.members, false); | |
reduce_node(#tree{} = Tree, #node{} = Node) -> | |
Rs = [R || {_F, _L, _P, R} <- Node#node.members], | |
reduce_values(Tree, Rs, true). | |
reduce_values(#tree{} = Tree, Values, Rereduce) when is_list(Values) -> | |
#tree{reduce_fun = ReduceFun} = Tree, | |
ReduceFun(Values, Rereduce). | |
%% collation functions | |
in_range(#tree{} = Tree, StartOfRange, Key, EndOfRange) -> | |
greater_than_or_equal(Tree, Key, StartOfRange) andalso less_than_or_equal(Tree, Key, EndOfRange). | |
greater_than(#tree{} = Tree, A, B) -> | |
not less_than_or_equal(Tree, A, B). | |
greater_than_or_equal(#tree{} = _Tree, A, A) -> | |
true; | |
greater_than_or_equal(#tree{} = Tree, A, B) -> | |
greater_than(Tree, A, B). | |
less_than(#tree{} = _Tree, A, A) -> | |
false; | |
less_than(#tree{} = Tree, A, B) -> | |
less_than_or_equal(Tree, A, B). | |
less_than_or_equal(#tree{} = _Tree, ?MIN, _B) -> | |
true; | |
less_than_or_equal(#tree{} = _Tree, _A, ?MIN) -> | |
false; | |
less_than_or_equal(#tree{} = _Tree, ?MAX, _B) -> | |
false; | |
less_than_or_equal(#tree{} = _Tree, _A, ?MAX) -> | |
true; | |
less_than_or_equal(#tree{} = Tree, A, B) -> | |
#tree{collate_fun = CollateFun} = Tree, | |
CollateFun(A, B). | |
umerge(#tree{} = Tree, List1, List2) -> | |
#tree{collate_fun = CollateFun} = Tree, | |
CollateWrapper = fun | |
({K1, _V1}, {K2, _V2}) -> | |
CollateFun(K1, K2); | |
({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> | |
CollateFun(L1, L2) | |
end, | |
lists:umerge(CollateWrapper, List1, List2). | |
sort(#tree{} = Tree, List) -> | |
#tree{collate_fun = CollateFun} = Tree, | |
lists:sort(CollateFun, List). | |
sort_nodes(#tree{} = Tree, List) -> | |
#tree{collate_fun = CollateFun} = Tree, | |
CollateWrapper = fun | |
(#node{} = N1, #node{} = N2) -> | |
CollateFun(first_key(N1), first_key(N2)); | |
({K1, _V1}, {K2, _V2}) -> | |
CollateFun(K1, K2); | |
({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> | |
CollateFun(L1, L2) | |
end, | |
lists:sort(CollateWrapper, List). | |
usort_nodes(#tree{} = Tree, List) -> | |
#tree{collate_fun = CollateFun} = Tree, | |
CollateWrapper = fun | |
(#node{} = N1, #node{} = N2) -> | |
CollateFun(first_key(N1), first_key(N2)); | |
({K1, _V1}, {K2, _V2}) -> | |
CollateFun(K1, K2); | |
({_F1, L1, _V1, _R1}, {_F2, L2, _V2, _R2}) -> | |
CollateFun(L1, L2) | |
end, | |
lists:usort(CollateWrapper, List). | |
collate_raw(K1, K2) -> | |
K1 =< K2. | |
%% encoding function | |
encode_erlang(encode, _Key, Value) -> | |
term_to_binary(Value, [compressed, {minor_version, 2}]); | |
encode_erlang(decode, _Key, Value) -> | |
binary_to_term(Value, [safe]). | |
%% private functions | |
init_order(#tree{} = Tree, Order) | |
when is_integer(Order), Order > 2, Order rem 2 == 0 -> | |
Tree#tree{ | |
min = Order div 2, | |
max = Order | |
}. | |
first_key(#node{} = Node) -> | |
first_key(Node#node.members); | |
first_key(Members) when is_list(Members) -> | |
element(1, hd(Members)). | |
last_key(#node{} = Node) -> | |
last_key(Node#node.members); | |
last_key(Members) when is_list(Members) -> | |
case lists:last(Members) of | |
{K, _V} -> | |
K; | |
{_F, L, _P, _R} -> | |
L | |
end. | |
new_node_id(Tx, Tree) -> | |
NextId = get_meta(Tx, Tree, ?META_NEXT_ID), | |
set_meta(Tx, Tree, ?META_NEXT_ID, NextId + 1), | |
NextId. | |
%% remove prev/next pointers for nonleaf nodes | |
remove_pointers_if_not_leaf(#node{level = 0} = Node) -> | |
Node; | |
remove_pointers_if_not_leaf(#node{} = Node) -> | |
Node#node{prev = undefined, next = undefined}. | |
print_node(#node{} = Node) -> | |
io:format("#node{id = ~w, level = ~w, prev = ~w, next = ~w, members = ~w}~n~n", | |
[Node#node.id, Node#node.level, Node#node.prev, Node#node.next, Node#node.members]). | |
%% tests | |
-ifdef(TEST). | |
-include_lib("eunit/include/eunit.hrl"). | |
reduce_sum(KVs, false) -> | |
{_, Vs} = lists:unzip(KVs), | |
lists:sum(Vs); | |
reduce_sum(Rs, true) -> | |
lists:sum(Rs). | |
reduce_count(KVs, false) -> | |
length(KVs); | |
reduce_count(Rs, true) -> | |
lists:sum(Rs). | |
reduce_stats(KVs, false) -> | |
{_, Vs} = lists:unzip(KVs), | |
{ | |
lists:sum(Vs), | |
lists:min(Vs), | |
lists:max(Vs), | |
length(Vs), | |
lists:sum([V * V || V <- Vs]) | |
}; | |
reduce_stats(Rs, true) -> | |
lists:foldl( | |
fun({Sum, Min, Max, Count, SumSqr}, | |
{SumAcc, MinAcc, MaxAcc, CountAcc, SumSqrAcc}) -> | |
{ | |
Sum + SumAcc, | |
erlang:min(Min, MinAcc), | |
erlang:max(Max, MaxAcc), | |
Count + CountAcc, | |
SumSqr + SumSqrAcc | |
} end, hd(Rs), tl(Rs)). | |
collation_fun_test_() -> | |
Tree = #tree{collate_fun = fun collate_raw/2}, | |
[ | |
?_test(?assert(greater_than(Tree, 4, 3))), | |
?_test(?assertNot(greater_than(Tree, 3, 4))), | |
?_test(?assert(greater_than_or_equal(Tree, 3, 3))), | |
?_test(?assert(greater_than_or_equal(Tree, 3, 3))), | |
?_test(?assert(less_than(Tree, 3, 4))), | |
?_test(?assertNot(less_than(Tree, 3, 3))), | |
?_test(?assertNot(less_than(Tree, 4, 3))), | |
?_test(?assert(less_than_or_equal(Tree, 3, 3))), | |
?_test(?assert(less_than_or_equal(Tree, 3, 4))), | |
?_test(?assertNot(less_than_or_equal(Tree, 4, 3))) | |
]. | |
lookup_test() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4), | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), | |
lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), | |
?assertEqual(false, lookup(Db, Tree, 101)). | |
delete_test() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4), | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), | |
lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), | |
lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), | |
lists:foreach(fun(Key) -> ?assertEqual(false, lookup(Db, Tree, Key)) end, Keys). | |
range_after_delete_test() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4), | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, 100)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key + 1) end, Keys), | |
lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), | |
lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, lists:seq(1, 100, 2)), | |
?assertEqual(50, range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)), | |
?assertEqual(50, reverse_range(Db, Tree, 1, 100, fun(E, A) -> length(E) + A end, 0)). | |
full_reduce_test_() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), | |
TestFun = fun(Max) -> | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), | |
?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)) | |
end, | |
[ | |
?_test(TestFun(4)), | |
?_test(TestFun(8)) | |
]. | |
full_reduce_after_delete_test() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), | |
Max = 100, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), | |
?assertEqual(round(Max * ((1 + Max) / 2)), full_reduce(Db, Tree)), | |
lists:foreach(fun(Key) -> delete(Db, Tree, Key) end, Keys), | |
?assertEqual(0, full_reduce(Db, Tree)). | |
count_reduce_test_() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_count/2}]), | |
Max = 100, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), | |
Expected = fun(S, E) -> E - S + 1 end, | |
[ | |
?_test(?assertEqual(Expected(1, 5), reduce(Db, Tree, 1, 5))), | |
?_test(?assertEqual(Expected(50, 60), reduce(Db, Tree, 50, 60))), | |
?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), | |
?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), | |
?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), | |
?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))) | |
]. | |
sum_reduce_test_() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), | |
Max = 100, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), | |
Expected = fun(S, E) -> lists:sum(lists:seq(S, E)) end, | |
[ | |
?_test(?assertEqual(Expected(1, 5), reduce(Db, Tree, 1, 5))), | |
?_test(?assertEqual(Expected(50, 60), reduce(Db, Tree, 50, 60))), | |
?_test(?assertEqual(Expected(21, 83), reduce(Db, Tree, 21, 83))), | |
?_test(?assertEqual(Expected(1, 1), reduce(Db, Tree, 1, 1))), | |
?_test(?assertEqual(Expected(1, 100), reduce(Db, Tree, 0, 200))), | |
?_test(?assertEqual(Expected(5, 7), reduce(Db, Tree, 5, 7))) | |
]. | |
stats_reduce_test_() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_stats/2}]), | |
Max = 100, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), | |
[ | |
?_test(?assertEqual({15,1,5,5,55}, reduce(Db, Tree, 1, 5))), | |
?_test(?assertEqual({605,50,60,11,33385}, reduce(Db, Tree, 50, 60))), | |
?_test(?assertEqual({3276,21,83,63,191184}, reduce(Db, Tree, 21, 83))), | |
?_test(?assertEqual({1,1,1,1,1}, reduce(Db, Tree, 1, 1))), | |
?_test(?assertEqual({5050,1,100,100,338350}, reduce(Db, Tree, 0, 200))), | |
?_test(?assertEqual({18,5,7,3,110}, reduce(Db, Tree, 5, 7))) | |
]. | |
group_reduce_level_test_() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_sum/2}]), | |
Max = 100, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
GroupKeyFun = fun(Key) -> lists:sublist(Key, 2) end, | |
UserAccFun = fun({K,V}, Acc) -> Acc ++ [{K, V}] end, | |
lists:foreach(fun(Key) -> insert(Db, Tree, [Key rem 4, Key rem 3, Key], Key) end, Keys), | |
[ | |
?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], | |
group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, []))), | |
?_test(?assertEqual([{[1, 0], 408}, {[1, 1], 441}, {[1, 2], 376}], | |
group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, [], [{dir, fwd}]))), | |
?_test(?assertEqual([{[1, 2], 376}, {[1, 1], 441}, {[1, 0], 408}], | |
group_reduce(Db, Tree, [1], [2], GroupKeyFun, UserAccFun, [], [{dir, rev}]))), | |
?_test(?assertEqual([{[0,0],432}, {[0,1],468}, {[0,2],400}, {[1,0],408}, {[1,1],441}, {[1,2],376}, | |
{[2,0],384}, {[2,1],416}, {[2,2],450}, {[3,0],459}, {[3,1],392}, {[3,2],424}], | |
group_reduce(Db, Tree, ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))) | |
]. | |
group_reduce_int_test_() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4, [{reduce_fun, fun reduce_count/2}]), | |
Max = 100, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
GroupKeyFun = fun(_Key) -> null end, | |
UserAccFun = fun({K,V}, Acc) -> Acc ++ [{K, V}] end, | |
lists:foreach(fun(Key) -> insert(Db, Tree, Key, Key) end, Keys), | |
[ | |
?_test(?assertEqual([{null, 100}], group_reduce(Db, Tree, | |
ebtree:min(), ebtree:max(), GroupKeyFun, UserAccFun, []))), | |
?_test(?assertEqual([{null, 99}], group_reduce(Db, Tree, 2, ebtree:max(), GroupKeyFun, UserAccFun, []))), | |
?_test(?assertEqual([{null, 96}], group_reduce(Db, Tree, 3, 98, GroupKeyFun, UserAccFun, []))) | |
]. | |
raw_collation_test() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Tree = open(Db, <<1,2,3>>, 4), | |
insert(Db, Tree, null, null), | |
insert(Db, Tree, 1, 1), | |
?assertEqual([{1, 1}, {null, null}], range(Db, Tree, 1, null, fun(E, A) -> A ++ E end, [])). | |
custom_collation_test() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
CollateFun = fun(A, B) -> B =< A end, | |
Tree = open(Db, <<1,2,3>>, 4, [{collate_fun, CollateFun}]), | |
insert(Db, Tree, 1, 1), | |
insert(Db, Tree, 2, 2), | |
?assertEqual([{2, 2}, {1, 1}], range(Db, Tree, 3, 0, fun(E, A) -> A ++ E end, [])). | |
intense_lookup_test_() -> | |
[ | |
{timeout, 1000, fun() -> lookup_test_fun(1000, 20) end}, | |
{timeout, 1000, fun() -> lookup_test_fun(1000, 50) end}, | |
{timeout, 1000, fun() -> lookup_test_fun(1000, 500) end} | |
]. | |
lookup_test_fun(Max, Order) -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max, 2)])], | |
T0 = erlang:monotonic_time(), | |
Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, Order), Keys), | |
T1 = erlang:monotonic_time(), | |
lists:foreach(fun(Key) -> ?assertEqual({Key, Key + 1}, lookup(Db, Tree, Key)) end, Keys), | |
T2 = erlang:monotonic_time(), | |
?debugFmt("~B order. ~B iterations. insert rate: ~.2f/s, lookup rate: ~.2f/s", | |
[Order, Max, 1000 * (Max / msec(T1 - T0)), 1000 * (Max / msec(T2 - T1))]). | |
range_test_() -> | |
{timeout, 1000, fun() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Max = 1000, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), | |
lists:foreach( | |
fun(_) -> | |
[StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), | |
?assertEqual([{K, K + 1} || K <- lists:seq(StartKey, EndKey)], | |
range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) | |
) end, | |
lists:seq(1, 1000)) | |
end}. | |
reverse_range_test_() -> | |
{timeout, 1000, fun() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Max = 1000, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
Tree = lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, open(Db, <<1,2,3>>, 10), Keys), | |
lists:foreach( | |
fun(_) -> | |
[StartKey, EndKey] = lists:sort([rand:uniform(Max), rand:uniform(Max)]), | |
?assertEqual([{K, K + 1} || K <- lists:seq(EndKey, StartKey, -1)], | |
reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) | |
) end, | |
lists:seq(1, 1000)) | |
end}. | |
custom_collation_range_test_() -> | |
{timeout, 1000, fun() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Max = 1000, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
CollateFun = fun(A, B) -> B =< A end, | |
Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), | |
lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), | |
lists:foreach( | |
fun(_) -> | |
[StartKey, EndKey] = sort(Tree, [rand:uniform(Max), rand:uniform(Max)]), | |
Seq = if | |
StartKey < EndKey -> | |
lists:seq(StartKey, EndKey); | |
true -> | |
lists:seq(StartKey, EndKey, -1) | |
end, | |
?assertEqual([{K, K + 1} || K <- Seq], | |
range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) | |
) end, | |
lists:seq(1, 1000)) | |
end}. | |
custom_collation_reverse_range_test_() -> | |
{timeout, 1000, fun() -> | |
Db = erlfdb_util:get_test_db([empty]), | |
Max = 1000, | |
Keys = [X || {_, X} <- lists:sort([ {rand:uniform(), N} || N <- lists:seq(1, Max)])], | |
CollateFun = fun(A, B) -> B =< A end, | |
Tree = open(Db, <<1,2,3>>, 10, [{collate_fun, CollateFun}]), | |
lists:foldl(fun(Key, T) -> insert(Db, T, Key, Key + 1) end, Tree, Keys), | |
lists:foreach( | |
fun(_) -> | |
[StartKey, EndKey] = sort(Tree, [rand:uniform(Max), rand:uniform(Max)]), | |
Seq = if | |
StartKey < EndKey -> | |
lists:seq(StartKey, EndKey); | |
true -> | |
lists:seq(StartKey, EndKey, -1) | |
end, | |
?assertEqual([{K, K + 1} || K <- lists:reverse(Seq)], | |
reverse_range(Db, Tree, StartKey, EndKey, fun(E, A) -> A ++ E end, []) | |
) end, | |
lists:seq(1, 1000)) | |
end}. | |
msec(Native) -> | |
erlang:max(1, erlang:convert_time_unit(Native, native, millisecond)). | |
-endif. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment