Skip to content

Instantly share code, notes, and snippets.

@kjnilsson
Last active January 4, 2018 16:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kjnilsson/1ba3f3455fb23d517bd495ff232d4716 to your computer and use it in GitHub Desktop.
Save kjnilsson/1ba3f3455fb23d517bd495ff232d4716 to your computer and use it in GitHub Desktop.
spreading sampler module
-module(ra_thw).
-export([
% new/1, % size,
new/2, % size, distance
append/3,
to_list/1,
get/2, % kv lookup
truncate/2 % state, first entry to keep
]).
-define(DEFAULT_SIZE, 32).
-define(DEFAULT_DISTANCE, 32).
-record(state, {size = ?DEFAULT_SIZE :: non_neg_integer(),
dist = ?DEFAULT_DISTANCE :: non_neg_integer(),
sorted_keys = [] :: [],
kv = #{} :: #{term() => term()},
cur_size = 0,
dist_count = 0 :: non_neg_integer()
}).
-type state() :: #state{}.
-export_type([state/0]).
%% A fixed size, sorted-by-insertion map-like data structure that samples incoming
%% key-values every 'dist' (distance) inputs. When it's full it grows the 'dist' value
%% and "thins" the current key space by removing every other key in the key space.
%% The keyspace can also be reduced by truncating all keys before and including
%% some given key.
new(Size, Distance) ->
#state{size = Size,
dist = Distance,
% we want to capture the first sample
dist_count = Distance}.
append(#state{cur_size = Size, size = Size,
dist = Dist0, dist_count = Dist0} = State0,
_Key, _Value) ->
% time to thin out the samples and grow the distance
Dist = (Dist0 * 2) + 1,
State = thin_out(State0),
State#state{dist = Dist,
% we've received one sample
% Add one to current dist count
dist_count = Dist0 + 1,
cur_size = length(State#state.sorted_keys)};
append(#state{dist = Dist, dist_count = Dist, sorted_keys = Keys,
kv = Kv, cur_size = CurSize} = State,
Key, Value) ->
State#state{sorted_keys = [Key | Keys], kv = Kv#{Key => Value},
dist_count = 0, cur_size = CurSize + 1};
append(#state{dist_count = C} = State, _Key, _Value) ->
% it's not time to sample yet
State#state{dist_count = C + 1}.
get(#state{kv = Kv}, Key) ->
maps:get(Key, Kv, undefined).
% mostly for testing
to_list(#state{sorted_keys = Keys, kv = Kv}) ->
to_list0(Keys, Kv, []).
to_list0([Key | T], Kv, Acc) ->
#{Key := Value} = Kv,
to_list0(T, Kv, [{Key, Value} | Acc]);
to_list0([], _Kv, Acc) ->
Acc.
truncate(#state{kv = _Kv0} = State, _Key) ->
% case Kv0 of
% #{Key := _} ->
% % key exists - we can truncate:vspl
State.
%% Internal
thin_out(#state{sorted_keys = Keys0, kv = Kv0} = State) ->
{Keys, Kv} = thin_out0(Keys0, Kv0, {[], #{}}),
State#state{sorted_keys = Keys, kv = Kv}.
thin_out0([KeepKey, _ | Tail], Kv, {KeyAcc, KvAcc}) ->
#{KeepKey := KeepVal} = Kv,
thin_out0(Tail, Kv, {[KeepKey | KeyAcc], KvAcc#{KeepKey => KeepVal}});
thin_out0([KeepKey | []], Kv, {KeyAcc, KvAcc}) ->
#{KeepKey := KeepVal} = Kv,
{lists:reverse([KeepKey | KeyAcc]), KvAcc#{KeepKey => KeepVal}};
thin_out0([], _Kv, {Keys, Kv}) ->
{lists:reverse(Keys), Kv}.
-ifdef(TEST).
-include_lib("eunit/include/eunit.hrl").
append_test() ->
Thw0 = new(5, 1),
Thw1 = lists:foldl(fun (S, Acc) ->
append(Acc, S, S)
end, Thw0, [a,b,c,d,e]),
[a, c, e] = map_fst(to_list(Thw1)),
Thw2 = lists:foldl(fun (S, Acc) ->
append(Acc, S, S)
end, Thw1, [f, g, h, i, j, k, l]),
% after 'i' it will grow the distance from 1 to 3
% and thin the current samples
% so no sample will be taken when 'k' nd 'l' are received
[a, e, i] = map_fst(to_list(Thw2)),
% when 'm' is received we need to take another sample and thus
% need to thin previous samples to make room
Thw3 = append(Thw2, m, m),
[a, e, i, m] = map_fst(to_list(Thw3)),
Thw4 = lists:foldl(fun (S, Acc) ->
append(Acc, S, S)
end, Thw3, [l, m, n]),
[a, e, i, m] = map_fst(to_list(Thw4)),
Thw = append(Thw4, o, o),
[a, e, i, m, o] = map_fst(to_list(Thw)),
a = get(Thw, a),
undefined = get(Thw, c),
ok.
% truncate_test() ->
% Thw0 = new(5, 1),
% Thw1 = lists:foldl(fun (S, Acc) ->
% append(Acc, S, S)
% end, Thw0, [a,b,c,d,e]),
% Thw = truncate(Thw1, c),
% [e] = map_fst(to_list(Thw)),
% ok.
map_fst(L) ->
[element(1, X) || X <- L].
-endif.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment