Skip to content

Instantly share code, notes, and snippets.

@kuniyoshi
Last active August 29, 2015 13:58
Show Gist options
  • Save kuniyoshi/9956938 to your computer and use it in GitHub Desktop.
Save kuniyoshi/9956938 to your computer and use it in GitHub Desktop.
Count many labels using the Lossy Counting method.
-module(lcm).
-export([start_loop/1, start_loop/0]).
-export([loop/5]).
%% @doc Prune the counter at a bucket boundary (Lossy Counting).
%%
%% `Counter' is a list of `{Label, {Count, Bucket}}' entries; `Bucket' is the
%% value recorded when the label was first inserted (loop/5 stores the bucket
%% id minus one). `BucketSize' is the value loop/5 reads from the
%% `bucket_size' option, which it uses as the current bucket id.
%%
%% Returns the entries that survive the prune, in their original order.
%% An entry survives only while `Count + Bucket > BucketSize'. The previous
%% predicate was inverted: it kept the entries with
%% `Count + Bucket =< BucketSize' and threw the frequent ones away.
%%
%% A malformed entry still crashes with `function_clause' (lists:filter/2 is
%% kept on purpose instead of a comprehension, which would skip it silently).
cut_off_bucket(Counter, BucketSize) ->
    lists:filter(
        fun({_Label, {Count, Bucket}}) ->
            Count + Bucket > BucketSize
        end,
        Counter
    ).
%% @doc Receive loop of the counter process.
%%
%% Arguments:
%%   Caller   - pid whose requests are served; replies are sent back to it.
%%   Options  - proplist; `bucket_size' (used as the current bucket id) and
%%              `interval' (number of `count' messages per bucket) are read.
%%   Ttl      - `count' messages left before the next bucket boundary.
%%   DataSize - number of `count' messages handled so far.
%%   Counter  - list of `{Label, {Count, Bucket}}' entries.
%%
%% Requests (each must be tagged with Caller):
%%   {Caller, stop}            - leave the loop; the function returns `ok'.
%%   {Caller, counter}         - reply `{self(), {counter, Counter}}'.
%%   {Caller, frequency}       - reply `{self(), [{Label, Count}]}'.
%%   {Caller, {count, Labels}} - add one to every label in Labels.
%%
%% NOTE(review): Ttl and DataSize advance once per `count' message, not once
%% per label - confirm that a message is the intended unit of the stream.
loop(Caller, Options, 0, DataSize, Counter) ->
    %% Bucket boundary: prune with the current bucket id, then advance the id
    %% so that later insertions and the next prune use the new bucket.
    %% (Previously the id was never advanced and stayed at its initial value.)
    BucketId = proplists:get_value(bucket_size, Options),
    Pruned = cut_off_bucket(Counter, BucketId),
    Options2 = lists:keystore(bucket_size, 1, Options, {bucket_size, BucketId + 1}),
    Ttl = proplists:get_value(interval, Options2),
    loop(Caller, Options2, Ttl, DataSize, Pruned);
loop(Caller, Options, Ttl, DataSize, Counter) ->
    receive
        {Caller, stop} ->
            ok;
        {Caller, counter} ->
            Caller ! {self(), {counter, Counter}},
            loop(Caller, Options, Ttl, DataSize, Counter);
        {Caller, frequency} ->
            Frequency = [{Label, Count} || {Label, {Count, _}} <- Counter],
            Caller ! {self(), Frequency},
            loop(Caller, Options, Ttl, DataSize, Counter);
        {Caller, {count, Labels}} ->
            %% A label seen for the first time starts at count 0 with the
            %% current bucket id minus one recorded next to it.
            Default = {0, proplists:get_value(bucket_size, Options) - 1},
            Counter2 = lists:foldl(
                fun(Label, Acc) ->
                    %% keyfind/3 and keystore/4 both compare with ==, so the
                    %% lookup and the store always agree on which entry is
                    %% meant (proplists:get_value/3 matches with =:= and could
                    %% miss an entry that keystore/4 then overwrote).
                    {Count, Bucket} =
                        case lists:keyfind(Label, 1, Acc) of
                            {_Key, Found} -> Found;
                            false -> Default
                        end,
                    lists:keystore(Label, 1, Acc, {Label, {Count + 1, Bucket}})
                end,
                Counter,
                Labels
            ),
            loop(Caller, Options, Ttl - 1, DataSize + 1, Counter2);
        _Unexpected ->
            %% Caller and the patterns above never change, so a message that
            %% does not match now can never match later; drop it instead of
            %% letting it sit in the mailbox forever.
            loop(Caller, Options, Ttl, DataSize, Counter)
    end.
%% @doc Spawn a counter process that serves the calling process.
%%
%% Options is a proplist; recognised keys:
%%   gamma   - defaults to 0.005; stored in the loop options (loop/5 in this
%%             module does not read it).
%%   epsilon - defaults to 0.004; the bucket interval is derived from it as
%%             trunc(1 / Epsilon) + 1.
%%
%% The process starts in loop/5 with the bucket id set to 1, a full interval
%% left, a data size of 0 and an empty counter. Returns the new pid.
start_loop(Options) ->
    Epsilon = proplists:get_value(epsilon, Options, 0.004),
    Gamma = proplists:get_value(gamma, Options, 0.005),
    Interval = trunc(1 / Epsilon) + 1,
    LoopOptions = [
        {gamma, Gamma},
        {epsilon, Epsilon},
        {interval, Interval},
        {bucket_size, 1}
    ],
    spawn(?MODULE, loop, [self(), LoopOptions, Interval, 0, []]).
%% @doc Spawn a counter process with every option left at its default;
%% same as calling start_loop/1 with an empty option list.
start_loop() -> start_loop([]).
@kuniyoshi
Copy link
Author

12> Pid = lcm:start_loop().
<0.52.0>
13> Pid ! {self(), {count, [a, b, c, a, b, c, a, a, z, c, a]}}.
{<0.49.0>,{count,[a,b,c,a,b,c,a,a,z,c,a]}}
14> Pid ! {self(), counter}.
{<0.49.0>,counter}
15> flush().
Shell got {<0.52.0>,{counter,[{a,{5,0}},{b,{2,0}},{c,{3,0}},{z,{1,0}}]}}
ok
16> Pid ! {self(), frequency}.
{<0.49.0>,frequency}
17> flush().
Shell got {<0.52.0>,[{a,5},{b,2},{c,3},{z,1}]}
ok
18>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment