Skip to content

Instantly share code, notes, and snippets.

@saulbensach
Last active May 17, 2024 23:33
Show Gist options
  • Save saulbensach/0cd6003a02479d674f624e1c75e6451e to your computer and use it in GitHub Desktop.
Save saulbensach/0cd6003a02479d674f624e1c75e6451e to your computer and use it in GitHub Desktop.
# implementation of https://arxiv.org/pdf/2301.10191 in Elixir :)
# uses ets so it does not copies to much when testing the set.
```elixir
defmodule Distinct do
def run() do
items = for i <- 1..500, do: Enum.random(1..400)
error = 0.01
confidence = 0.001
expected = Enum.uniq(items) |> Enum.count()
observed = distinct_ets(items, error, confidence)
"Expected: #{expected} Observed: #{observed}"
end
def distinct_ets(items, error, confidence) do
table = :ets.new(:distinct, [:set])
thresh = :math.ceil((12/(error*error)) * :math.log2((8*Enum.count(items)) / confidence))
result = process_ets(items, thresh, 1, table)
:ets.delete(table)
trunc(result)
end
defp process_ets([], thresh, p, table), do: ets_count(table) / p
defp process_ets([item | items], thresh, p, table) do
with true <- not :ets.member(table, {item, item}),
true <- :random.uniform() < p,
count_acc <- ets_count(table) do
:ets.insert(table, {item, item})
if count_acc == thresh do
reduce_set_ets(table)
process_ets(items, thresh, p/2, table)
else
process_ets(items, thresh, p, table)
end
else
_ ->
process_ets(items, thresh, p, table)
end
end
defp reduce_set_ets(table) do
:ets.foldl(fn {k, v}, acc ->
if :random.uniform() < 0.5, do: :ets.delete_object(table, {k, v}), else: acc
end, 0, table)
end
defp ets_count(table) do
:ets.info(table)[:size]
end
end
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment