Skip to content

Instantly share code, notes, and snippets.

@ShreyanJain9
Last active March 28, 2024 20:48
Show Gist options
  • Save ShreyanJain9/dfc7d6252cecd334f6266564893735b8 to your computer and use it in GitHub Desktop.
Save ShreyanJain9/dfc7d6252cecd334f6266564893735b8 to your computer and use it in GitHub Desktop.
A Bloom Filter in Elixir
defmodule BloomFilter do
@moduledoc """
A small Bloom Filter in Elixir
"""
@default_hash_fns [:sha256, :sha512, :blake2b]
defstruct bits: <<0::size(64)>>, hash_fns: @default_hash_fns
@type hashfn :: :crypto.hash_algorithm() | :phash2 | function()
@type hashfns :: [hashfn()]
@type t :: %BloomFilter{
bits: bitstring(),
hash_fns: hashfns()
}
@spec new(pos_integer(), hashfns()) :: t()
def new(size \\ 64, hash_fns \\ @default_hash_fns) do
%BloomFilter{bits: <<0::size(size)>>, hash_fns: hash_fns}
end
@spec default_hash_fns() :: hashfns()
def default_hash_fns, do: @default_hash_fns
@spec hash(hashfn(), term()) :: binary()
defp hash(:phash2, key), do: <<:erlang.phash2(key)>>
defp hash(hash_fn, key) when is_function(hash_fn), do: hash_fn.(key)
defp hash(hash_fn, key) when is_atom(hash_fn), do: :crypto.hash(hash_fn, key)
@spec hashes(t(), term()) :: [non_neg_integer()]
defp hashes(%{hash_fns: hash_fns, bits: bits}, key) do
for hash_fn <- hash_fns do
hash_fn
|> hash(key)
|> :binary.decode_unsigned
|> rem(bit_size(bits))
end
end
@spec bit_at(bitstring(), non_neg_integer()) :: 0 | 1
defp bit_at(bits, i) do
<<slice::bitstring-size(i), _rest::bitstring>> = bits
<<_::size(i - 1), bit::1>> = slice
bit
end
@spec set_bit(bitstring(), non_neg_integer(), 0 | 1) :: bitstring()
defp set_bit(bits, i, value) do
<<slice::bitstring-size(i), rest::bitstring>> = bits
<<a::bitstring-size(i - 1), _::1>> = slice
<<a::bitstring, value::1, rest::bitstring>>
end
@spec add(t(), term()) :: t()
def add(%BloomFilter{bits: bits} = bf, key) do
Enum.reduce(hashes(bf, key), bits, fn i, bit_string ->
set_bit(bit_string, i, 1)
end)
|> then(fn bits -> %BloomFilter{bf | bits: bits} end)
end
@spec contains?(t(), term()) :: boolean()
def contains?(%BloomFilter{} = bf, key) do
Enum.all?(hashes(bf, key), fn i ->
bit_at(bf.bits, i) == 1
end)
end
defimpl String.Chars, for: __MODULE__ do
def to_string(bloom_filter) do
"#BloomFilter\n\tbits: " <> (bloom_filter.bits
|> print_bits) <> "\n\thash_fns: " <> inspect(bloom_filter.hash_fns)
end
def print_bits(bitstring) when is_bitstring(bitstring) do
print_bits(bitstring, 0, "")
end
defp print_bits(<<bit::size(1), rest::bitstring>>, index, result) do
print_bits(rest, index + 1, "#{result}#{bit}")
end
defp print_bits(<<>>, _index, result) do
result
end
end
defimpl Inspect, for: __MODULE__ do
def inspect(bloom_filter, _opts) do
bloom_filter
|> String.Chars.to_string
end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment