Last active
March 28, 2024 20:48
-
-
Save ShreyanJain9/dfc7d6252cecd334f6266564893735b8 to your computer and use it in GitHub Desktop.
A Bloom Filter in Elixir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule BloomFilter do | |
@moduledoc """ | |
A small Bloom Filter in Elixir | |
""" | |
@default_hash_fns [:sha256, :sha512, :blake2b] | |
defstruct bits: <<0::size(64)>>, hash_fns: @default_hash_fns | |
@type hashfn :: :crypto.hash_algorithm() | :phash2 | function() | |
@type hashfns :: [hashfn()] | |
@type t :: %BloomFilter{ | |
bits: bitstring(), | |
hash_fns: hashfns() | |
} | |
@spec new(pos_integer(), hashfns()) :: t() | |
def new(size \\ 64, hash_fns \\ @default_hash_fns) do | |
%BloomFilter{bits: <<0::size(size)>>, hash_fns: hash_fns} | |
end | |
@spec default_hash_fns() :: hashfns() | |
def default_hash_fns, do: @default_hash_fns | |
@spec hash(hashfn(), term()) :: binary() | |
defp hash(:phash2, key), do: <<:erlang.phash2(key)>> | |
defp hash(hash_fn, key) when is_function(hash_fn), do: hash_fn.(key) | |
defp hash(hash_fn, key) when is_atom(hash_fn), do: :crypto.hash(hash_fn, key) | |
@spec hashes(t(), term()) :: [non_neg_integer()] | |
defp hashes(%{hash_fns: hash_fns, bits: bits}, key) do | |
for hash_fn <- hash_fns do | |
hash_fn | |
|> hash(key) | |
|> :binary.decode_unsigned | |
|> rem(bit_size(bits)) | |
end | |
end | |
@spec bit_at(bitstring(), non_neg_integer()) :: 0 | 1 | |
defp bit_at(bits, i) do | |
<<slice::bitstring-size(i), _rest::bitstring>> = bits | |
<<_::size(i - 1), bit::1>> = slice | |
bit | |
end | |
@spec set_bit(bitstring(), non_neg_integer(), 0 | 1) :: bitstring() | |
defp set_bit(bits, i, value) do | |
<<slice::bitstring-size(i), rest::bitstring>> = bits | |
<<a::bitstring-size(i - 1), _::1>> = slice | |
<<a::bitstring, value::1, rest::bitstring>> | |
end | |
@spec add(t(), term()) :: t() | |
def add(%BloomFilter{bits: bits} = bf, key) do | |
Enum.reduce(hashes(bf, key), bits, fn i, bit_string -> | |
set_bit(bit_string, i, 1) | |
end) | |
|> then(fn bits -> %BloomFilter{bf | bits: bits} end) | |
end | |
@spec contains?(t(), term()) :: boolean() | |
def contains?(%BloomFilter{} = bf, key) do | |
Enum.all?(hashes(bf, key), fn i -> | |
bit_at(bf.bits, i) == 1 | |
end) | |
end | |
defimpl String.Chars, for: __MODULE__ do | |
def to_string(bloom_filter) do | |
"#BloomFilter\n\tbits: " <> (bloom_filter.bits | |
|> print_bits) <> "\n\thash_fns: " <> inspect(bloom_filter.hash_fns) | |
end | |
def print_bits(bitstring) when is_bitstring(bitstring) do | |
print_bits(bitstring, 0, "") | |
end | |
defp print_bits(<<bit::size(1), rest::bitstring>>, index, result) do | |
print_bits(rest, index + 1, "#{result}#{bit}") | |
end | |
defp print_bits(<<>>, _index, result) do | |
result | |
end | |
end | |
defimpl Inspect, for: __MODULE__ do | |
def inspect(bloom_filter, _opts) do | |
bloom_filter | |
|> String.Chars.to_string | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment