Skip to content

Instantly share code, notes, and snippets.

@philcrissman
Last active August 29, 2015 14:27
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save philcrissman/3cff7f2ba6a0509cfd97 to your computer and use it in GitHub Desktop.
Save philcrissman/3cff7f2ba6a0509cfd97 to your computer and use it in GitHub Desktop.
require 'minitest/autorun'
class Bloom
attr_accessor :array
def initialize(array_size=10_000_000, hashes_per_word=4, words_file="/usr/share/dict/words")
@array_size = array_size
@hashes_per_word = hashes_per_word
@words_file = words_file
initialize_array!
populate_array!
end
def hash(word)
result = []
@hashes_per_word.times do |n|
srand((word+n.to_s).hash)
result << rand(@array_size)
end
result
end
def populate_array!
io = IO.new(IO.sysopen(@words_file),"r")
while !io.eof?
indices = hash(io.readline.strip)
indices.each{|n| array[n] = 1}
end
return nil
end
def initialize_array!
self.array = Array.new(@array_size){0}
end
def in_dictionary? word
indices = hash(word)
indices.map{|n| array[n] == 1}.all?{|el| el == true}
end
end
class BloomTest < MiniTest::Test
def setup
@bloom = Bloom.new(1000000, 4)
end
def test_real_word
assert @bloom.in_dictionary?("house")
assert @bloom.in_dictionary?("garage")
end
def test_not_real
refute @bloom.in_dictionary?("sadgsgdf")
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment