Skip to content

Instantly share code, notes, and snippets.

@bmjames
Created November 25, 2009 14:13
Show Gist options
  • Save bmjames/242725 to your computer and use it in GitHub Desktop.
Save bmjames/242725 to your computer and use it in GitHub Desktop.
import scala.collection.mutable.{Set => MutableSet}
import scala.io.Source
import java.security.MessageDigest
class BloomSet(numHashes: Int) {
protected val hashes = MutableSet.empty[String]
def add(word: String) {
hashes ++= digest(word)
}
def contains(word: String) = digest(word).forall { x => hashes contains x }
protected def digest(word: String) = {
val md = MessageDigest getInstance "sha"
md update word.toLowerCase.getBytes
val hash = md.digest map { byte => (byte.toInt.abs).toChar }
for (i <- 0 until numHashes) yield hash.slice(i, i+3).mkString
}
}
val bs = new BloomSet(3)
for (line <- Source.fromFile("/usr/share/dict/words").getLines)
bs add line.trim.toLowerCase
println(bs contains "hello")
println(bs contains "fhqwhgads")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment