Skip to content

Instantly share code, notes, and snippets.

@archie
Last active September 4, 2019 09:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save archie/7076239 to your computer and use it in GitHub Desktop.
Save archie/7076239 to your computer and use it in GitHub Desktop.
Implementation of various hash functions for strings in Scala. Details here: http://hs.ljungblad.nu/post/64624649234/a-dive-into-the-mystery-of-hashes
object Hashing {
val BUCKETSIZE = 1000
val words = io.Source.fromFile("/usr/share/dict/words").getLines
.map(x => x.toLowerCase).toList
def initBuckets(size: Int): Array[List[String]] = {
var arr = new Array[List[String]](size)
arr.map(_ => List[String]())
}
def stupidHash(word: String, seed: Int = 0): Int = {
word.getBytes.foldLeft(0)(_+_)
}
def javaHash(word: String, seed: Int = 0): Int = {
var hash = 0
for (ch <- word.toCharArray)
hash = 31 * hash + ch.toInt
hash = hash ^ (hash >> 20) ^ (hash >> 12)
hash ^ (hash >> 7) ^ (hash >> 4)
}
def murmurHash(word: String, seed: Int): Int = {
val c1 = 0xcc9e2d51
val c2 = 0x1b873593
val r1 = 15
val r2 = 13
val m = 5
val n = 0xe6546b64
var hash = seed
for (ch <- word.toCharArray) {
var k = ch.toInt
k = k * c1
k = (k << r1) | (hash >> (32 - r1))
k = k * c2
hash = hash ^ k
hash = (hash << r2) | (hash >> (32 - r2))
hash = hash * m + n
}
hash = hash ^ word.length
hash = hash ^ (hash >> 16)
hash = hash * 0x85ebca6b
hash = hash ^ (hash >> 13)
hash = hash * 0xc2b2ae35
hash = hash ^ (hash >> 16)
hash
}
def knuthHash(word: String, constant: Int): Int = {
var hash = 0
for (ch <- word.toCharArray)
hash = ((hash << 5) ^ (hash >> 27)) ^ ch.toInt
hash % constant
}
def main(args: Array[String]): Unit = {
var buckets = initBuckets(BUCKETSIZE)
val diff = words.length/BUCKETSIZE
import scala.util.Random
val seed = Random.nextInt
for (word <- words) {
// val bucket = stupidHash(word, seed) % BUCKETSIZE
// val bucket = javaHash(word, seed) & (BUCKETSIZE-1)
// val bucket = murmurHash(word, seed) % BUCKETSIZE
// val bucket = knuthHash(word, 1009) & (BUCKETSIZE-1)
val bucket = knuthHash(word, 31) & (BUCKETSIZE-1)
buckets(bucket) ::= word
}
println(buckets.map(x => (x.length :: x.take(3)).mkString("\t")).mkString("\n"))
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment