Skip to content

Instantly share code, notes, and snippets.

@manuzhang
Last active September 15, 2017 02:35
Show Gist options
  • Save manuzhang/052e83dfda44f6f34678720bb4b93e75 to your computer and use it in GitHub Desktop.
Save manuzhang/052e83dfda44f6f34678720bb4b93e75 to your computer and use it in GitHub Desktop.
Encode with Murmur Hash
import java.nio.{ByteBuffer, ByteOrder}
// Requires Guava; sbt dependency: "com.google.guava" % "guava" % "16.0.1"
import com.google.common.hash.Hashing
/**
 * Encodes integer-prefixed long values into a non-negative 60-bit key using
 * Guava's 128-bit Murmur3 hash with a fixed seed.
 */
object MurmurHash {
  // Fixed seed passed to Guava's murmur3_128 for every hash.
  private val seed = 0x3c074a61

  // Serialized size of one (Int prefix, Long value) pair: 4 + 8 bytes.
  private val PairBytes = 4 + 8

  // Keeps the low 60 bits of the hash. For a power-of-two modulus m,
  // `x & (m - 1)` is the non-negative remainder, i.e. exactly the value of
  // `((x % m) + m) % m`, including for negative x.
  private val KeyMask = (1L << 60) - 1L

  /**
   * Hashes a single (prefix, value) pair.
   *
   * The hashed input is the 4 bytes of `prefix` followed by the 8 bytes of
   * `value`, both in little-endian order.
   *
   * @param prefix integer written first
   * @param value  long written after the prefix
   * @return the hash reduced to the range [0, 2^60)
   */
  def encode(prefix: Int, value: Long): Long = {
    // One pre-sized buffer instead of one buffer per field plus `++` copies.
    val packed = pack(newBuffer(PairBytes), prefix, value)
    encodeBytes(packed.array())
  }

  /**
   * Hashes two (prefix, value) pairs as one input.
   *
   * The hashed input is `prefix1`, `value1`, `prefix2`, `value2` written
   * back to back in little-endian order (24 bytes in total).
   *
   * @return the hash reduced to the range [0, 2^60)
   */
  def encode(prefix1: Int, value1: Long, prefix2: Int, value2: Long): Long = {
    val packed = pack(pack(newBuffer(2 * PairBytes), prefix1, value1), prefix2, value2)
    encodeBytes(packed.array())
  }

  /** Allocates a heap buffer of `size` bytes that writes in little-endian order. */
  private def newBuffer(size: Int): ByteBuffer =
    ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN)

  /** Appends `prefix` then `value` at the buffer's current position. */
  private def pack(buffer: ByteBuffer, prefix: Int, value: Long): ByteBuffer =
    buffer.putInt(prefix).putLong(value)

  /**
   * Hashes `bytes` with the seeded murmur3_128 and reduces the result to 60 bits.
   *
   * Only 64 of the 128 hash bits are used. `asLong()` reads the first eight
   * hash bytes in little-endian order, which is the same long obtained by
   * reversing all 16 bytes and reading the trailing eight big-endian, without
   * the intermediate array copies.
   */
  private def encodeBytes(bytes: Array[Byte]): Long = {
    val low64 = Hashing.murmur3_128(seed).hashBytes(bytes).asLong()
    low64 & KeyMask
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment