Skip to content

Instantly share code, notes, and snippets.

@miguno
Created January 7, 2015 15:57
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miguno/0fd6b36d52f477762514 to your computer and use it in GitHub Desktop.
Save miguno/0fd6b36d52f477762514 to your computer and use it in GitHub Desktop.
Benchmark for hashing implementations for CMS[BigInt] in Algebird
package com.twitter.algebird.caliper
import com.google.caliper.{ Param, SimpleBenchmark }
import com.google.common.hash.{ HashFunction, Hashing }
/**
* Benchmarks the hashing algorithms used by Count-Min sketch for CMS[BigInt].
*
* The input values are generated ahead of time to ensure that each trial uses the same input (and that the RNG is not
* influencing the runtime of the trials).
*/
// Once we can convince cappi (https://github.com/softprops/capp) -- the sbt plugin we use to run
// caliper benchmarks -- to work with the latest caliper 1.0-beta-1, we would:
// - Let `CMSHashingBenchmark` extend `Benchmark` (instead of `SimpleBenchmark`)
// - Annotate `timePlus` with `@MacroBenchmark`.
class CMSHashingBenchmark extends SimpleBenchmark {
/**
* The `a` parameter for CMS' default ("legacy") hashing algorithm: `h_i(x) = a_i * x + b_i (mod p)`.
*/
@Param(Array("5123456"))
val a: Int = 0
/**
* The `b` parameter for CMS' default ("legacy") hashing algorithm: `h_i(x) = a_i * x + b_i (mod p)`.
*
* Algebird's CMS implementation hard-codes `b` to `0`.
*/
@Param(Array("0"))
val b: Int = 0
/**
* Width of the counting table.
*/
@Param(Array("11" /* eps = 0.271 */ , "544" /* eps = 0.005 */ , "2719" /* eps = 1E-3 */ , "271829" /* eps = 1E-5 */ ))
val width: Int = 0
/**
* Number of operations per benchmark repetition.
*/
@Param(Array("100000"))
val operations: Int = 0
/**
* Maximum number of bits for randomly generated BigInt instances.
*/
@Param(Array("128", "1024", "2048"))
val maxBits: Int = 0
var random: scala.util.Random = _
var inputs: Seq[BigInt] = _
override def setUp() {
random = new scala.util.Random
// We draw numbers randomly from a 2^maxBits address space.
inputs = (1 to operations).view.map { _ => scala.math.BigInt(maxBits, random) }
}
private def murmurHashScala(a: Int, b: Int, width: Int)(x: BigInt) = {
val hash: Int = scala.util.hashing.MurmurHash3.arrayHash(x.toByteArray, a)
val h = {
// We only want positive integers for the subsequent modulo. This method mimics Java's Hashtable implementation,
// and it requires `hash` to be an `Int` = have 32 bits (to match with `0x7FFFFFFF`).
val positiveHash = hash & 0x7FFFFFFF
positiveHash % width
}
assert(h >= 0, "hash must not be negative")
h
}
private def murmurHashGuava(hasher: HashFunction, width: Int)(x: BigInt) = {
val h = Hashing.consistentHash(hasher.hashBytes(x.toByteArray), width)
assert(h >= 0, "hash must not be negative")
h
}
private val PRIME_MODULUS = (1L << 31) - 1
private def brokenCurrentHash(a: Int, b: Int, width: Int)(x: BigInt) = {
val unModded: BigInt = (x * a) + b
val modded: BigInt = (unModded + (unModded >> 32)) & PRIME_MODULUS
val h = modded.toInt % width
assert(h >= 0, "hash must not be negative")
h
}
def timeBrokenCurrentHashWithRandomMaxBitsNumbers(operations: Int): Int = {
var dummy = 0
while (dummy < operations) {
inputs.foreach { input => brokenCurrentHash(a, b, width)(input) }
dummy += 1
}
dummy
}
def timeMurmurHashScalaWithRandomMaxBitsNumbers(operations: Int): Int = {
var dummy = 0
while (dummy < operations) {
inputs.foreach { input => murmurHashScala(a, b, width)(input) }
dummy += 1
}
dummy
}
def timeMurmurHash32GuavaWithRandomMaxBitsNumbers(operations: Int): Int = {
var dummy = 0
val hasher = Hashing.murmur3_32(a)
while (dummy < operations) {
inputs.foreach { input => murmurHashGuava(hasher, width)(input) }
dummy += 1
}
dummy
}
def timeMurmurHash128GuavaWithRandomMaxBitsNumbers(operations: Int): Int = {
var dummy = 0
val hasher = Hashing.murmur3_128(a)
while (dummy < operations) {
inputs.foreach { input => murmurHashGuava(hasher, width)(input) }
dummy += 1
}
dummy
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment