miguno/CMSBenchmark.scala

## CMSBenchmark.scala
package com.twitter.algebird.caliper

import com.google.caliper.api.Macrobenchmark
import com.google.caliper.{Param, Benchmark}
import com.twitter.algebird.CountMinSketchMonoid

/**
 * Run via [[com.twiter.algebird.caliper.Runner]], using the following CLI options:
 *
 * "--time-limit 90s --instrument macro com.twitter.algebird.caliper.CMSBenchmark"
 *
 * JVM parameters: "-server -Xms256m -Xmx512m"
 */
class CMSBenchmark extends Benchmark {

  @Param(Array("0.1", "0.005"))
  val eps: Double = 0.0

  @Param(Array("0.0000001" /* 1E-8 */))
  val delta: Double = 0.0

  @Param(Array("0.2"))
  val heavyHittersPct: Double = 0.0

  @Param(Array("100"))
  val operations: Int = 0

  var cmsMonoid: CountMinSketchMonoid = _

  override def setUp {
    cmsMonoid = {
      val seed = 1
      new CountMinSketchMonoid(eps, delta, seed, heavyHittersPct)
    }
  }

  @Macrobenchmark
  def timePlus() {
    (1 to operations).view.foldLeft(cmsMonoid.zero)((l, r) => l ++ cmsMonoid.create(r))
  }

}

## GenericCMSBenchmark.scala
package com.twitter.algebird.caliper

import com.google.caliper.api.Macrobenchmark
import com.google.caliper.{Param, Benchmark}
import com.twitter.algebird.{GenCountMinSketchImplicits, GenCountMinSketchMonoid}

/**
 * Run via [[com.twiter.algebird.caliper.Runner]], using the following CLI options:
 *
 * "--time-limit 90s --instrument macro com.twitter.algebird.caliper.GenericCMSBenchmark"
 *
 * JVM parameters: "-server -Xms256m -Xmx512m"
 */
class GenericCMSBenchmark extends Benchmark {

  type NUM = BigInt // or Long

  @Param(Array("0.1", "0.005"))
  val eps: Double = 0.0

  @Param(Array("0.0000001" /* 1E-8 */))
  val delta: Double = 0.0

  @Param(Array("0.2"))
  val heavyHittersPct: Double = 0.0

  @Param(Array("100"))
  val operations: Int = 0

  @Param(Array("2048"))
  val maxBits: Int = 0

  var random: scala.util.Random = _

  var cmsMonoid: GenCountMinSketchMonoid[NUM] = _

  override def setUp {
    // Required import of implicit values (e.g. for BigInt- or Long-backed CMS instances)
    import GenCountMinSketchImplicits._

    cmsMonoid = {
      val seed = 1
      new GenCountMinSketchMonoid[NUM](eps, delta, seed, heavyHittersPct)
    }

    random = new scala.util.Random
  }

  @Macrobenchmark
  def timePlus() {
    (1 to operations).view.foldLeft(cmsMonoid.zero)((l, r) => {

      // Case A: K=Long, counting the sequence (1, 2, ..., operations)
      //l ++ cmsMonoid.create(r)

      // Case B.1: K=BigInt, counting the sequence (1, 2, ..., operations)
      //l ++ cmsMonoid.create(r)

      // Case B.2: K=BigInt, drawing numbers randomly from 2^maxBits address space
      val n = scala.math.BigInt(maxBits, random)
      l ++ cmsMonoid.create(n)
    })
  }

}

## SketchMapBenchmark.scala
package com.twitter.algebird.caliper

import com.google.caliper.api.Macrobenchmark
import com.google.caliper.{Param, Benchmark}
import com.twitter.algebird.{SketchMapMonoid, SketchMapParams}

/**
 * Run via [[com.twiter.algebird.caliper.Runner]], using the following CLI options:
 *
 * "--time-limit 300s --instrument macro com.twitter.algebird.caliper.SketchMapBenchmark"
 *
 * JVM parameters: "-server -Xms256m -Xmx512m"
 */
class SketchMapBenchmark extends Benchmark {

  type K = BigInt // or Long
  type V = Long

  @Param(Array("0.1", "0.005"))
  val eps: Double = 0.0

  @Param(Array("0.0000001" /* 1E-8 */))
  val delta: Double = 0.0

  @Param(Array("100", "1000"))
  val heavyHittersCount: Int = 0

  @Param(Array("100"))
  val operations: Int = 0

  @Param(Array("2048"))
  val maxBits: Int = 0

  var random: scala.util.Random = _

  var sketchMapMonoid: SketchMapMonoid[K, V] = _

  override def setUp {
    sketchMapMonoid = {
      // for SketchMap[K=Long, Long]
      //import com.twitter.bijection.Conversion.asMethod
      //def explicitSerialization(i: K): Array[Byte] = i.as[Array[Byte]]

      // for SketchMap[K=BigInt, Long]
      def explicitSerialization(i: K): Array[Byte] = i.toByteArray

      val seed = 1
      val params = SketchMapParams[K](seed, eps, delta, heavyHittersCount)(explicitSerialization)
      new SketchMapMonoid[K, V](params)
    }

    random = new scala.util.Random
  }

  @Macrobenchmark
  def timePlus() {
    (1 to operations).view.foldLeft(sketchMapMonoid.zero)((l, r) => {
      // Case A: K=Long, counting the sequence (1, 2, ..., operations)
      //val pair = (r.toLong, 1L)
      //sketchMapMonoid.plus(l, sketchMapMonoid.create(pair))

      // Case B.1: K=BigInt, counting the sequence (1, 2, ..., operations)
      //val pair = (BigInt(r), 1L)
      //sketchMapMonoid.plus(l, sketchMapMonoid.create(pair))

      // Case B.2: K=BigInt, drawing numbers randomly from 2^maxBits address space
      val n = scala.math.BigInt(maxBits, random)
      val pair = (n, 1L)

      sketchMapMonoid.plus(l, sketchMapMonoid.create(pair))
    })
  }

}

## Summary.md

      
    Raw
  

              Summary.md
            
          
    Summary results, variant A

For the following parameters:

eps = 0.005
delta = 1E-8
seed = 1
heavyHittersPct: 0.2 (CMS variants) / heavyHittersCount: 1000 (SketchMap)

Input data: 100 small numbers, i.e. the sequence (1, 2, ..., 100).
Results:

CMS (hardcoded K=Long): 354,000 ns (details) / previous benchmark: 160k/s
CMS[Long] (*): 360,000 ns (details) / previous benchmark: 160k/s
CMS[BigInt] (*): 912,000 ns (details) / previous benchmark: 65k/s
SketchMap[Long, Long]: 38,382,000 ns (details)
SketchMap[BigInt, Long]: 38,377,000 ns (details) / previous benchmark: 0.3k/s

(*) denotes new, generic CMS implementation
Summary results, variant B

Same parameters as above but different input data.  This benchmark, when run against BigInt, is expected to run slower than benchmark variant A above because BigInt operations become slower the larger the numbers represented by a BigInt are.
Input data: large(r) numbers, i.e. randomly drawn numbers from a 2^2048 address space.

Because of the input data characteristics this benchmark variant cannot be run for Long.

Results:

CMS[BigInt] (*): 3,067,000 ns (details) / previous benchmark: 22k/s
SketchMap[BigInt, Long]: 119,841,000 ns (details)

(*) denotes new, generic CMS implementation
Notes

Several benchmarks logged warnings and/or errors similar to the following:
WARNING: Hotspot compilation occurred during timing. Depending on the scope of the benchmark, this might significantly impact results. Consider running with a longer warmup.

WARNING: Hotspot compilation occurred after warmup, but outside of timing. Depending on the scope of the benchmark, this might significantly impact results. Consider running with a longer warmup.

WARNING: GC occurred during timing. Depending on the scope of the benchmark, this might significantly impact results. Consider running with a larger heap size.

Benchmark environment


MacBook Pro Retina (Late 2013), 2.3 GHz Intel Core i7, 16 GB RAM
Mac OS X 10.9.5
Oracle Java 7 (1.7.0_55)
IntelliJ IDEA 13.1

Caliper references


Caliper CLI options
Caliper default configuration
Document how to deal with "ERROR: Hotspot compilation occurred during timing. Warmup is likely insufficent." when running tests
	package com.twitter.algebird.caliper

	import com.google.caliper.api.Macrobenchmark
	import com.google.caliper.{Param, Benchmark}
	import com.twitter.algebird.CountMinSketchMonoid

	/**
	* Run via [[com.twiter.algebird.caliper.Runner]], using the following CLI options:
	*
	* "--time-limit 90s --instrument macro com.twitter.algebird.caliper.CMSBenchmark"
	*
	* JVM parameters: "-server -Xms256m -Xmx512m"
	*/
	class CMSBenchmark extends Benchmark {

	@Param(Array("0.1", "0.005"))
	val eps: Double = 0.0

	@Param(Array("0.0000001" /* 1E-8 */))
	val delta: Double = 0.0

	@Param(Array("0.2"))
	val heavyHittersPct: Double = 0.0

	@Param(Array("100"))
	val operations: Int = 0

	var cmsMonoid: CountMinSketchMonoid = _

	override def setUp {
	cmsMonoid = {
	val seed = 1
	new CountMinSketchMonoid(eps, delta, seed, heavyHittersPct)
	}
	}

	@Macrobenchmark
	def timePlus() {
	(1 to operations).view.foldLeft(cmsMonoid.zero)((l, r) => l ++ cmsMonoid.create(r))
	}

	}