Skip to content

Instantly share code, notes, and snippets.

@tixxit
Last active September 12, 2016 19:46
Show Gist options
  • Save tixxit/82de5077ccc1d3a8143a20f5a4742f31 to your computer and use it in GitHub Desktop.
Save tixxit/82de5077ccc1d3a8143a20f5a4742f31 to your computer and use it in GitHub Desktop.
package com.stripe.mapbench
import java.util.Arrays
import scala.collection.mutable.ArrayBuilder
class ArrayMap(val keys: Array[Int], val values: Array[Long]) {
def get(n: Int): Option[Long] =
scala.util.Try(apply(n)).toOption
def apply(n: Int): Long =
values(Arrays.binarySearch(keys, n))
def foreach(f: (Int, Long) => Unit): Unit = {
var i = 0
while (i < keys.length) {
f(keys(i), values(i))
i += 1
}
}
def filter(f: (Int, Long) => Boolean): ArrayMap = {
var i = 0
val keysBldr = ArrayBuilder.make[Int]()
val valsBldr = ArrayBuilder.make[Long]()
while (i < keys.length) {
val k = keys(i)
val v = values(i)
if (f(k, v)) {
keysBldr += k
valsBldr += v
}
i += 1
}
new ArrayMap(keysBldr.result(), valsBldr.result())
}
def merge(that: ArrayMap)(f: (Long, Long) => Long): ArrayMap = {
val lKeys = keys
val lVals = values
val rKeys = that.keys
val rVals = that.values
var l = 0
var r = 0
val keysBldr = ArrayBuilder.make[Int]()
val valsBldr = ArrayBuilder.make[Long]()
while (l < lKeys.size && r < rKeys.size) {
val lKey = lKeys(l)
val rKey = rKeys(r)
if (lKey < rKey) {
keysBldr += lKey
valsBldr += lVals(l)
l += 1
} else if (rKey < lKey) {
keysBldr += rKey
valsBldr += rVals(r)
r += 1
} else {
keysBldr += lKey
valsBldr += f(lVals(l), rVals(r))
l += 1
r += 1
}
}
while (l < lKeys.size) {
keysBldr += lKeys(l)
valsBldr += lVals(l)
l += 1
}
while (r < rKeys.size) {
keysBldr += rKeys(r)
valsBldr += rVals(r)
r += 1
}
new ArrayMap(keysBldr.result(), valsBldr.result())
}
}
object ArrayMap {
def empty: ArrayMap = new ArrayMap(new Array[Int](0), new Array[Long](0))
def create(key: Int, value: Long): ArrayMap = {
val keys = new Array[Int](1)
keys(0) = key
val vals = new Array[Long](1)
vals(0) = value
new ArrayMap(keys, vals)
}
def apply(kvs: (Int, Long)*): ArrayMap = {
val (keys, values) = kvs.sorted.unzip
new ArrayMap(keys.toArray, values.toArray)
}
}
package com.stripe.mapbench
import java.util.Arrays
import scala.collection.mutable.LongMap
import scala.util.Random
import com.twitter.algebird._
import org.openjdk.jmh.annotations.Benchmark
import org.openjdk.jmh.annotations.State
import org.openjdk.jmh.annotations.Scope
object MapTraversal {
@State(Scope.Benchmark)
class Maps {
val items: List[Int] = List.range(0, 4096)
val immutableMap: Map[Int, Long] =
Map(items.map { i => i -> i.toLong }: _*)
val longMap: LongMap[Long] =
LongMap(items.map { i => i.toLong -> i.toLong }: _*)
val arrayMap: ArrayMap =
ArrayMap(immutableMap.toList: _*)
}
}
class MapTraversal {
import MapTraversal._
@Benchmark
def immutableMap(maps: Maps): Long = {
var i = 0L
maps.immutableMap.foreach { case (key, value) =>
i += key + value
}
i
}
@Benchmark
def longMapForeach(maps: Maps): Long = {
var i = 0L
maps.longMap.foreach { case (key, value) =>
i += key + value
}
i
}
@Benchmark
def longMapForeachKey(maps: Maps): Long = {
val m = maps.longMap
var i = 0L
m.foreachKey { key =>
i += key + m(key)
}
i
}
@Benchmark
def arrayMapForeach(maps: Maps): Long = {
var i = 0L
maps.arrayMap.foreach { (key, value) =>
i += key + value
}
i
}
}
object HLLSeriesBenchmark {
@State(Scope.Benchmark)
class Data {
def int2bytes(n: Int): Array[Byte] = {
val bytes = new Array[Byte](4)
bytes(0) = n.toByte
bytes(1) = (n >>> 8).toByte
bytes(2) = (n >>> 16).toByte
bytes(3) = (n >>> 24).toByte
bytes
}
def genRandomData(seed: Int)(f: Random => Int): List[(Array[Byte], Long)] = {
val rng = new Random(seed)
List.fill(1000)(f(rng)).map(int2bytes).map(_ -> rng.nextLong)
}
val randomDenseData = genRandomData(23)(_.nextGaussian.toInt)
val randomSparseData = genRandomData(91)(_.nextInt)
}
}
class HLLSeriesBenchmark {
import HLLSeriesBenchmark._
def bits = 8
@Benchmark
def myHLLSeriesDense(data: Data): MyHLLSeries = {
val hllSeriesMonoid = new MyHyperLogLogSeriesMonoid(bits)
hllSeriesMonoid.sum(data.randomDenseData.map { case (bytes, ts) =>
hllSeriesMonoid.create(bytes, ts)
})
}
@Benchmark
def myHLLSeriesSparse(data: Data): MyHLLSeries = {
val hllSeriesMonoid = new MyHyperLogLogSeriesMonoid(bits)
hllSeriesMonoid.sum(data.randomSparseData.map { case (bytes, ts) =>
hllSeriesMonoid.create(bytes, ts)
})
}
@Benchmark
def hllSeriesDense(data: Data): HLLSeries = {
val hllSeriesMonoid = new HyperLogLogSeriesMonoid(bits)
hllSeriesMonoid.sum(data.randomDenseData.map { case (bytes, ts) =>
hllSeriesMonoid.create(bytes, ts)
})
}
@Benchmark
def hllSeriesSparse(data: Data): HLLSeries = {
val hllSeriesMonoid = new HyperLogLogSeriesMonoid(bits)
hllSeriesMonoid.sum(data.randomSparseData.map { case (bytes, ts) =>
hllSeriesMonoid.create(bytes, ts)
})
}
}
scalaVersion := "2.11.7"
enablePlugins(JmhPlugin)
libraryDependencies += "com.twitter" %% "algebird-core" % "0.11.0"
package com.stripe.mapbench
import scala.collection.mutable.ArrayBuilder
import com.twitter.algebird._
case class MyHLLSeries(bits: Int, rows: Vector[ArrayMap]) {
def since(threshold: Long) = {
val newRows = rows.map(_.filter { (j, ts) => ts >= threshold })
MyHLLSeries(bits, newRows)
}
// def toHLL: HLL = {
// if (rows.isEmpty)
// SparseHLL(bits, Map())
// else
// rows.zipWithIndex.map{
// case (map, i) =>
// SparseHLL(bits, map.mapValues{ ts => Max((i + 1).toByte) }): HLL
// }.reduce{ _ + _ }
// }
}
class MyHyperLogLogSeriesMonoid(val bits: Int) extends Monoid[MyHLLSeries] {
import HyperLogLog._
val zero = MyHLLSeries(bits, Vector())
def create(example: Array[Byte], timestamp: Long): MyHLLSeries = {
val hashed = hash(example)
val (j, rhow) = jRhoW(hashed, bits)
val emptyMap = ArrayMap.empty
val rows = Vector.fill(rhow - 1)(emptyMap) :+ ArrayMap.create(j, timestamp)
MyHLLSeries(bits, rows)
}
def plus(lhs: MyHLLSeries, rhs: MyHLLSeries): MyHLLSeries = {
val lRows = lhs.rows
val rRows = rhs.rows
if (lRows.size > rRows.size) {
plus(rhs, lhs)
} else {
var i = 0
val len = math.min(lRows.size, rRows.size)
val bldr = Vector.newBuilder[ArrayMap]
while (i < len) {
val z = lRows(i).merge(rRows(i)) { (x, y) =>
math.max(x, y)
}
bldr += z
i += 1
}
while (i < lRows.size) {
bldr += lRows(i)
i += 1
}
while (i < rRows.size) {
bldr += rRows(i)
i += 1
}
MyHLLSeries(bits, bldr.result())
}
}
}
addSbtPlugin("pl.project13.scala" % "sbt-jmh" % "0.2.4")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment