Last active
November 24, 2015 23:29
-
-
Save erikerlandson/d96dc553bc51e0eb5e4b to your computer and use it in GitHub Desktop.
Code comparing algebird map/reduce style aggregation using `prepare` versus `update`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
scala> :load /home/eje/scala/prepare.scala | |
Loading /home/eje/scala/prepare.scala... | |
defined module prepare | |
scala> import prepare._ | |
import prepare._ | |
scala> val data = Vector.fill(1000000) { scala.util.Random.nextInt(10) } | |
data: scala.collection.immutable.Vector[Int] = Vector(7, 9, 4, 2, 7, 0, 3, 0, 4, 0, 7, 6, 3, 0, 8, 7, 2, 0, 8, 4, 5, 7, 0, 2, 9, 6, 1, 1, 5, 4, 0, 4, 8, 3, 2, 1, 8, 1, 2, 6, 5, 7, 2, 7, 6, 1, 0, 1, 7, 0, 8, 4, 3, 4, 3, 7, 7, 2, 7, 3, 9, 3, 8, 7, 5, 4, 3, 2, 9, 8, 1, 2, 4, 6, 3, 2, 9, 5, 8, 8, 1, 1, 3, 6, 1, 1, 5, 6, 0, 3, 8, 7, 0, 1, 4, 8, 6, 2, 6, 2, 4, 1, 0, 3, 9, 6, 3, 1, 3, 6, 3, 1, 9, 0, 3, 6, 6, 2, 5, 9, 5, 0, 2, 2, 9, 6, 8, 8, 3, 9, 4, 5, 9, 2, 0, 2, 4, 1, 4, 6, 5, 4, 6, 8, 1, 1, 8, 8, 3, 5, 5, 1, 3, 0, 8, 3, 3, 6, 9, 6, 1, 4, 0, 9, 9, 2, 0, 0, 3, 9, 8, 2, 9, 6, 4, 0, 8, 2, 8, 8, 5, 3, 8, 2, 9, 5, 3, 4, 2, 7, 3, 4, 2, 8, 3, 2, 0, 0, 2, 4, 8, 9, 3, 4, 0, 8, 7, 9, 8, 6, 3, 0, 4, 5, 5, 5, 2, 1, 9, 6, 4, 8, 8, 0, 8, 5, 0, 8, 6, 2, 1, 7, 3, 6, 9, 8, 9, 3, 0, 3, 4, 3, 4, 6, 7, 6, 2, 4,... | |
scala> benchmark(10) { data.mrPrepared(intSetPrepared) } | |
res0: Double = 0.2957673056 | |
scala> benchmark(10) { data.mrUpdatedAggregate(intSetUpdated) } | |
res1: Double = 0.027041249300000004 | |
scala> benchmark(10) { data.mrPreparedAggregate(intSetPrepared) } | |
res2: Double = 0.1754636707 | |
scala> benchmark(10) { data.mrUpdatedAggregate(UpdatedMonoid(intSetPrepared)) } | |
res3: Double = 0.1817913008 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
object prepare { | |
// an algebird-like monoid with the 'prepare' operation | |
trait PreparedMonoid[M, E] { | |
val zero: M | |
def plus(m1: M, m2: M): M | |
def prepare(e: E): M | |
} | |
// an algebird-like monoid with 'update' operation | |
trait UpdatedMonoid[M, E] { | |
val zero: M | |
def plus(m1: M, m2: M): M | |
def update(m: M, e: E): M | |
} | |
object UpdatedMonoid { | |
// create an UpdatedMonoid from a PreparedMonoid | |
def apply[M, E](mon: PreparedMonoid[M, E]) = new UpdatedMonoid[M, E] { | |
val zero = mon.zero | |
def plus(m1: M, m2: M) = mon.plus(m1, m2) | |
def update(m: M, e: E) = mon.plus(m, mon.prepare(e)) | |
} | |
} | |
// a PreparedMonoid for a set of integers. monoid operator is set union. | |
object intSetPrepared extends PreparedMonoid[Set[Int], Int] { | |
val zero = Set.empty[Int] | |
def plus(m1: Set[Int], m2: Set[Int]) = m1 ++ m2 | |
def prepare(e: Int) = Set(e) | |
} | |
// an equivalent UpdatedMonoid for a set of integers | |
object intSetUpdated extends UpdatedMonoid[Set[Int], Int] { | |
val zero = Set.empty[Int] | |
def plus(m1: Set[Int], m2: Set[Int]) = m1 ++ m2 | |
def update(m: Set[Int], e: Int) = m + e | |
} | |
implicit class SeqWithMapReduce[E](seq: Seq[E]) { | |
// algebird map/reduce Aggregator model | |
def mrPrepared[M](mon: PreparedMonoid[M, E]): M = { | |
seq.map(mon.prepare).reduceLeft(mon.plus) | |
} | |
// map/reduce taking advantage of scala 'aggregate' | |
def mrUpdatedAggregate[M](mon: UpdatedMonoid[M, E]): M = { | |
seq.aggregate(mon.zero)(mon.update, mon.plus) | |
} | |
// using 'aggregate' with prepared op | |
def mrPreparedAggregate[M](mon: PreparedMonoid[M, E]): M = { | |
seq.aggregate(mon.zero)((m, e) => mon.plus(m, mon.prepare(e)), mon.plus) | |
} | |
} | |
// take the mean of (k) times, discarding lowest and highest times | |
def benchmark[U](k: Int, trim: Int = 1)(blk: => U) = { | |
Vector.fill(k + 2 * trim) { | |
val t0 = System.nanoTime | |
blk | |
(System.nanoTime - t0) / 1e9 | |
}.sorted.slice(trim, k + trim).sum / k.toDouble | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment