Created
August 3, 2014 02:56
-
-
Save bigsnarfdude/c48dff7c79228b6180f3 to your computer and use it in GitHub Desktop.
algebird 0.7.0 REPL script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.language.implicitConversions

import com.twitter.algebird._
import HyperLogLog._
import com.twitter.algebird.Monoid
import com.twitter.algebird.DecayedValue
import com.twitter.algebird.Operators._
// --- HyperLogLog: estimate the number of distinct elements in `data` ---
// The constructor argument (4) is the sketch's size parameter; per the Algebird
// API it is the number of bits, so a small value favours memory over accuracy.
val hll = new HyperLogLogMonoid(4)
val data = List(1, 1, 2, 2, 3, 3, 4, 4, 5, 5)
// Build one sketch per element, then merge them all with the monoid's sum.
val seqHll = data.map { hll(_) }
val sumHll = hll.sum(seqHll)
val approxSizeOf = hll.sizeOf(sumHll)
// Exact distinct count, kept for comparison with the approximate `estimate`.
val actualSize = data.toSet.size
val estimate = approxSizeOf.estimate
// --- Bloom filter: approximate set membership ---
// Parameters handed to BloomFilterMonoid, in the order (NUM_HASHES, WIDTH, SEED).
val NUM_HASHES = 6
val WIDTH = 32
val SEED = 1
val bfMonoid = new BloomFilterMonoid(NUM_HASHES, WIDTH, SEED)
// Populate a filter with five string keys.
val bf = bfMonoid.create("1", "2", "3", "4", "100")
// `contains` yields an approximate boolean; `isTrue` extracts the plain Boolean answer.
val approxBool = bf.contains("1")
val res = approxBool.isTrue
// --- Count-Min Sketch: approximate per-key frequencies over Long keys ---
// Parameters handed to CountMinSketchMonoid, in the order (EPS, DELTA, SEED).
val DELTA = 1E-10
val EPS = 0.001
val SEED = 1
val CMS_MONOID = new CountMinSketchMonoid(EPS, DELTA, SEED)
// Key 1L appears twice; 3L, 4L and 5L once each; 2L never.
val data = List(1L, 1L, 3L, 4L, 5L)
val cms = CMS_MONOID.create(data)
// Total number of items added to the sketch.
cms.totalCount
// Estimated frequencies for a repeated key, an absent key and a single-occurrence key.
cms.frequency(1L).estimate
cms.frequency(2L).estimate
cms.frequency(3L).estimate
// --- DecayedValue: exponentially decayed running sum over a time series ---
// 100 random samples in [0, 1000); the position in the sequence is used as the timestamp.
val data = {
  val rnd = new scala.util.Random
  (1 to 100).map { _ => rnd.nextInt(1000).toDouble }.toSeq
}
val HalfLife = 10.0
// Divisor applied to the decayed sum before it is printed.
val normalization = HalfLife / math.log(2)
// Explicit type on the implicit so that resolution does not depend on inference.
implicit val monoid: Monoid[DecayedValue] = DecayedValue.monoidWithEpsilon(1e-3)
// Fold the samples left to right, printing the normalized decayed value after each step.
data.zipWithIndex.scanLeft(Monoid.zero[DecayedValue]) { case (previous, (value, time)) =>
  val decayed = Monoid.plus(previous, DecayedValue.build(value, time, HalfLife))
  println("At %d: decayed=%f".format(time, (decayed.value / normalization)))
  decayed
}
// --- SketchMap parameters ---
// These four values are passed to SketchMapParams as (SEED, EPS, DELTA, HEAVY_HITTERS_COUNT).
val DELTA = 1E-8
val EPS = 0.001
val SEED = 1
val HEAVY_HITTERS_COUNT = 10
/** Implicit String-to-bytes conversion.
  *
  * Encodes the string as UTF-8. Narrowing each UTF-16 char with `toByte` would keep only
  * its low 8 bits, so distinct non-ASCII strings (e.g. "\u0141" and "A") would serialize
  * to the same bytes. For pure-ASCII input the result is the same as per-char narrowing.
  *
  * @param i the string to serialize
  * @return the UTF-8 encoded bytes of `i`
  */
implicit def string2Bytes(i: String): Array[Byte] =
  i.getBytes(java.nio.charset.StandardCharsets.UTF_8)
// --- SketchMap: approximate per-key sums for String keys with Long values ---
// NOTE(review): SketchMapParams presumably picks up the implicit String => Array[Byte]
// conversion in scope to serialize keys - confirm against the Algebird API.
val PARAMS = SketchMapParams[String](SEED, EPS, DELTA, HEAVY_HITTERS_COUNT)
val MONOID = SketchMap.monoid[String, Long](PARAMS)
// Key "3" carries value 2; keys "1", "4" and "5" carry value 1; key "2" is absent.
val data = List(("1", 1L), ("3", 2L), ("4", 1L), ("5", 1L))
val sm = MONOID.create(data)
// Sum of all values added to the sketch.
sm.totalValue
// Estimated values for a present key, an absent key and the key with the largest value.
MONOID.frequency(sm, "1")
MONOID.frequency(sm, "2")
MONOID.frequency(sm, "3")

// --- Max semigroup over plain Ints ---
Max(10) + Max(30) + Max(20)
/** A Twitter account, ordered by follower count and then by name.
  *
  * @param name         account name; used as the tie-breaker when follower counts are equal
  * @param numFollowers follower count; the primary sort key
  */
case class TwitterUser(name: String, numFollowers: Int) extends Ordered[TwitterUser] {

  /** Compares by `numFollowers` first and by `name` on ties.
    *
    * Uses `Ordering.Int.compare` rather than subtracting the two counts: the subtraction
    * overflows `Int` when the operands are far apart, which flips the sign of the result.
    *
    * @param that the user to compare against
    * @return a negative, zero or positive value when this user sorts before, equal to or after `that`
    */
  def compare(that: TwitterUser): Int = {
    val byFollowers = Ordering.Int.compare(this.numFollowers, that.numFollowers)
    if (byFollowers == 0) this.name.compareTo(that.name) else byFollowers
  }
}
// --- Max semigroup over a user-defined ordered type ---
// Sample accounts with their follower counts.
val barackobama = TwitterUser("BarackObama", 40267391)
val katyperry = TwitterUser("katyperry", 48013573)
val ladygaga = TwitterUser("ladygaga", 40756470)
val miguno = TwitterUser("miguno", 731)
val taylorswift = TwitterUser("taylorswift13", 37125055)
// Combining Max values keeps the greatest element according to TwitterUser's ordering.
val winner: Max[TwitterUser] =
  Max(barackobama) + Max(katyperry) + Max(ladygaga) + Max(miguno) + Max(taylorswift)
// --- Min semigroup over plain Ints ---
Min(10) + Min(20) + Min(30)

// --- `+` and `*` on standard collections, via the Algebird operators imported above ---
val data2 = Map(1 -> 1, 2 -> 1)
val data1 = Map(1 -> 3, 2 -> 5, 3 -> 7, 5 -> 1)
// Maps combine key by key; values under a shared key are themselves combined.
data1 + data2
Set(1, 2, 3) + Set(3, 4, 5)
List(1, 2, 3) + List(3, 4, 5)
// NOTE(review): the product should keep only keys present in both maps - confirm against the Algebird docs.
Map(1 -> 3, 2 -> 4, 3 -> 1) * Map(2 -> 2)
// Map values may be any combinable type, here Sets.
Map(1 -> Set(2, 3), 2 -> Set(1)) + Map(2 -> Set(2, 3))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment