Skip to content

Instantly share code, notes, and snippets.

@ianoc
ianoc / ClockCache.scala
Created February 14, 2014 00:16
Scala clock cache
import com.twitter.algebird.Semigroup
case class ClockCache[K, V](cacheSize: Int, maxHits: Int = 10 )
(implicit sg: Semigroup[V]) {
private class CacheElement {
var hits: Int = 0
var cacheHit: Int = 0
var key: Any = null
@ianoc
ianoc / Riemann
Created October 3, 2014 17:28
Scalding Riemann
/*
GUI settings used:
{
"server": "127.0.0.1:5556",
"server_type": "ws",
"workspaces": [
{
"name": "Riemann",
"view": {
"type": "Balloon",
@ianoc
ianoc / New code for hll present
Created March 17, 2015 19:57
New code for hll present
[info] Formatting 1 Scala source {file:/Users/ioconnell/workspace/github/algebird/}algebird-caliper(test) ...
[info] Formatting 1 Scala source {file:/Users/ioconnell/workspace/github/algebird/}algebird-core(compile) ...
[info] Compiling 1 Scala source to /Users/ioconnell/workspace/github/algebird/algebird-core/target/scala-2.10/classes...
[info] Compiling 6 Scala sources to /Users/ioconnell/workspace/github/algebird/algebird-caliper/target/scala-2.10/test-classes...
[info] Running com.google.caliper.Runner com.twitter.algebird.caliper.HLLPresentBenchmark
[info] 0% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=5, max=10, numHLL=10} 1217.34 ns; σ=60.94 ns @ 10 trials
[info] 2% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=10, max=10, numHLL=10} 1065.77 ns; σ=21.95 ns @ 10 trials
[info] 3% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=17, max=10, numHLL=10} 1127.64 ns; σ=7.16 ns @ 3 trials
[info] 5% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=20, max=10, numHLL=10} 1054.
@ianoc
ianoc / old code for hll present benchmark
Created March 17, 2015 19:58
old code for hll present benchmark
[info] 0% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=5, max=10, numHLL=10} 1549.60 ns; σ=14.49 ns @ 3 trials
[info] 2% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=10, max=10, numHLL=10} 1747.76 ns; σ=59.56 ns @ 10 trials
[info] 3% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=17, max=10, numHLL=10} 1706.26 ns; σ=1.00 ns @ 3 trials
[info] 5% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=20, max=10, numHLL=10} 1672.16 ns; σ=33.41 ns @ 10 trials
[info] 7% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=5, max=100, numHLL=10} 1289.51 ns; σ=12.45 ns @ 3 trials
[info] 8% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=10, max=100, numHLL=10} 1687.68 ns; σ=35.01 ns @ 10 trials
[info] 10% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=17, max=100, numHLL=10} 1744.20 ns; σ=30.19 ns @ 10 trials
[info] 12% Scenario{vm=java, trial=0, benchmark=BatchCreate, bits=20, max=100, numHLL=10} 1675.37 ns; σ=30.35 ns @ 10 trials
[info] 13% Scenario{vm=java, trial
@ianoc
ianoc / Nested Type
Created April 2, 2015 21:49
Accessing nested type
val mappedName = "com.blah.X.y.MyType"
val splitName = mappedName.split('.')
val exceptLast = splitName.dropRight(1)
val terms = exceptLast.map(newTermName(_))
val (first, second) = (terms(0), terms(1))
val accessorSelect = terms.drop(2).foldLeft(Select(Ident(first), second)) {
case (existing, next) =>
@ianoc
ianoc / tpeSelect.scala
Last active August 29, 2015 14:19
Tpe Info + Select
object exp {
sealed trait ExprTpe[T]
sealed trait ExprPrimitiveTpe[T] extends ExprTpe[T]
/**
 * An [[ExprTpe]] that additionally exposes a `mapping` of strings to nested type descriptors.
 *
 * NOTE(review): presumably models a composite type whose members are listed by `mapping` —
 * confirm against the implementations.
 */
sealed trait ExprContainerTpe[T] extends ExprTpe[T] {
/** Pairs of a `String` and an `ExprTpe` of any element type; presumably member name -> member type (TODO confirm). */
def mapping: List[(String, ExprTpe[_])]
}
case class ContainerTpe2[A, B](_1: ExprTpe[A], nme1: String, _2: ExprTpe[B], nme2: String) extends ExprContainerTpe[(A, B)] {
override def toString = s"""Type container of ${_1} and ${_2}"""
@ianoc
ianoc / KMeansJob.scala
Last active September 29, 2015 17:56 — forked from azymnis/KMeansJob.scala
K-Means in scalding
import com.twitter.algebird.{Aggregator, Semigroup}
import com.twitter.scalding._
import scala.util.Random
/**
* This job is a tutorial of sorts for scalding's Execution[T] abstraction.
* It is a simple implementation of Lloyd's algorithm for k-means on 2D data.
*
* http://en.wikipedia.org/wiki/K-means_clustering
@ianoc
ianoc / readInput.hs
Last active January 2, 2016 22:08
Reading stdin without buffering
import qualified Turtle as T
import qualified System.IO as IO
import Control.DeepSeq
mergeUntilTwoBlankLines :: (() -> IO (Maybe String)) -> IO [String]
mergeUntilTwoBlankLines fn = do
rd <- fn ()
processFunc [] rd
where processFunc ("" : existing) (Just "") = return existing
processFunc existing (Just nxt) = do
@ianoc
ianoc / minimum.hs
Created January 2, 2016 22:16
Minimum app in haskell
import qualified Turtle as T
-- | Program entry point: performs a single 'T.readline' and writes the
-- 'show' rendering of its result to standard output, followed by a newline.
main :: IO ()
main = T.readline >>= print
/**
 * Executes `tp` once and reports the Hadoop counters and wall-clock time of that run.
 *
 * The pipe is run through `filter(_ => false)` before being turned into an execution, so no
 * records are handed back to the caller; only the counters collected for the run are kept.
 *
 * @param ebScaldVariant label echoed unchanged as the first element of the result
 * @param idx            index echoed unchanged as the second element of the result
 * @param runName        run label echoed unchanged as the third element of the result
 * @param tp             the pipe to execute
 * @return `(ebScaldVariant, idx, runName, CPU_MILLISECONDS, GC_TIME_MILLIS, MILLIS_MAPS, elapsedMs)`
 *         where `elapsedMs` is the `System.currentTimeMillis` difference measured around the
 *         `execute` call
 * @throws NoSuchElementException if any of the three counters is absent from the run's counters
 */
def getTiming(ebScaldVariant:String, idx: Int, runName: String, tp: TypedPipe[_]) = {
  val startMs = System.currentTimeMillis
  // Keep only the counters (second element of the pair produced by getAndResetCounters).
  val counters = execute(tp.filter(_ => false).toIterableExecution.getAndResetCounters.map(_._2))
  // Taken immediately after `execute` returns so counter lookups are not part of the measurement.
  val elapsedMs = System.currentTimeMillis - startMs

  // Single lookup path: same exception type as a bare `Option.get`, but the message names the
  // counter that is missing and the run it was expected from.
  def counter(name: String, group: String) =
    counters.get(StatKey(name, group)).getOrElse {
      throw new NoSuchElementException(
        s"Counter '$name' in group '$group' was not reported for run '$runName' ($ebScaldVariant #$idx)")
    }

  val cpuMS = counter("CPU_MILLISECONDS", "org.apache.hadoop.mapreduce.TaskCounter")
  val gcMS = counter("GC_TIME_MILLIS", "org.apache.hadoop.mapreduce.TaskCounter")
  val millisMap = counter("MILLIS_MAPS", "org.apache.hadoop.mapreduce.JobCounter")

  (ebScaldVariant, idx, runName, cpuMS, gcMS, millisMap, elapsedMs)
}