Skip to content

Instantly share code, notes, and snippets.

@gregakespret
Created March 15, 2015 19:26
Show Gist options
  • Save gregakespret/570998fccd6ca6e24ad4 to your computer and use it in GitHub Desktop.
Save gregakespret/570998fccd6ca6e24ad4 to your computer and use it in GitHub Desktop.
Histogram using TDigest
import java.io.File
import java.nio.charset.Charset
import com.tdunning.math.stats.{ArrayDigest, TDigest}
import scala.collection.JavaConversions._ // needed for java Collection -> scala Seq
import scala.io.Source
import com.google.common.io.Files
/**
 * Builds a histogram from the numbers stored in the file `rawdata`
 * (one value per line) and writes it to the file `histogram-from-tdigest`.
 *
 * Every input value is added to a t-digest; each centroid of the digest is
 * then emitted as one `mean,count` line of the output file.
 *
 * Throws `NumberFormatException` if a line of `rawdata` is not a valid
 * `Double`, and lets I/O exceptions from reading or writing propagate.
 */
object Histogram extends App {
  val distribution: TDigest = TDigest.createArrayDigest(35, 1000)

  // Keep a handle on the Source so the underlying file can be closed once
  // all lines have been consumed (getLines() is lazy and never closes it).
  private val input = Source.fromFile("rawdata")
  val data: Iterator[Double] = input.getLines().map(_.toDouble)
  try data.foreach(point => distribution.add(point))
  finally input.close()

  // One "mean,count\n" row per centroid. The Java collection returned by
  // centroids() is adapted to a Scala collection by the file-level
  // JavaConversions import.
  val histogram = distribution.centroids().map(c => s"${c.mean()},${c.count()}\n")

  val file = new File("histogram-from-tdigest")

  // Write all rows in a single call: java.nio's Files.write creates the file
  // or truncates an existing one by default, so no explicit delete/create is
  // needed and the file is opened only once instead of once per row.
  // Fully qualified because `Files` in this file refers to Guava's class.
  java.nio.file.Files.write(file.toPath, histogram.mkString.getBytes(Charset.defaultCharset()))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment