Skip to content

Instantly share code, notes, and snippets.

@geoHeil
Created January 11, 2017 14:32
Show Gist options
  • Save geoHeil/bf944a9ac04d20973c479057128c7fd2 to your computer and use it in GitHub Desktop.
Save geoHeil/bf944a9ac04d20973c479057128c7fd2 to your computer and use it in GitHub Desktop.
// Summary statistics: mean and variance of measurement values, grouped by metric name.
/** One measurement sample: a `name` label paired with a single numeric `value`. */
case class MeasureUnit(
  name: String,
  value: Double
)
// Sample input: two readings named "metric1" followed by three named "metric2".
val measureSeq: Seq[MeasureUnit] =
  Seq(
    "metric1" -> 0.04,
    "metric1" -> 0.09,
    "metric2" -> 0.64,
    "metric2" -> 0.34,
    "metric2" -> 0.84
  ).map { case (label, reading) => MeasureUnit(label, reading) }
type Name = String
// Measurement values grouped by measurement name, in input order within each group:
// "metric1" -> Seq(0.04, 0.09), "metric2" -> Seq(0.64, 0.34, 0.84)
//
// The values are projected with a strict `map` rather than `mapValues`: on Scala 2.12
// `mapValues` returns a lazy view that re-runs the function on every lookup, and on
// Scala 2.13 it returns a `MapView` that does not conform to the declared `Map` type.
val groupedMeasures: Map[Name, Seq[Double]] =
  measureSeq
    .groupBy(_.name)
    .map { case (name, units) => (name, units.map(_.value)) }
type Mean = Double

/** Arithmetic mean of `item`.
  *
  * Every element is converted to `Double` before it is added, so integral input
  * (e.g. `Int`, `Long`) cannot silently wrap around while being summed.
  *
  * @param item the values to average
  * @param n    numeric evidence used to convert each element to `Double`
  * @tparam T   element type
  * @return the mean as a `Double`; `NaN` when `item` is empty (0.0 / 0.0)
  */
def mean[T](item: Traversable[T])(implicit n: Numeric[T]): Double = {
  // Accumulate in Double rather than in T: `item.sum` would overflow for large integral values.
  val total = item.foldLeft(0.0d)((acc, x) => acc + n.toDouble(x))
  total / item.size.toDouble
}
/** Population variance of `items`: the mean of the squared deviations from the mean
  * (the divisor is the element count, not count - 1).
  *
  * Every element is converted to `Double` before any arithmetic, so integral input
  * (e.g. `Int`, `Long`) cannot silently wrap around while the mean is being computed.
  *
  * @param items the values to summarise
  * @param n     numeric evidence used to convert each element to `Double`
  * @tparam T    element type
  * @return the population variance; `NaN` when `items` is empty (0.0 / 0.0)
  */
def variance[T](items: Traversable[T])(implicit n: Numeric[T]): Double = {
  val count = items.size.toDouble
  // Mean accumulated in Double: summing in T would overflow for large integral values.
  val itemMean = items.foldLeft(0.0d)((acc, item) => acc + n.toDouble(item)) / count
  val sumOfSquares = items.foldLeft(0.0d) { (total, item) =>
    total + math.pow(n.toDouble(item) - itemMean, 2)
  }
  sumOfSquares / count
}
// The three maps below are built with a strict `map` rather than `mapValues`: on Scala 2.12
// `mapValues` returns a lazy view that recomputes the statistic on every lookup, and on
// Scala 2.13 it returns a `MapView` that does not conform to the declared `Map` types.

// Mean of the values recorded under each name.
val meanMapping: Map[Name, Mean] =
  groupedMeasures.map { case (name, values) => (name, mean(values)) }

type Variance = Double
// Variance of the values recorded under each name.
val varianceMapping: Map[Name, Variance] =
  groupedMeasures.map { case (name, values) => (name, variance(values)) }

type Summary = (Mean, Variance)
// (mean, variance) pair for each name.
val summaryMapping: Map[Name, Summary] =
  groupedMeasures.map { case (name, values) => (name, (mean(values), variance(values))) }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment