Skip to content

Instantly share code, notes, and snippets.

@MishaelRosenthal
Created March 4, 2014 17:43
Show Gist options
  • Save MishaelRosenthal/9351719 to your computer and use it in GitHub Desktop.
Save MishaelRosenthal/9351719 to your computer and use it in GitHub Desktop.
A Scala implementation and testing of incremental mean and variance calculation. Also includes a pimp my library implementation for general Iterable collection.
package com.liveperson.predictivedialer.common.utils
/**
* Created with IntelliJ IDEA.
* User: mishaelr
* Date: 3/4/14
* Time: 7:18 PM
*
* This object contains static methods that can be used for calculating mean and variance incrementally.
* Based on the following:
* http://nfs-uxsup.csx.cam.ac.uk/~fanf2/hermes/doc/antiforgery/stats.pdf
*/
object IncrementalStatistics {

  /**
   * Running state of an incrementally computed mean.
   *
   * @param mean  mean of the values folded in so far
   * @param index number of values folded in so far.
   *              Note: index starts from one, not from zero!
   *              Namely, the index for the first element should be 1
   *              (0 is only used for the initial, empty state).
   */
  case class MeanTuple(mean: Double, index: Int)

  /**
   * Running state of an incrementally computed variance.
   *
   * @param meanTuple running mean of the values folded in so far
   * @param sum       accumulator updated by [[updateVarianceTuple]]; dividing it by the
   *                  number of values gives the (population) variance
   */
  case class VarianceTuple(meanTuple: MeanTuple, sum: Double)

  /**
   * Folds one more value into a running mean.
   *
   * Initial meanTuple value should be:
   * MeanTuple(0.0, 0)
   *
   * @param meanTuple state before `newValue` is taken into account
   * @param newValue  the next observation
   * @return the state including `newValue`, with the index advanced by one
   */
  def updateMean(meanTuple: MeanTuple, newValue: Double): MeanTuple = {
    val count = meanTuple.index + 1
    // Move the old mean towards the new value by 1/count of their distance.
    MeanTuple(meanTuple.mean + (1.0 / count) * (newValue - meanTuple.mean), count)
  }

  /**
   * Folds one more value into a running variance.
   *
   * Initial varianceTuple should be:
   * VarianceTuple(MeanTuple(0.0, 0), 0.0)
   *
   * @param varianceTuple state before `newValue` is taken into account
   * @param newValue      the next observation
   * @return the state including `newValue`
   */
  def updateVarianceTuple(varianceTuple: VarianceTuple, newValue: Double): VarianceTuple = {
    val oldMean = varianceTuple.meanTuple.mean
    val newMean = updateMean(varianceTuple.meanTuple, newValue)
    // The increment uses the deviation from the mean *before* and *after* the update.
    val newS = varianceTuple.sum + (newValue - oldMean) * (newValue - newMean.mean)
    VarianceTuple(newMean, newS)
  }

  /**
   * Calculates the variance from the tuple (accumulated sum divided by the number of values).
   *
   * @param varianceTuple state produced by [[updateVarianceTuple]]
   * @return the variance, or 0.0 when no value has been folded in yet. Without this guard
   *         the initial tuple would evaluate 0.0 / 0 and return NaN.
   */
  def getVarianceFromTuple(varianceTuple: VarianceTuple): Double = {
    val count = varianceTuple.meanTuple.index
    if (count == 0) 0.0 else varianceTuple.sum / count
  }
}
package com.liveperson.predictivedialer.common.utils
/**
* Created with IntelliJ IDEA.
* User: mishaelr
* Date: 1/16/14
* Time: 3:12 PM
*/
object IterableWithStatistics {
  import IncrementalStatistics._

  /**
   * Adds `mean` and `variance` to any `Iterable` whose elements have a `Numeric` instance.
   * Both are computed in a single left fold using the incremental update functions
   * imported from `IncrementalStatistics`.
   */
  implicit class RichIterable[+A](collection: Iterable[A]){

    /**
     * Mean of the elements, converted to Double through `num`.
     * An empty collection yields 0.0.
     */
    def mean[B >: A](implicit num: Numeric[B]) =
      if (collection.nonEmpty) {
        val seed = MeanTuple(0.0, 0)
        val folded = collection.foldLeft(seed) { (running, element) =>
          updateMean(running, num.toDouble(element))
        }
        folded.mean
      } else {
        0.0
      }

    /**
     * Variance of the elements, converted to Double through `num`.
     * An empty collection yields 0.0.
     */
    def variance[B >: A](implicit num: Numeric[B]) =
      if (collection.nonEmpty) {
        val seed = VarianceTuple(MeanTuple(0.0, 0), 0.0)
        val folded = collection.foldLeft(seed) { (running, element) =>
          updateVarianceTuple(running, num.toDouble(element))
        }
        getVarianceFromTuple(folded)
      } else {
        0.0
      }
  }
}
package com.liveperson.predictivedialer.common.utils
import org.scalatest.junit.JUnitRunner
import org.scalatest.prop.Checkers
import org.scalacheck.Prop._
import org.scalacheck.{Prop, Gen}
import org.junit.runner.RunWith
import org.scalatest.{Matchers, FunSuite}
import com.liveperson.predictivedialer.common.utils.IterableWithStatistics._
/**
* Created with IntelliJ IDEA.
* User: mishaelr
* Date: 2/2/14
* Time: 11:07 AM
*
* Error tolerance of 0.001%.
*/
@RunWith(classOf[JUnitRunner])
class IterableWithStatisticsTest extends FunSuite with Checkers with Matchers{

  /** Generator for list elements, bounded to [-1000, 1000]. */
  val smallDouble: Gen[Double] = Gen.chooseNum(-1000.0, 1000.0)

  // Relative error allowed between the reference value and the incremental result (0.001%).
  private val relativeTolerance = 0.00001

  /**
   * Property: the incremental `mean` agrees with the naive `sum / size`
   * within the relative tolerance.
   */
  test("mean") {
    check {
      Prop.forAll(Gen.containerOf[List,Double](smallDouble)){
        (list: List[Double]) =>
          // Reference value, computed once. An empty list is defined to have mean 0.0.
          val expected = if (list.isEmpty) 0.0 else list.sum / list.size
          if(expected == 0) {
            // A relative tolerance around zero is zero, so compare exactly here.
            // NOTE(review): presumably `+-` rejects a non-positive tolerance - confirm
            // against the ScalaTest version in use.
            0.0 === list.mean
          } else {
            val actual = list.mean
            expected === actual +- (relativeTolerance * math.abs(expected))
          }
      }
    }
  }

  /**
   * Property: the incremental `variance` agrees with the mean of squared deviations
   * within the relative tolerance.
   */
  test("variance") {
    check {
      Prop.forAll(Gen.containerOf[List,Double](smallDouble)){
        (list: List[Double]) =>
          // Hoisted out of the map below: computing it per element made the check quadratic.
          val center = list.mean
          // Reference value, computed once. For an empty list this evaluates to 0.0.
          val expected = list.map{x => math.pow(x - center, 2)}.mean
          if(list.isEmpty || expected == 0) {
            0.0 === list.variance
          } else {
            val actual = list.variance
            actual === (expected +- relativeTolerance * expected)
          }
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment