Created
August 6, 2015 23:33
-
-
Save steveash/f9fabd193f19400f063c to your computer and use it in GitHub Desktop.
Class that computes a streaming count, mean, and standard deviation, and allows replacing particular values in the data population
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Streaming accumulator for the count, mean, variance and standard deviation of a
 * population of {@code double} values.
 *
 * <p>Values are folded in one at a time with {@link #add(double)}; independent instances
 * can be combined with {@link #mergeFrom(StreamStats)}; and a value that was previously
 * added can be swapped for a new one with {@link #replace(double, double)} (i.e. if one of
 * the values of your data population changes you can replace it). The original author
 * notes that replacing values introduces some tiny error, reported as &lt; 1e-9 in testing
 * even in extreme conditions.
 *
 * <p>Internally the class keeps the running mean and the running sum of squared deviations
 * from the mean; the sample variance is that sum divided by {@code count - 1}.
 */
public class StreamStats {

    /** Number of values currently represented. */
    private long count = 0;

    /** Running mean of the values. */
    private double m = 0;

    /** Running sum of squared deviations from the mean. */
    private double s = 0;

    /** Creates an empty accumulator. */
    public StreamStats() {
    }

    /**
     * Creates an accumulator from previously captured state.
     *
     * @param count            number of values represented
     * @param mean             mean of those values
     * @param varianceDeltaSum sum of squared deviations, as returned by
     *                         {@link #varianceDeltaSum()}
     */
    public StreamStats(long count, double mean, double varianceDeltaSum) {
        this.count = count;
        this.m = mean;
        this.s = varianceDeltaSum;
    }

    /**
     * Adds one value to the population.
     *
     * @param value the value to add
     */
    public void add(double value) {
        if (count == 0) {
            // A single observation is its own mean and has no deviation from it.
            m = value;
            s = 0;
            count = 1;
            return;
        }
        count += 1;
        double prevM = m;
        double delta = value - prevM;
        m = prevM + (delta / ((double) count));
        // Uses the deviation from both the old and the new mean.
        s += (delta * (value - m));
    }

    /**
     * Replaces one value of the population with another, leaving the count unchanged.
     * If the accumulator is empty this simply adds {@code newValue}.
     *
     * @param oldValue the value to take out of the population
     * @param newValue the value that takes its place
     */
    public void replace(double oldValue, double newValue) {
        if (count == 0) {
            add(newValue);
            return;
        }
        // Update the mean by reconstructing the sum and swapping the value in it.
        double prevM = m;
        double sum = m * count;
        sum -= oldValue;
        sum += newValue;
        m = sum / count;
        // Remove the old value's contribution and add the new one's, each measured
        // against both the previous and the updated mean.
        s -= ((oldValue - prevM) * (oldValue - m));
        s += ((newValue - prevM) * (newValue - m));
    }

    /**
     * @return the number of values currently represented
     */
    public long count() {
        return count;
    }

    /**
     * @return the running mean (0 for a freshly constructed empty accumulator)
     */
    public double mean() {
        return m;
    }

    /**
     * @return the square root of {@link #variance()}
     */
    public double stdDev() {
        return Math.sqrt(variance());
    }

    /**
     * Returns the sample variance, i.e. the sum of squared deviations divided by
     * {@code count - 1}.
     *
     * <p>Rounding in {@link #replace(double, double)} can leave the deviation sum a hair
     * below zero; the result is clamped at 0 so that {@link #stdDev()} does not turn into
     * NaN in that situation. When the division itself yields NaN (for example a single
     * value with a zero deviation sum, which is 0/0) NaN is returned unchanged.
     *
     * @return the sample variance, never negative
     */
    public double variance() {
        double raw = s / (count - 1);
        // Math.max passes NaN through and maps -0.0 and tiny negatives to 0.0.
        return Math.max(0.0, raw);
    }

    /**
     * @return the raw running sum of squared deviations from the mean
     */
    public double varianceDeltaSum() {
        return s;
    }

    /**
     * Folds the population represented by {@code that} into this accumulator.
     * {@code that} is not modified.
     *
     * <p>Merging an empty accumulator is a no-op, and merging into an empty accumulator
     * copies the other one's state. Handling these cases explicitly avoids the 0/0 that
     * the general formula would produce when both sides are empty.
     *
     * @param that the accumulator to merge into this one
     */
    public void mergeFrom(StreamStats that) {
        if (that.count == 0) {
            return;
        }
        if (this.count == 0) {
            this.count = that.count;
            this.m = that.m;
            this.s = that.s;
            return;
        }
        // from http://www.johndcook.com/blog/skewness_kurtosis/
        long both = this.count + that.count;
        double newMean = (this.count * this.m + that.count * that.m) / both;
        double delta = this.m - that.m;
        double delta2 = delta * delta;
        double varianceDeltaSum = this.s + that.s + (delta2 * this.count * that.count) / both;
        this.count = both;
        this.m = newMean;
        this.s = varianceDeltaSum;
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Is there a way to do a subtraction (the opposite of `mergeFrom`)?