Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Class for streaming computation of count, mean, and standard deviation; it also allows replacing particular values in the data population.
/**
* Class that allows for streaming collection of count, mean, stddev
* Allows combining multiple independent instances
* Allows _replacement_ of values in the stream (i.e. if one of the values of your data population changes
* you can replace it). Replacing values introduces some tiny error, but in testing that error is < 1e-9 even
* in extreme conditions.
*/
public class StreamStats {
private long count = 0;
private double m = 0;
private double s = 0;
public StreamStats() {
}
public StreamStats(long count, double mean, double varianceDeltaSum) {
this.count = count;
this.m = mean;
this.s = varianceDeltaSum;
}
public void add(double value) {
if (count == 0) {
m = value;
count = 1;
return;
}
count += 1;
double prevM = m;
double delta = value - prevM;
m = prevM + (delta / ((double)count));
s += (delta * (value - m));
}
public void replace(double oldValue, double newValue) {
if (count == 0) {
add(newValue);
return;
}
// precisely update the mean
double prevM = m;
double sum = m * count;
sum -= oldValue;
sum += newValue;
m = sum / count;
s -= ((oldValue - prevM) * (oldValue - m));
s += ((newValue - prevM) * (newValue - m));
}
public long count() {
return count;
}
public double mean() {
return m;
}
public double stdDev() {
return Math.sqrt(variance());
}
public double variance() {
return s / (count - 1);
}
public double varianceDeltaSum() {
return s;
}
public void mergeFrom(StreamStats that) {
// from http://www.johndcook.com/blog/skewness_kurtosis/
long both = this.count + that.count;
double newMean = (this.count * this.m + that.count * that.m) / both;
double delta = this.m - that.m;
double delta2 = delta * delta;
double varianceDeltaSum = this.s + that.s + (delta2 * this.count * that.count) / both;
this.count = both;
this.m = newMean;
this.s = varianceDeltaSum;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment