Last active
February 6, 2018 00:42
-
-
Save monkut/eab0426aaaf1c1df9c20 to your computer and use it in GitHub Desktop.
Sample implementation and sample usage of Welford's method for calculating running variance (and standard aggregations)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
A class for managing running calculations for: | |
- min | |
- average | |
- max | |
- stddev | |
- var | |
- count | |
Example Usage: | |
from collections import defaultdict | |
periods = defaultdict(WelfordRunningVariance) | |
values = (('2018-01-02', 1.0), ('2018-01-02', 2.5), ('2018-01-03', 8.7), ('2018-01-03', 1.2)) | |
for month, value in values: | |
periods[month].send(value) | |
Results: | |
{ | |
'2018-01-02': WelfordRunningVariance(min=1.0, avg=1.75, max=2.5, stddev=1.061, var=1.125), | |
'2018-01-03': WelfordRunningVariance(min=1.2, avg=4.95, max=8.7, stddev=5.303, var=28.125) | |
} | |
""" | |
from math import sqrt | |
class WelfordRunningVariance(object): | |
""" | |
Python implentation of Welford's running variance algorithm | |
http://www.johndcook.com/standard_deviation.html | |
""" | |
def __init__(self): | |
self._count = 0 | |
self._mean = None | |
self._last_mean = None | |
self._s = None | |
self._last_s = None | |
self._max = None | |
self._min = None | |
def send(self, value): | |
self._count += 1 | |
if self._count == 1: | |
self._mean = value | |
self._last_mean = value | |
self._last_s = 0.0 | |
self._max = value | |
self._min = value | |
else: | |
self._mean = self._last_mean + (value - self._last_mean)/self._count | |
self._s = self._last_s + (value - self._last_mean) * (value - self._mean) | |
# check & update max/min values if necessary | |
if value > self._max: | |
self._max = value | |
if value < self._min: | |
self._min = value | |
# prepare for next iteration | |
self._last_mean = self._mean | |
self._last_s = self._s | |
def next(self): | |
""" | |
Mimmicing generator function | |
""" | |
return self._mean | |
def count(self): | |
return self._count | |
def mean(self): | |
return self._mean | |
def max(self): | |
return self._max | |
def min(self): | |
return self._min | |
def var(self): | |
result = 0 | |
if self._count >= 2: | |
result = self._s/(self._count - 1) | |
return result | |
def stddev(self): | |
return sqrt(self.var()) | |
def __repr__(self): | |
return str(self) | |
def __str__(self): | |
return 'WelfordRunningVariance(min={}, avg={}, max={}, stddev={}, var={})'.format(*(round(v(), 3) for v in (self.min, self.mean, self.max, self.stddev, self.var))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment