Skip to content

Instantly share code, notes, and snippets.

@monkut
Last active February 6, 2018 00:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save monkut/eab0426aaaf1c1df9c20 to your computer and use it in GitHub Desktop.
Save monkut/eab0426aaaf1c1df9c20 to your computer and use it in GitHub Desktop.
Sample implementation and sample usage of Welford's method for calculating running variance (and standard aggregations)
"""
A class for managing running calculations for:
- min
- average
- max
- stddev
- var
- count
Example Usage:
from collections import defaultdict
periods = defaultdict(WelfordRunningVariance)
values = (('2018-01-02', 1.0), ('2018-01-02', 2.5), ('2018-01-03', 8.7), ('2018-01-03', 1.2))
for month, value in values:
periods[month].send(value)
Results:
{
'2018-01-02': WelfordRunningVariance(min=1.0, avg=1.75, max=2.5, stddev=1.061, var=1.125),
'2018-01-03': WelfordRunningVariance(min=1.2, avg=4.95, max=8.7, stddev=5.303, var=28.125)
}
"""
from math import sqrt
class WelfordRunningVariance(object):
"""
Python implentation of Welford's running variance algorithm
http://www.johndcook.com/standard_deviation.html
"""
def __init__(self):
self._count = 0
self._mean = None
self._last_mean = None
self._s = None
self._last_s = None
self._max = None
self._min = None
def send(self, value):
self._count += 1
if self._count == 1:
self._mean = value
self._last_mean = value
self._last_s = 0.0
self._max = value
self._min = value
else:
self._mean = self._last_mean + (value - self._last_mean)/self._count
self._s = self._last_s + (value - self._last_mean) * (value - self._mean)
# check & update max/min values if necessary
if value > self._max:
self._max = value
if value < self._min:
self._min = value
# prepare for next iteration
self._last_mean = self._mean
self._last_s = self._s
def next(self):
"""
Mimmicing generator function
"""
return self._mean
def count(self):
return self._count
def mean(self):
return self._mean
def max(self):
return self._max
def min(self):
return self._min
def var(self):
result = 0
if self._count >= 2:
result = self._s/(self._count - 1)
return result
def stddev(self):
return sqrt(self.var())
def __repr__(self):
return str(self)
def __str__(self):
return 'WelfordRunningVariance(min={}, avg={}, max={}, stddev={}, var={})'.format(*(round(v(), 3) for v in (self.min, self.mean, self.max, self.stddev, self.var)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment