Skip to content

Instantly share code, notes, and snippets.

@rhee-airilab
Last active September 20, 2017 13:21
Show Gist options
  • Save rhee-airilab/b5fc66cc1fda9f01b2acfa803b7646d8 to your computer and use it in GitHub Desktop.
Save rhee-airilab/b5fc66cc1fda9f01b2acfa803b7646d8 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import print_function, division, absolute_import
import numpy as np
class CumStats(object):
    """
    Cumulative statistics accumulated from per-batch summaries.

    Each batch is described by its sample count, mean and variance (and
    optionally its element-wise minimum and maximum).  The accumulator
    merges those summaries into the count / mean / variance / min / max of
    all samples seen so far, without keeping the samples themselves.

    CumStats(ddof=0)

    About ddof, from the numpy.var() documentation:
        The mean is normally calculated as x.sum() / N, where N = len(x).
        If, however, ddof is specified, the divisor N - ddof is used
        instead.  In standard statistical practice, ddof=1 provides an
        unbiased estimator of the variance of a hypothetical infinite
        population.  ddof=0 provides a maximum likelihood estimate of the
        variance for normally distributed variables.

    The variances handed to append() must have been computed with the same
    ddof as the accumulator; append_data() takes care of that itself.

    Usage:
        import sys
        stats = CumStats()
        for s in samples:
            stats.append(
                count=s.shape[0],
                mean=np.mean(s, axis=0),
                var=np.var(s, axis=0),
                min_=np.amin(s, axis=0),
                max_=np.amax(s, axis=0))
        print(stats.stats(), file=sys.stderr)
    """

    def __init__(self, ddof=0):
        """Create an empty accumulator using the given delta degrees of freedom."""
        self.count = 0    # number of samples merged so far
        self.mean = 0     # running mean
        self.var = 0      # running variance (divisor: count - ddof)
        self.min = None   # running element-wise minimum, None until one is supplied
        self.max = None   # running element-wise maximum, None until one is supplied
        self.ddof = ddof

    def stats(self):
        """Return the current state as (count, mean, var, min, max)."""
        return self.count, self.mean, self.var, self.min, self.max

    def append(self, count, mean, var, min_=None, max_=None):
        """
        Merge the summary of one batch into the running statistics.

        Args:
            count: number of samples in the batch.
            mean: batch mean.
            var: batch variance, computed with the accumulator's ddof.
            min_: optional batch minimum; when None the running minimum
                is left unchanged.
            max_: optional batch maximum; when None the running maximum
                is left unchanged.

        Returns:
            The updated (count, mean, var, min, max) tuple.
        """
        # An empty batch carries no information; merging it would divide by
        # zero on a fresh accumulator or propagate a NaN mean.
        if count == 0:
            return self.stats()

        # new average: count-weighted combination of both means
        new_count = self.count + count
        new_mean = (self.count * self.mean + count * mean) / new_count

        # new variance: recover each side's sum of squared deviations,
        # add the between-group correction term, then re-normalise.
        delta = mean - self.mean
        m_a = self.var * (self.count - self.ddof)
        m_b = var * (count - self.ddof)
        m2 = m_a + m_b + delta * delta * self.count * count / new_count
        new_var = m2 / (new_count - self.ddof)

        # new min / max: test the *arguments* (min_, max_), not the builtins
        # min / max, which are never None.
        if min_ is not None:
            self.min = min_ if self.min is None else np.minimum(self.min, min_)
        if max_ is not None:
            self.max = max_ if self.max is None else np.maximum(self.max, max_)

        self.count = new_count
        self.mean = new_mean
        self.var = new_var
        return self.stats()

    def append_data(self, data):
        """
        Summarise a raw batch along axis 0 and merge it.

        Args:
            data: array whose first axis enumerates the samples.

        Returns:
            The updated (count, mean, var, min, max) tuple.
        """
        count = data.shape[0]
        if count == 0:
            # Reductions over an empty axis are undefined; nothing to merge.
            return self.stats()
        mean = np.mean(data, axis=0)
        var = np.var(data, axis=0, ddof=self.ddof)
        # np.minimum / np.maximum are binary element-wise functions; the
        # reductions along an axis are np.amin / np.amax.
        min_ = np.amin(data, axis=0)
        max_ = np.amax(data, axis=0)
        return self.append(count, mean, var, min_=min_, max_=max_)
if __name__ == '__main__':
    # Script entry point.
    #   * no arguments   -> run the randomized self test and exit
    #   * file arguments -> treat each as a comma-delimited numeric table,
    #                       merge per-file count/mean/var and report the
    #                       overall count, mean and standard deviation
    import sys
    import pandas as pd

    def self_test():
        """
        Compare batch-wise accumulation against statistics of the merged data.

        Builds 200 random batches (800 columns, 10..299 rows each), feeds
        their summaries to a CumStats instance, computes the same
        quantities on the concatenated data, and prints the largest squared
        difference for count, mean, var, min and max to stderr.
        """
        samples = []
        for _ in range(200):
            n_rows = np.random.randint(10, 300)
            drawn = np.random.uniform(-0.5, 999.0, [n_rows, 800])
            samples.append(np.asarray(drawn, dtype=np.float64))

        accumulator = CumStats()
        for batch in samples:
            accumulator.append(
                count=batch.shape[0],
                mean=np.mean(batch, axis=0),
                var=np.var(batch, axis=0),
                min_=np.amin(batch, axis=0),
                max_=np.amax(batch, axis=0))

        # reference values computed over all samples at once
        merged = np.concatenate(samples, axis=0)
        expected = (
            merged.shape[0],
            np.mean(merged, axis=0),
            np.var(merged, axis=0),
            np.min(merged, axis=0),
            np.max(merged, axis=0),
        )

        # largest squared error per statistic
        for got, want in zip(accumulator.stats(), expected):
            squared_error = np.square(got - want)
            print(np.amax(squared_error), file=sys.stderr)

    input_files = sys.argv[1:]
    if not input_files:
        self_test()
        sys.exit(0)

    totals = CumStats()
    for path in input_files:
        table = np.loadtxt(path, delimiter=',')
        totals.append(
            count=table.shape[0],
            mean=np.mean(table, 0),
            var=np.var(table, 0))

    n_total, mean_total, var_total, _, _ = totals.stats()
    std_total = np.sqrt(var_total)

    print('count =', n_total, file=sys.stderr)
    print('mean:', file=sys.stderr)
    print(pd.DataFrame(np.atleast_2d(mean_total)), file=sys.stderr)
    print('std:', file=sys.stderr)
    print(pd.DataFrame(np.atleast_2d(std_total)), file=sys.stderr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment