Skip to content

Instantly share code, notes, and snippets.

@CarstenSchelp
Last active May 8, 2019 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CarstenSchelp/fc61476f4a183f679875502cd72b736b to your computer and use it in GitHub Desktop.
Save CarstenSchelp/fc61476f4a183f679875502cd72b736b to your computer and use it in GitHub Desktop.
Online Covariance Implementation as given on Wikipedia
import numpy as np
class OnlineCovariance:
"""
A class to calculate the mean and the covariance matrix
of the incrementally added data.
The implementation is fully vectorized, which means that
no for-loops or indices are involved when processing
vectors and matrices.
This gist attempts to cleanly implement and extend this
online covariance algorithm provided on wikipedia:
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online
"""
def __init__(self, order):
"""
order: int, The order ("number of features") of the incrementally added
dataset and of the resulting covariance matrix.
"""
self._identity = np.identity(order)
self._ones = np.ones(order)
self._count = 0
self._mean = np.zeros(order)
self._cov = np.zeros((order,order))
self._order = order
@property
def count(self):
"""
int, The number of observations that has been added
to this instance of OnlineCovariance.
Returns
"""
return self._count
@property
def mean(self):
"""
double, The mean of the added data.
"""
return self._mean
@property
def cov(self):
"""
array_like, The covariance matrix of the added data.
"""
return self._cov
@property
def corrcoef(self):
"""
array_like, The normalized covariance matrix of the added data.
Consists of the Pearson Correlation Coefficients of the data's features.
"""
if self._count < 1:
return None
variances = (self._cov * self._identity).sum(axis=1)
variance_rows = np.repeat(variances, self._order)
variance_rows.shape = (self._order, self._order)
variance_columns = variance_rows.transpose()
return self._cov / np.sqrt(variance_rows * variance_columns)
def add(self, observation):
"""
Add the given observation to this object.
Parameters
----------
observation: array_like, The observation to add.
"""
if self._order != len(observation):
raise ValueError(f'Observation to add must be of size {self._order}')
self._count += 1
delta_at_nMin1 = np.array(observation - self._mean)
self._mean += delta_at_nMin1 / self._count
weighted_delta_at_n = np.array(observation - self._mean) / self._count
D_at_n = np.repeat(weighted_delta_at_n, self._order)
D_at_n.shape = (self._order, self._order)
D = (delta_at_nMin1 * self._identity).dot(D_at_n.T)
self._cov = self._cov * (self._count - 1) / self._count + D
def merge(self, other):
"""
Merges the current object and the given other object into a new OnlineCovariance.
Parameters
----------
other: OnlineCovariance, The other OnlineCovariance to merge this object with.
Returns
-------
OnlineCovariance
"""
if other._order != self._order:
raise ValueError(f'Cannot merge two OnlineCovariances with different orders. ({self._order} != {other._order})')
merged_cov = OnlineCovariance(self._order)
merged_cov._count = self.count + other.count
count_corr = (other.count * self.count) / merged_cov._count
merged_cov._mean = (self.mean/other.count + other.mean/self.count) * count_corr
mean_diffs = np.repeat(self._mean - other._mean, self._order)
mean_diffs.shape = self._order, self._order
print(mean_diffs * mean_diffs.T)
merged_cov._cov = (self._cov * self.count + other._cov * other._count + mean_diffs * mean_diffs.T * count_corr) / merged_cov.count
return merged_cov
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment