Last active
May 8, 2019 14:00
-
-
Save CarstenSchelp/fc61476f4a183f679875502cd72b736b to your computer and use it in GitHub Desktop.
Online Covariance Implementation as given on Wikipedia
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
class OnlineCovariance: | |
""" | |
A class to calculate the mean and the covariance matrix | |
of the incrementally added data. | |
The implementation is fully vectorized, which means that | |
no for-loops or indices are involved when processing | |
vectors and matrices. | |
This gist attempts to cleanly implement and extend this | |
online covariance algorithm provided on wikipedia: | |
https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online | |
""" | |
def __init__(self, order): | |
""" | |
order: int, The order ("number of features") of the incrementally added | |
dataset and of the resulting covariance matrix. | |
""" | |
self._identity = np.identity(order) | |
self._ones = np.ones(order) | |
self._count = 0 | |
self._mean = np.zeros(order) | |
self._cov = np.zeros((order,order)) | |
self._order = order | |
@property | |
def count(self): | |
""" | |
int, The number of observations that has been added | |
to this instance of OnlineCovariance. | |
Returns | |
""" | |
return self._count | |
@property | |
def mean(self): | |
""" | |
double, The mean of the added data. | |
""" | |
return self._mean | |
@property | |
def cov(self): | |
""" | |
array_like, The covariance matrix of the added data. | |
""" | |
return self._cov | |
@property | |
def corrcoef(self): | |
""" | |
array_like, The normalized covariance matrix of the added data. | |
Consists of the Pearson Correlation Coefficients of the data's features. | |
""" | |
if self._count < 1: | |
return None | |
variances = (self._cov * self._identity).sum(axis=1) | |
variance_rows = np.repeat(variances, self._order) | |
variance_rows.shape = (self._order, self._order) | |
variance_columns = variance_rows.transpose() | |
return self._cov / np.sqrt(variance_rows * variance_columns) | |
def add(self, observation): | |
""" | |
Add the given observation to this object. | |
Parameters | |
---------- | |
observation: array_like, The observation to add. | |
""" | |
if self._order != len(observation): | |
raise ValueError(f'Observation to add must be of size {self._order}') | |
self._count += 1 | |
delta_at_nMin1 = np.array(observation - self._mean) | |
self._mean += delta_at_nMin1 / self._count | |
weighted_delta_at_n = np.array(observation - self._mean) / self._count | |
D_at_n = np.repeat(weighted_delta_at_n, self._order) | |
D_at_n.shape = (self._order, self._order) | |
D = (delta_at_nMin1 * self._identity).dot(D_at_n.T) | |
self._cov = self._cov * (self._count - 1) / self._count + D | |
def merge(self, other): | |
""" | |
Merges the current object and the given other object into a new OnlineCovariance. | |
Parameters | |
---------- | |
other: OnlineCovariance, The other OnlineCovariance to merge this object with. | |
Returns | |
------- | |
OnlineCovariance | |
""" | |
if other._order != self._order: | |
raise ValueError(f'Cannot merge two OnlineCovariances with different orders. ({self._order} != {other._order})') | |
merged_cov = OnlineCovariance(self._order) | |
merged_cov._count = self.count + other.count | |
count_corr = (other.count * self.count) / merged_cov._count | |
merged_cov._mean = (self.mean/other.count + other.mean/self.count) * count_corr | |
mean_diffs = np.repeat(self._mean - other._mean, self._order) | |
mean_diffs.shape = self._order, self._order | |
print(mean_diffs * mean_diffs.T) | |
merged_cov._cov = (self._cov * self.count + other._cov * other._count + mean_diffs * mean_diffs.T * count_corr) / merged_cov.count | |
return merged_cov |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment