Skip to content

Instantly share code, notes, and snippets.

@cynthia
Last active November 15, 2017 03:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cynthia/4fc0fcf8bb97aee4487d05704a5ee4b4 to your computer and use it in GitHub Desktop.
Save cynthia/4fc0fcf8bb97aee4487d05704a5ee4b4 to your computer and use it in GitHub Desktop.
An experimental Mahalanobis whitening (ZCA) implementation using covariance approximation with FastMCD. I don't know if this is mathematically correct or whether it works. Use with caution.
# -*- coding: utf-8 -*-
import numpy
from sklearn.covariance import MinCovDet
class FastMCDZCA:
    """Mahalanobis whitening (ZCA) backed by a robust FastMCD covariance.

    The robust covariance estimated by scikit-learn's ``MinCovDet``
    (FastMCD) is decomposed with SVD and turned into the symmetric ZCA
    whitening matrix ``W = U diag(1 / sqrt(S + eps)) U^T``.

    https://en.wikipedia.org/wiki/Whitening_transformation

    References
    ----------
    .. [Rousseeuw1984] `P. J. Rousseeuw. Least median of squares regression.
        J. Am Stat Ass, 79:871, 1984.`
    .. [Rousseeuw] `A Fast Algorithm for the Minimum Covariance Determinant
        Estimator, 1999, American Statistical Association and the American
        Society for Quality, TECHNOMETRICS`
    .. [ButlerDavies] `R. W. Butler, P. L. Davies and M. Jhun,
        Asymptotics For The Minimum Covariance Determinant Estimator,
        The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400`
    """

    def __init__(self, store_precision=True, assume_centered=False,
                 support_fraction=None, random_state=None):
        # All parameters are forwarded verbatim to
        # sklearn.covariance.MinCovDet.
        self.mcd_ = MinCovDet(store_precision=store_precision,
                              assume_centered=assume_centered,
                              support_fraction=support_fraction,
                              random_state=random_state)
        self.U_ = None  # left singular vectors of the robust covariance
        self.S_ = None  # singular values of the robust covariance
        self.V_ = None  # right singular vectors (kept for inspection)
        # BUG FIX: numpy.float was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin float is the same 64-bit type.
        self.epsilon = numpy.finfo(float).eps
        self.zca_ = None  # whitening matrix, computed by fit()

    def correct_covariance(self, data):
        """Delegate to ``MinCovDet.correct_covariance``.

        BUG FIX: the original called ``self.mcd_(data)``, but a MinCovDet
        instance is not callable, so this always raised TypeError.
        """
        return self.mcd_.correct_covariance(data)

    def error_norm(self, comp_cov, norm='frobenius', scaling=True, squared=True):
        """Delegate to ``MinCovDet.error_norm``."""
        return self.mcd_.error_norm(comp_cov, norm=norm, scaling=scaling,
                                    squared=squared)

    def fit(self, X, y=None):
        """Fit the robust covariance on ``X`` and precompute the ZCA matrix.

        Returns whatever ``MinCovDet.fit`` returns (the fitted estimator).
        """
        ret = self.mcd_.fit(X, y=y)
        self.U_, self.S_, self.V_ = numpy.linalg.svd(self.mcd_.covariance_)
        # W = U diag(1/sqrt(S + eps)) U^T -- symmetric by construction;
        # epsilon guards against division by zero for tiny singular values.
        self.zca_ = numpy.dot(
            self.U_,
            numpy.dot(numpy.diag(1.0 / numpy.sqrt(self.S_ + self.epsilon)),
                      self.U_.T))
        return ret

    def transform(self, X):
        """Whiten ``X`` of shape (n_samples, n_features).

        Raises RuntimeError if called before ``fit``.

        BUG FIX: the original computed ``dot(self.zca_, X)``, which is
        dimensionally invalid for (n_samples, n_features) input unless
        n_samples == n_features. Since ``zca_`` is symmetric, row-wise
        whitening is ``dot(X, self.zca_)``.

        NOTE(review): X is not centered here; for non-centered data callers
        may want to subtract ``self.mcd_.location_`` first -- confirm
        intended contract.
        """
        if self.U_ is None or self.S_ is None or self.V_ is None or self.zca_ is None:
            raise RuntimeError("fit() must be called first")
        return numpy.dot(X, self.zca_)

    def fit_transform(self, X, y=None):
        """Convenience wrapper: ``fit(X)`` then ``transform(X)``."""
        self.fit(X, y=y)
        return self.transform(X)

    def get_params(self):
        """Return the underlying ``MinCovDet`` parameters.

        Previously raised ``RuntimeError("Not implemented yet")``; the
        wrapped estimator already exposes its params, so delegate like the
        sibling methods do.
        """
        return self.mcd_.get_params()

    def get_precision(self):
        """Delegate to ``MinCovDet.get_precision``."""
        return self.mcd_.get_precision()

    def mahalanobis(self, observations):
        """Delegate to ``MinCovDet.mahalanobis``."""
        return self.mcd_.mahalanobis(observations)

    def reweight_covariance(self, data):
        """Delegate to ``MinCovDet.reweight_covariance``."""
        return self.mcd_.reweight_covariance(data)

    def score(self, X_test, y=None):
        """Delegate to ``MinCovDet.score`` (mean log-likelihood)."""
        return self.mcd_.score(X_test, y=y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment