Skip to content

Instantly share code, notes, and snippets.

@cynthia
Last active November 15, 2017 03:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cynthia/4fc0fcf8bb97aee4487d05704a5ee4b4 to your computer and use it in GitHub Desktop.
Save cynthia/4fc0fcf8bb97aee4487d05704a5ee4b4 to your computer and use it in GitHub Desktop.
An experimental Mahalanobis whitening (ZCA) implementation using covariance approximation with FastMCD. I don't know if this is mathematically correct or whether it works. Use with caution.
# -*- coding: utf-8 -*-
import numpy
from sklearn.covariance import MinCovDet
class FastMCDZCA:
    """Mahalanobis whitening (ZCA) backed by a robust FastMCD covariance.

    The robust covariance estimated by scikit-learn's ``MinCovDet``
    (FastMCD) is decomposed with SVD and turned into the symmetric ZCA
    whitening matrix ``W = U diag(1 / sqrt(S + eps)) U^T``.

    https://en.wikipedia.org/wiki/Whitening_transformation

    References
    ----------
    .. [Rousseeuw1984] `P. J. Rousseeuw. Least median of squares regression.
        J. Am Stat Ass, 79:871, 1984.`
    .. [Rousseeuw] `A Fast Algorithm for the Minimum Covariance Determinant
        Estimator, 1999, American Statistical Association and the American
        Society for Quality, TECHNOMETRICS`
    .. [ButlerDavies] `R. W. Butler, P. L. Davies and M. Jhun,
        Asymptotics For The Minimum Covariance Determinant Estimator,
        The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400`
    """

    def __init__(self, store_precision=True, assume_centered=False,
                 support_fraction=None, random_state=None):
        # All parameters are forwarded verbatim to
        # sklearn.covariance.MinCovDet.
        self.mcd_ = MinCovDet(store_precision=store_precision,
                              assume_centered=assume_centered,
                              support_fraction=support_fraction,
                              random_state=random_state)
        self.U_ = None  # left singular vectors of the robust covariance
        self.S_ = None  # singular values of the robust covariance
        self.V_ = None  # right singular vectors (kept for inspection)
        # BUG FIX: numpy.float was deprecated in NumPy 1.20 and removed in
        # 1.24; the builtin float is the same 64-bit type.
        self.epsilon = numpy.finfo(float).eps
        self.zca_ = None  # whitening matrix, computed by fit()

    def correct_covariance(self, data):
        """Delegate to ``MinCovDet.correct_covariance``.

        BUG FIX: the original called ``self.mcd_(data)``, but a MinCovDet
        instance is not callable, so this always raised TypeError.
        """
        return self.mcd_.correct_covariance(data)

    def error_norm(self, comp_cov, norm='frobenius', scaling=True, squared=True):
        """Delegate to ``MinCovDet.error_norm``."""
        return self.mcd_.error_norm(comp_cov, norm=norm, scaling=scaling,
                                    squared=squared)

    def fit(self, X, y=None):
        """Fit the robust covariance on ``X`` and precompute the ZCA matrix.

        Returns whatever ``MinCovDet.fit`` returns (the fitted estimator).
        """
        ret = self.mcd_.fit(X, y=y)
        self.U_, self.S_, self.V_ = numpy.linalg.svd(self.mcd_.covariance_)
        # W = U diag(1/sqrt(S + eps)) U^T -- symmetric by construction;
        # epsilon guards against division by zero for tiny singular values.
        self.zca_ = numpy.dot(
            self.U_,
            numpy.dot(numpy.diag(1.0 / numpy.sqrt(self.S_ + self.epsilon)),
                      self.U_.T))
        return ret

    def transform(self, X):
        """Whiten ``X`` of shape (n_samples, n_features).

        Raises RuntimeError if called before ``fit``.

        BUG FIX: the original computed ``dot(self.zca_, X)``, which is
        dimensionally invalid for (n_samples, n_features) input unless
        n_samples == n_features. Since ``zca_`` is symmetric, row-wise
        whitening is ``dot(X, self.zca_)``.

        NOTE(review): X is not centered here; for non-centered data callers
        may want to subtract ``self.mcd_.location_`` first -- confirm
        intended contract.
        """
        if self.U_ is None or self.S_ is None or self.V_ is None or self.zca_ is None:
            raise RuntimeError("fit() must be called first")
        return numpy.dot(X, self.zca_)

    def fit_transform(self, X, y=None):
        """Convenience wrapper: ``fit(X)`` then ``transform(X)``."""
        self.fit(X, y=y)
        return self.transform(X)

    def get_params(self):
        """Return the underlying ``MinCovDet`` parameters.

        Previously raised ``RuntimeError("Not implemented yet")``; the
        wrapped estimator already exposes its params, so delegate like the
        sibling methods do.
        """
        return self.mcd_.get_params()

    def get_precision(self):
        """Delegate to ``MinCovDet.get_precision``."""
        return self.mcd_.get_precision()

    def mahalanobis(self, observations):
        """Delegate to ``MinCovDet.mahalanobis``."""
        return self.mcd_.mahalanobis(observations)

    def reweight_covariance(self, data):
        """Delegate to ``MinCovDet.reweight_covariance``."""
        return self.mcd_.reweight_covariance(data)

    def score(self, X_test, y=None):
        """Delegate to ``MinCovDet.score`` (mean log-likelihood)."""
        return self.mcd_.score(X_test, y=y)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment