Ridge CCA
import numpy as np
from sklearn.utils.extmath import randomized_svd


def partial_whiten(X, alpha, eigval_tol=1e-7):
    """
    Return regularized whitening transform for a matrix X.

    Parameters
    ----------
    X : ndarray
        Matrix with shape `(m, n)` holding `m` observations
        in `n`-dimensional feature space. Columns of `X` are
        expected to be mean-centered so that `X.T @ X` is
        the covariance matrix.
    alpha : float
        Regularization parameter, `0 <= alpha <= 1`.
    eigval_tol : float
        Eigenvalues of the covariance matrix are clipped to
        this minimum value.

    Returns
    -------
    X_whitened : ndarray
        Transformed data matrix.
    Zx : ndarray
        Matrix implementing the whitening transformation,
        `X_whitened = X @ Zx`.
    """
    # Regularized covariance: (1 - alpha) * (X.T @ X) + alpha * I.
    XtX = (1 - alpha) * (X.T @ X)
    XtX[np.diag_indices_from(XtX)] += alpha

    # Eigendecomposition of the regularized covariance.
    w, v = np.linalg.eigh(XtX)
    w[w < eigval_tol] = eigval_tol  # clip minimum eigenvalue

    # Matrix holding the whitening transformation.
    Zx = np.linalg.multi_dot((v, np.diag(1 / np.sqrt(w)), v.T))

    # Return the (partially) whitened data and whitening matrix.
    return X @ Zx, Zx
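
# Quick sanity check (illustrative sketch, not part of the original gist;
# `_demo_partial_whiten` is a hypothetical helper). It exercises the two
# endpoints of the regularization scale: with `alpha=0` the whitened data
# should have identity covariance, and with `alpha=1` the covariance is
# replaced by the identity, so the transform reduces to the identity map.
# It is only called from the demo block at the bottom of the script.
def _demo_partial_whiten(m=500, n=10, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.standard_normal((m, n))
    X -= X.mean(axis=0)  # mean-center, as `partial_whiten` expects.

    # alpha=0: fully whitened, so Xw.T @ Xw is (approximately) identity.
    Xw, _ = partial_whiten(X, alpha=0.0)
    assert np.allclose(Xw.T @ Xw, np.eye(n), atol=1e-6)

    # alpha=1: Zx = V @ V.T = I, so the data are returned unchanged.
    Xw, Zx = partial_whiten(X, alpha=1.0)
    assert np.allclose(Zx, np.eye(n))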

class RidgeCCA:

    def __init__(
            self, n_components=2, alpha=0.0,
            center_data=True, svd_args=None):
        """
        n_components : int, (default 2)
            Number of components to keep.
        alpha : float within the interval [0, 1], (default 0.0)
            Strength of regularization on a scale between zero
            (unregularized CCA) and one (Partial Least Squares).
        center_data : bool, (default True)
            If True, mean-center X and Y before fitting.
        svd_args : dict, optional
            Specifies parameters for the truncated SVD solver
            (see sklearn.utils.extmath.randomized_svd).
        """
        self.n_components = n_components
        self.alpha = alpha
        self.center_data = center_data
        # Construct the dict here to avoid a shared mutable default.
        self._svd_args = {} if svd_args is None else svd_args
    def fit(self, X, Y):
        """Fit model to data."""

        # Mean-center data.
        if self.center_data:
            self.x_mean_ = x_mean = np.mean(X, axis=0)
            self.y_mean_ = y_mean = np.mean(Y, axis=0)
            Xc = X - x_mean[None, :]
            Yc = Y - y_mean[None, :]
        else:
            self.x_mean_ = None
            self.y_mean_ = None
            Xc, Yc = X, Y

        # Partially whiten both datasets.
        Xw, Zx = partial_whiten(Xc, self.alpha)
        Yw, Zy = partial_whiten(Yc, self.alpha)

        # Compute truncated SVD of the cross-covariance matrix.
        Xw_t_Yw = Xw.T @ Yw
        U, S, Vt = randomized_svd(
            Xw_t_Yw, self.n_components, **self._svd_args)

        # Undo the whitening transformation to obtain the
        # transformations on X and Y.
        self.x_weights_ = Zx @ U
        self.y_weights_ = Zy @ Vt.T

        return self
    def transform(self, X, Y):
        """Apply the dimension reduction learned on the train data."""
        if self.center_data:
            return (
                (X - self.x_mean_[None, :]) @ self.x_weights_,
                (Y - self.y_mean_[None, :]) @ self.y_weights_
            )
        else:
            return X @ self.x_weights_, Y @ self.y_weights_

    def fit_transform(self, X, Y):
        """Learn and apply the dimension reduction on the train data."""
        self.fit(X, Y)
        return self.transform(X, Y)

    def canon_corrs(self, X, Y):
        """Return the canonical correlation coefficients."""
        tX, tY = self.transform(X, Y)
        denom = np.linalg.norm(tX, axis=0) * np.linalg.norm(tY, axis=0)
        return np.sum(tX * tY, axis=0) / denom
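
# Minimal usage sketch (an assumed example, not part of the original gist):
# two random datasets driven by a shared two-dimensional latent signal, so
# the model should recover two components with high canonical correlation.
if __name__ == "__main__":
    _demo_partial_whiten()

    rng = np.random.default_rng(1234)
    m = 1000
    latent = rng.standard_normal((m, 2))
    X = latent @ rng.standard_normal((2, 20)) + 0.1 * rng.standard_normal((m, 20))
    Y = latent @ rng.standard_normal((2, 30)) + 0.1 * rng.standard_normal((m, 30))

    # alpha=0.5 interpolates halfway between CCA and PLS.
    model = RidgeCCA(n_components=2, alpha=0.5)
    tX, tY = model.fit_transform(X, Y)
    print("projected shapes:", tX.shape, tY.shape)  # (1000, 2), (1000, 2)
    print("canonical correlations:", model.canon_corrs(X, Y))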