Ridge CCA
import numpy as np
from sklearn.utils.extmath import randomized_svd


def partial_whiten(X, alpha, eigval_tol=1e-7):
    """
    Return regularized whitening transform for a matrix X.

    Parameters
    ----------
    X : ndarray
        Matrix with shape `(m, n)` holding `m` observations
        in `n`-dimensional feature space. Columns of `X` are
        expected to be mean-centered so that `X.T @ X` is
        the covariance matrix.
    alpha : float
        Regularization parameter, `0 <= alpha <= 1`.
    eigval_tol : float
        Eigenvalues of the covariance matrix are clipped to
        this minimum value.

    Returns
    -------
    X_whitened : ndarray
        Transformed data matrix.
    Zx : ndarray
        Matrix implementing the whitening transformation,
        `X_whitened = X @ Zx`.
    """
    # Regularized covariance: (1 - alpha) * (X.T @ X) + alpha * I.
    XtX = (1 - alpha) * (X.T @ X)
    XtX[np.diag_indices_from(XtX)] += alpha

    # Eigendecomposition of the regularized covariance.
    w, v = np.linalg.eigh(XtX)
    w[w < eigval_tol] = eigval_tol  # clip minimum eigenvalue

    # Matrix holding the whitening transformation.
    Zx = np.linalg.multi_dot((v, np.diag(1 / np.sqrt(w)), v.T))

    # Return the (partially) whitened data and whitening matrix.
    return X @ Zx, Zx
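
# Quick sanity check (illustrative sketch, not part of the original gist;
# `_demo_partial_whiten` is a hypothetical helper). It exercises the two
# endpoints of the regularization scale: with `alpha=0` the whitened data
# should have identity covariance, and with `alpha=1` the covariance is
# replaced by the identity, so the transform reduces to the identity map.
# It is only called from the demo block at the bottom of the script.
def _demo_partial_whiten(m=500, n=10, seed=0):
    rng = np.random.default_rng(seed)
    X = rng.standard_normal((m, n))
    X -= X.mean(axis=0)  # mean-center, as `partial_whiten` expects.

    # alpha=0: fully whitened, so Xw.T @ Xw is (approximately) identity.
    Xw, _ = partial_whiten(X, alpha=0.0)
    assert np.allclose(Xw.T @ Xw, np.eye(n), atol=1e-6)

    # alpha=1: Zx = V @ V.T = I, so the data are returned unchanged.
    Xw, Zx = partial_whiten(X, alpha=1.0)
    assert np.allclose(Zx, np.eye(n))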

class RidgeCCA:

    def __init__(
            self, n_components=2, alpha=0.0,
            center_data=True, svd_args=None):
        """
        n_components : int, (default 2)
            Number of components to keep.
        alpha : float within the interval [0, 1], (default 0.0)
            Strength of regularization on a scale between zero
            (unregularized CCA) and one (Partial Least Squares).
        center_data : bool, (default True)
            If True, mean-center X and Y before fitting.
        svd_args : dict, optional
            Specifies parameters for the truncated SVD solver
            (see sklearn.utils.extmath.randomized_svd).
        """
        self.n_components = n_components
        self.alpha = alpha
        self.center_data = center_data
        # Construct the dict here to avoid a shared mutable default.
        self._svd_args = {} if svd_args is None else svd_args
    def fit(self, X, Y):
        """Fit model to data."""

        # Mean-center data.
        if self.center_data:
            self.x_mean_ = x_mean = np.mean(X, axis=0)
            self.y_mean_ = y_mean = np.mean(Y, axis=0)
            Xc = X - x_mean[None, :]
            Yc = Y - y_mean[None, :]
        else:
            self.x_mean_ = None
            self.y_mean_ = None
            Xc, Yc = X, Y

        # Partially whiten both datasets.
        Xw, Zx = partial_whiten(Xc, self.alpha)
        Yw, Zy = partial_whiten(Yc, self.alpha)

        # Compute truncated SVD of the cross-covariance matrix.
        Xw_t_Yw = Xw.T @ Yw
        U, S, Vt = randomized_svd(
            Xw_t_Yw, self.n_components, **self._svd_args)

        # Undo the whitening transformation to obtain the
        # transformations on X and Y.
        self.x_weights_ = Zx @ U
        self.y_weights_ = Zy @ Vt.T

        return self
    def transform(self, X, Y):
        """Apply the dimension reduction learned on the train data."""
        if self.center_data:
            return (
                (X - self.x_mean_[None, :]) @ self.x_weights_,
                (Y - self.y_mean_[None, :]) @ self.y_weights_
            )
        else:
            return X @ self.x_weights_, Y @ self.y_weights_

    def fit_transform(self, X, Y):
        """Learn and apply the dimension reduction on the train data."""
        self.fit(X, Y)
        return self.transform(X, Y)

    def canon_corrs(self, X, Y):
        """Return the canonical correlation coefficients."""
        tX, tY = self.transform(X, Y)
        denom = np.linalg.norm(tX, axis=0) * np.linalg.norm(tY, axis=0)
        return np.sum(tX * tY, axis=0) / denom
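
# Minimal usage sketch (an assumed example, not part of the original gist):
# two random datasets driven by a shared two-dimensional latent signal, so
# the model should recover two components with high canonical correlation.
if __name__ == "__main__":
    _demo_partial_whiten()

    rng = np.random.default_rng(1234)
    m = 1000
    latent = rng.standard_normal((m, 2))
    X = latent @ rng.standard_normal((2, 20)) + 0.1 * rng.standard_normal((m, 20))
    Y = latent @ rng.standard_normal((2, 30)) + 0.1 * rng.standard_normal((m, 30))

    # alpha=0.5 interpolates halfway between CCA and PLS.
    model = RidgeCCA(n_components=2, alpha=0.5)
    tX, tY = model.fit_transform(X, Y)
    print("projected shapes:", tX.shape, tY.shape)  # (1000, 2), (1000, 2)
    print("canonical correlations:", model.canon_corrs(X, Y))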