Skip to content

Instantly share code, notes, and snippets.

@luisfredgs
Forked from yuyay/kcca.py
Created January 4, 2019 12:55
Show Gist options
  • Save luisfredgs/599acd4d94754b3f02e48f4bb06a2bd8 to your computer and use it in GitHub Desktop.
Save luisfredgs/599acd4d94754b3f02e48f4bb06a2bd8 to your computer and use it in GitHub Desktop.
kernel canonical correlation analysis in python
#! encoding=UTF-8
"""
kernel canonical correlation analysis
"""
import numpy as np
from scipy.linalg import svd
from sklearn.metrics.pairwise import pairwise_kernels, euclidean_distances
class KCCA(object):
def __init__(self, n_components=1, epsilon=1.0, kernel="linear", degree=3, gamma=None, coef0=1, n_jobs=1):
self.n_components = n_components
self.epsilon = epsilon
self.kernel = kernel
self.degree = degree
self.gamma = gamma
self.coef0 = coef0
self.n_jobs = n_jobs
def fit(self, X, Y):
ndata_x, nfeature_x = X.shape
ndata_y, nfeature_y = Y.shape
if ndata_x != ndata_y:
raise Exception("Inequality of number of data between X and Y")
if self.kernel != "precomputed":
Kx = self._pairwise_kernels(X)
Ky = self._pairwise_kernels(Y)
I = self.epsilon * np.identity(ndata_x)
KxI_inv = np.linalg.inv(Kx + I)
KyI_inv = np.linalg.inv(Ky + I)
L = np.dot(KxI_inv, np.dot(Kx, np.dot(Ky, KyI_inv)))
U, s, Vh = svd(L)
self.alpha = np.dot(KxI_inv, U[:, :self.n_components])
self.beta = np.dot(KyI_inv, Vh.T[:, :self.n_components])
return self
def _pairwise_kernels(self, X, Y=None):
return pairwise_kernels(X, Y, metric=self.kernel, filter_params=True, n_jobs=self.n_jobs, degree=self.degree, gamma=self.gamma, coef0=self.coef0)
if __name__ == "__main__":
X = np.random.normal(size=(1000,100))
Y = np.random.normal(size=(1000,20))
kcca = KCCA(n_components=10, kernel="rbf", n_jobs=1, epsilon=0.1).fit(X, Y)
"""
matching on test data
"""
alpha = kcca.alpha
beta = kcca.beta
X_te = np.random.normal(size=(10,100))
Y_te = np.random.normal(size=(10,20))
Kx = kcca._pairwise_kernels(X_te, X)
Ky = kcca._pairwise_kernels(Y_te, Y)
F = np.dot(Kx, alpha)
G = np.dot(Ky, beta)
D = euclidean_distances(F, G)
idx_pred = np.argmin(D, axis=0)
print "matching result:", idx_pred
"""
similarity between true object and predicted object on test data
"""
idx_true = range(10)
C = pairwise_kernels(Y_te[idx_true], Y_te[idx_pred], metric="cosine")
print "1-best mean similarity:", np.mean(C.diagonal())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment